Repository: thuml/Time-Series-Library Branch: main Commit: 7c2820986dcd Files: 363 Total size: 1.2 MB Directory structure: gitextract_frlvujza/ ├── .gitignore ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── README_zh.md ├── data_provider/ │ ├── __init__.py │ ├── data_factory.py │ ├── data_loader.py │ ├── m4.py │ └── uea.py ├── docker-compose.yml ├── exp/ │ ├── __init__.py │ ├── exp_anomaly_detection.py │ ├── exp_basic.py │ ├── exp_classification.py │ ├── exp_imputation.py │ ├── exp_long_term_forecasting.py │ ├── exp_short_term_forecasting.py │ └── exp_zero_shot_forecasting.py ├── layers/ │ ├── AutoCorrelation.py │ ├── Autoformer_EncDec.py │ ├── Conv_Blocks.py │ ├── Crossformer_EncDec.py │ ├── DWT_Decomposition.py │ ├── ETSformer_EncDec.py │ ├── Embed.py │ ├── FourierCorrelation.py │ ├── MSGBlock.py │ ├── MambaBlock.py │ ├── MultiWaveletCorrelation.py │ ├── Pyraformer_EncDec.py │ ├── SelfAttention_Family.py │ ├── StandardNorm.py │ ├── TimeFilter_layers.py │ ├── Transformer_EncDec.py │ └── __init__.py ├── models/ │ ├── Autoformer.py │ ├── Chronos.py │ ├── Chronos2.py │ ├── Crossformer.py │ ├── DLinear.py │ ├── ETSformer.py │ ├── FEDformer.py │ ├── FiLM.py │ ├── FreTS.py │ ├── Informer.py │ ├── KANAD.py │ ├── Koopa.py │ ├── LightTS.py │ ├── MICN.py │ ├── MSGNet.py │ ├── Mamba.py │ ├── MambaSimple.py │ ├── MambaSingleLayer.py │ ├── Moirai.py │ ├── MultiPatchFormer.py │ ├── Nonstationary_Transformer.py │ ├── PAttn.py │ ├── PatchTST.py │ ├── Pyraformer.py │ ├── Reformer.py │ ├── SCINet.py │ ├── SegRNN.py │ ├── Sundial.py │ ├── TSMixer.py │ ├── TemporalFusionTransformer.py │ ├── TiDE.py │ ├── TiRex.py │ ├── TimeFilter.py │ ├── TimeMixer.py │ ├── TimeMoE.py │ ├── TimeXer.py │ ├── TimesFM.py │ ├── TimesNet.py │ ├── Transformer.py │ ├── WPMixer.py │ ├── __init__.py │ └── iTransformer.py ├── requirements.txt ├── run.py ├── scripts/ │ ├── anomaly_detection/ │ │ ├── MSL/ │ │ │ ├── Autoformer.sh │ │ │ ├── Crossformer.sh │ │ │ ├── DLinear.sh │ │ │ ├── 
ETSformer.sh │ │ │ ├── FEDformer.sh │ │ │ ├── FiLM.sh │ │ │ ├── Informer.sh │ │ │ ├── KANAD.sh │ │ │ ├── LightTS.sh │ │ │ ├── MICN.sh │ │ │ ├── Pyraformer.sh │ │ │ ├── Reformer.sh │ │ │ ├── TimesNet.sh │ │ │ ├── Transformer.sh │ │ │ └── iTransformer.sh │ │ ├── PSM/ │ │ │ ├── Autoformer.sh │ │ │ ├── DLinear.sh │ │ │ ├── KANAD.sh │ │ │ ├── TimesNet.sh │ │ │ └── Transformer.sh │ │ ├── SMAP/ │ │ │ ├── Autoformer.sh │ │ │ ├── KANAD.sh │ │ │ ├── TimesNet.sh │ │ │ └── Transformer.sh │ │ ├── SMD/ │ │ │ ├── Autoformer.sh │ │ │ ├── KANAD.sh │ │ │ ├── TimesNet.sh │ │ │ └── Transformer.sh │ │ └── SWAT/ │ │ ├── Autoformer.sh │ │ ├── KANAD.sh │ │ ├── TimesNet.sh │ │ └── Transformer.sh │ ├── classification/ │ │ ├── Autoformer.sh │ │ ├── Crossformer.sh │ │ ├── DLinear.sh │ │ ├── ETSformer.sh │ │ ├── FEDformer.sh │ │ ├── FiLM.sh │ │ ├── Informer.sh │ │ ├── LightTS.sh │ │ ├── MICN.sh │ │ ├── MambaSL.out │ │ ├── MambaSL.sh │ │ ├── PatchTST.sh │ │ ├── Pyraformer.sh │ │ ├── Reformer.sh │ │ ├── TimesNet.sh │ │ ├── Transformer.sh │ │ └── iTransformer.sh │ ├── exogenous_forecast/ │ │ ├── ECL/ │ │ │ └── TimeXer.sh │ │ ├── EPF/ │ │ │ └── TimeXer.sh │ │ ├── ETTh1/ │ │ │ └── TimeXer.sh │ │ ├── ETTh2/ │ │ │ └── TimeXer.sh │ │ ├── ETTm1/ │ │ │ └── TimeXer.sh │ │ ├── ETTm2/ │ │ │ └── TimeXer.sh │ │ ├── Traffic/ │ │ │ └── TimeXer.sh │ │ └── Weather/ │ │ └── TimeXer.sh │ ├── imputation/ │ │ ├── ECL_script/ │ │ │ ├── Autoformer.sh │ │ │ ├── DLinear.sh │ │ │ ├── ETSformer.sh │ │ │ ├── FEDformer.sh │ │ │ ├── Informer.sh │ │ │ ├── LightTS.sh │ │ │ ├── Pyraformer.sh │ │ │ ├── Reformer.sh │ │ │ ├── TimesNet.sh │ │ │ ├── Transformer.sh │ │ │ └── iTransformer.sh │ │ ├── ETT_script/ │ │ │ ├── Autoformer_ETTh1.sh │ │ │ ├── Autoformer_ETTh2.sh │ │ │ ├── Autoformer_ETTm1.sh │ │ │ ├── Autoformer_ETTm2.sh │ │ │ ├── Crossformer_ETTh1.sh │ │ │ ├── DLinear_ETTh1.sh │ │ │ ├── FiLM_ETTh1.sh │ │ │ ├── MICN_ETTh1.sh │ │ │ ├── Nonstationary_Transformer_ETTh1.sh │ │ │ ├── TiDE_ETTh1.sh │ │ │ ├── TimesNet_ETTh1.sh │ │ │ 
├── TimesNet_ETTh2.sh │ │ │ ├── TimesNet_ETTm1.sh │ │ │ ├── TimesNet_ETTm2.sh │ │ │ ├── Transformer_ETTh1.sh │ │ │ ├── Transformer_ETTh2.sh │ │ │ ├── Transformer_ETTm1.sh │ │ │ ├── Transformer_ETTm2.sh │ │ │ └── iTransformer_ETTh2.sh │ │ └── Weather_script/ │ │ ├── Autoformer.sh │ │ ├── TimesNet.sh │ │ └── Transformer.sh │ ├── long_term_forecast/ │ │ ├── AugmentSample/ │ │ │ ├── Classification/ │ │ │ │ └── PatchTST.sh │ │ │ ├── Forecasting/ │ │ │ │ └── PatchTST.sh │ │ │ └── ReadMe.md │ │ ├── ECL_script/ │ │ │ ├── Autoformer.sh │ │ │ ├── Crossformer.sh │ │ │ ├── DLinear.sh │ │ │ ├── ETSformer.sh │ │ │ ├── FEDformer.sh │ │ │ ├── FiLM.sh │ │ │ ├── Informer.sh │ │ │ ├── Koopa.sh │ │ │ ├── LightTS.sh │ │ │ ├── MICN.sh │ │ │ ├── Mamba.sh │ │ │ ├── MultiPatchFormer.sh │ │ │ ├── Nonstationary_Transformer.sh │ │ │ ├── PatchTST.sh │ │ │ ├── Pyraformer.sh │ │ │ ├── Reformer.sh │ │ │ ├── SegRNN.sh │ │ │ ├── TSMixer.sh │ │ │ ├── TimeMixer.sh │ │ │ ├── TimeXer.sh │ │ │ ├── TimesNet.sh │ │ │ ├── Transformer.sh │ │ │ ├── WPMixer.sh │ │ │ └── iTransformer.sh │ │ ├── ETT_script/ │ │ │ ├── Autoformer_ETTh1.sh │ │ │ ├── Autoformer_ETTh2.sh │ │ │ ├── Autoformer_ETTm1.sh │ │ │ ├── Autoformer_ETTm2.sh │ │ │ ├── Crossformer_ETTh1.sh │ │ │ ├── Crossformer_ETTh2.sh │ │ │ ├── Crossformer_ETTm1.sh │ │ │ ├── Crossformer_ETTm2.sh │ │ │ ├── DLinear_ETTh1.sh │ │ │ ├── ETSformer_ETTh1.sh │ │ │ ├── FEDformer_ETTh1.sh │ │ │ ├── FiLM_ETTh1.sh │ │ │ ├── FiLM_ETTh2.sh │ │ │ ├── FiLM_ETTm1.sh │ │ │ ├── FiLM_ETTm2.sh │ │ │ ├── Informer_ETTh1.sh │ │ │ ├── Koopa_ETTh1.sh │ │ │ ├── Koopa_ETTh2.sh │ │ │ ├── Koopa_ETTm1.sh │ │ │ ├── Koopa_ETTm2.sh │ │ │ ├── LTSM.sh │ │ │ ├── LightTS_ETTh1.sh │ │ │ ├── MICN_ETTh1.sh │ │ │ ├── MICN_ETTh2.sh │ │ │ ├── MICN_ETTm1.sh │ │ │ ├── MICN_ETTm2.sh │ │ │ ├── MambaSimple_ETTh1.sh │ │ │ ├── Mamba_ETT_all.sh │ │ │ ├── Mamba_ETTh1.sh │ │ │ ├── Mamba_ETTh2.sh │ │ │ ├── Mamba_ETTm1.sh │ │ │ ├── Mamba_ETTm2.sh │ │ │ ├── MultiPatchFormer_ETTh1.sh │ │ │ ├── 
MultiPatchFormer_ETTm1.sh │ │ │ ├── Nonstationary_Transformer_ETTh1.sh │ │ │ ├── Nonstationary_Transformer_ETTh2.sh │ │ │ ├── Nonstationary_Transformer_ETTm1.sh │ │ │ ├── Nonstationary_Transformer_ETTm2.sh │ │ │ ├── PAttn_ETTh1.sh │ │ │ ├── PatchTST_ETTh1.sh │ │ │ ├── PatchTST_ETTh2.sh │ │ │ ├── PatchTST_ETTm1.sh │ │ │ ├── PatchTST_ETTm2.sh │ │ │ ├── Pyraformer_ETTh1.sh │ │ │ ├── Pyraformer_ETTh2.sh │ │ │ ├── Pyraformer_ETTm1.sh │ │ │ ├── Pyraformer_ETTm2.sh │ │ │ ├── Reformer_ETTh1.sh │ │ │ ├── SegRNN_ETTh1.sh │ │ │ ├── SegRNN_ETTh2.sh │ │ │ ├── SegRNN_ETTm1.sh │ │ │ ├── SegRNN_ETTm2.sh │ │ │ ├── TSMixer_ETTh1.sh │ │ │ ├── TSMixer_ETTh2.sh │ │ │ ├── TSMixer_ETTm1.sh │ │ │ ├── TSMixer_ETTm2.sh │ │ │ ├── TiDE_ETTh1.sh │ │ │ ├── TimeMixer_ETTh1.sh │ │ │ ├── TimeMixer_ETTh2.sh │ │ │ ├── TimeMixer_ETTm1.sh │ │ │ ├── TimeMixer_ETTm2.sh │ │ │ ├── TimeXer_ETTh1.sh │ │ │ ├── TimeXer_ETTh2.sh │ │ │ ├── TimeXer_ETTm1.sh │ │ │ ├── TimeXer_ETTm2.sh │ │ │ ├── TimesNet_ETTh1.sh │ │ │ ├── TimesNet_ETTh2.sh │ │ │ ├── TimesNet_ETTm1.sh │ │ │ ├── TimesNet_ETTm2.sh │ │ │ ├── Transformer_ETTh1.sh │ │ │ ├── Transformer_ETTh2.sh │ │ │ ├── Transformer_ETTm1.sh │ │ │ ├── Transformer_ETTm2.sh │ │ │ ├── WPMixer_ETTh1.sh │ │ │ ├── WPMixer_ETTh2.sh │ │ │ ├── WPMixer_ETTm1.sh │ │ │ ├── WPMixer_ETTm2.sh │ │ │ └── iTransformer_ETTh2.sh │ │ ├── Exchange_script/ │ │ │ ├── Autoformer.sh │ │ │ ├── Crossformer.sh │ │ │ ├── FiLM.sh │ │ │ ├── Koopa.sh │ │ │ ├── MICN.sh │ │ │ ├── Mamba.sh │ │ │ ├── Nonstationary_Transformer.sh │ │ │ ├── PatchTST.sh │ │ │ ├── Pyraformer.sh │ │ │ ├── TimesNet.sh │ │ │ └── Transformer.sh │ │ ├── ILI_script/ │ │ │ ├── Autoformer.sh │ │ │ ├── Crossformer.sh │ │ │ ├── FiLM.sh │ │ │ ├── Koopa.sh │ │ │ ├── MICN.sh │ │ │ ├── Nonstationary_Transformer.sh │ │ │ ├── PatchTST.sh │ │ │ ├── TimesNet.sh │ │ │ └── Transformer.sh │ │ ├── Mamba_all.sh │ │ ├── Traffic_script/ │ │ │ ├── Autoformer.sh │ │ │ ├── Crossformer.sh │ │ │ ├── FiLM.sh │ │ │ ├── Koopa.sh │ │ │ ├── MICN.sh │ │ │ ├── 
Mamba.sh │ │ │ ├── MultiPatchFormer.sh │ │ │ ├── Nonstationary_Transformer.sh │ │ │ ├── PatchTST.sh │ │ │ ├── Pyraformer.sh │ │ │ ├── SegRNN.sh │ │ │ ├── TSMixer.sh │ │ │ ├── TimeMixer.sh │ │ │ ├── TimeXer.sh │ │ │ ├── TimesNet.sh │ │ │ ├── Transformer.sh │ │ │ ├── WPMixer.sh │ │ │ └── iTransformer.sh │ │ └── Weather_script/ │ │ ├── Autoformer.sh │ │ ├── Crossformer.sh │ │ ├── FiLM.sh │ │ ├── MICN.sh │ │ ├── Mamba.sh │ │ ├── MultiPatchFormer.sh │ │ ├── Nonstationary_Transformer.sh │ │ ├── PatchTST.sh │ │ ├── Pyraformer.sh │ │ ├── SegRNN.sh │ │ ├── TSMixer.sh │ │ ├── TimeMixer.sh │ │ ├── TimeXer.sh │ │ ├── TimesNet.sh │ │ ├── Transformer.sh │ │ ├── WPMixer.sh │ │ └── iTransformer.sh │ └── short_term_forecast/ │ ├── Autoformer_M4.sh │ ├── Crossformer_M4.sh │ ├── DLinear_M4.sh │ ├── ETSformer_M4.sh │ ├── FEDformer_M4.sh │ ├── FiLM_M4.sh │ ├── Informer_M4.sh │ ├── LightTS_M4.sh │ ├── MICN_M4.sh │ ├── Mamba_M4.sh │ ├── Nonstationary_Transformer_M4.sh │ ├── Pyraformer_M4.sh │ ├── Reformer_M4.sh │ ├── TSMixer_M4.sh │ ├── TimeMixer_M4.sh │ ├── TimesNet_M4.sh │ ├── Transformer_M4.sh │ └── iTransformer_M4.sh ├── tutorial/ │ └── TimesNet_tutorial.ipynb └── utils/ ├── ADFtest.py ├── __init__.py ├── augmentation.py ├── dtw.py ├── dtw_metric.py ├── losses.py ├── m4_summary.py ├── masking.py ├── metrics.py ├── print_args.py ├── timefeatures.py └── tools.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller 
builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ /scripts/long_term_forecast/Traffic_script/PatchTST1.sh /backups/ /result.xlsx /~$result.xlsx /Time-Series-Library.zip /temp.sh .idea /tv_result.xlsx /test.py /m4_results/ /test_results/ /PatchTST_results.xlsx /seq_len_long_term_forecast/ /progress.xlsx /scripts/short_term_forecast/PatchTST_M4.sh /run_tv.py /scripts/long_term_forecast/ETT_tv_script/ /dataset/ /data/ data_factory_all.py data_loader_all.py /scripts/short_term_forecast/tv_script/ /exp/exp_short_term_forecasting_tv.py /exp/exp_long_term_forecasting_tv.py /timesnetv2.xlsx /scripts/anomaly_detection/tmp/ /scripts/imputation/tmp/ /utils/self_tools.py /scripts/exp_scripts/ 
checkpoints/ results/ result_long_term_forecast.txt result_anomaly_detection.txt scripts/augmentation/ run_anylearn.py environment.txt ================================================ FILE: CONTRIBUTING.md ================================================ ## Instructions for Contributing to TSlib Sincere thanks to all the researchers who want to use or contribute to TSlib. Since our team may not have enough time to fix all the bugs and catch up with the latest models, your contribution is essential to this project. ### (1) Fix Bug You can directly open a pull request and add a detailed description in the comment, such as [this pull request](https://github.com/thuml/Time-Series-Library/pull/498). ### (2) Add a new time series model Thanks to creative researchers, many great TS models have been presented, which have advanced this community significantly. If you want to add your model to TSlib, here are some instructions: - Open an issue to describe your model and give a link to your paper and official code. We will discuss whether your model is suitable for this library, such as in [this issue](https://github.com/thuml/Time-Series-Library/issues/346). - Open a pull request in a style similar to TSlib's, which means adding an additional file to ./models and providing corresponding scripts for reproduction, such as [this pull request](https://github.com/thuml/Time-Series-Library/pull/446). Note: Given that a lot of TS models have been proposed, we may not have enough time to judge which model can be a remarkable supplement to the current library. Thus, we have decided to add ONLY models from officially published papers to our library. Peer review can be a reliable criterion. Thanks again for your valuable contributions. 
================================================ FILE: Dockerfile ================================================ # syntax=docker/dockerfile:1.4 FROM pytorch/pytorch:2.5.1-cuda12.1-cudnn9-devel AS tslib WORKDIR /workspace ARG http_proxy ARG https_proxy ENV http_proxy=${http_proxy} ENV https_proxy=${https_proxy} ENV PYTHONPATH=/workspace/Time-Series-Library:$PYTHONPATH COPY requirements.txt . RUN --mount=type=cache,target=/root/.cache/pip \ pip install -r requirements.txt # mamba-ssm (cxx11abiFALSE) (Time-Series-Library/models/Mamba.py) RUN --mount=type=cache,target=/root/.cache/pip \ pip install https://github.com/state-spaces/mamba/releases/download/v2.2.6.post3/mamba_ssm-2.2.6.post3+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl # uni2ts (--no-deps)(Time-Series-Library/models/Moirai.py) RUN --mount=type=cache,target=/root/.cache/pip \ pip install uni2ts --no-deps COPY . . CMD ["bash"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2021 THUML @ Tsinghua University Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Time Series Library (TSLib) TSLib is an open-source library for deep learning researchers, especially for deep time series analysis. > **中文文档**:[README_zh.md](./README_zh.md) We provide a neat code base to evaluate advanced deep time series models or develop your own model, which covers five mainstream tasks: **long- and short-term forecasting, imputation, anomaly detection, and classification.** :triangular_flag_on_post:**News** (2025.12) Many thanks for the great work from [ailuntz](https://github.com/thuml/Time-Series-Library/pull/805), which provides updated requirements and Docker deployment, as well as well-organized documentation. This is very valuable to this project and to beginners. :triangular_flag_on_post:**News** (2025.11) Considering the rapid development of Large Time Series Models (LTSMs), we have newly added a [[zero-shot forecasting]](https://github.com/thuml/Time-Series-Library/blob/main/exp/exp_zero_shot_forecasting.py) feature in TSLib. You can try [this script](https://github.com/thuml/Time-Series-Library/blob/main/scripts/long_term_forecast/ETT_script/LTSM.sh) to evaluate LTSMs. :triangular_flag_on_post:**News** (2025.10) Given the recent confusion among researchers regarding minor improvements on standard benchmarks, we propose the [[Accuracy Law]](https://arxiv.org/abs/2510.02729) to characterize the objectives of deep time series forecasting tasks, which can be used to identify saturated datasets. 
:triangular_flag_on_post:**News** (2024.10) We have included [[TimeXer]](https://arxiv.org/abs/2402.19072), which defined a practical forecasting paradigm: Forecasting with Exogenous Variables. Considering both practicability and computational efficiency, we believe the new forecasting paradigm defined in TimeXer can be the "right" task for future research. :triangular_flag_on_post:**News** (2024.10) Our lab has open-sourced [[OpenLTM]](https://github.com/thuml/OpenLTM), which provides a distinct pretrain-finetuning paradigm compared to TSLib. If you are interested in Large Time Series Models, you may find this repository helpful. :triangular_flag_on_post:**News** (2024.07) We wrote a comprehensive survey of [[Deep Time Series Models]](https://arxiv.org/abs/2407.13278) with a rigorous benchmark based on TSLib. In this paper, we summarized the design principles of current time series models supported by insightful experiments, hoping to be helpful to future research. :triangular_flag_on_post:**News** (2024.04) Many thanks for the great work from [frecklebars](https://github.com/thuml/Time-Series-Library/pull/378). The famous sequential model [Mamba](https://arxiv.org/abs/2312.00752) has been included in our library. See [this file](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py), where you need to install `mamba_ssm` with pip first. :triangular_flag_on_post:**News** (2024.03) Given the inconsistent look-back length of various papers, we split the long-term forecasting in the leaderboard into two categories: Look-Back-96 and Look-Back-Searching. We recommend researchers read [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2), which includes both look-back length settings in experiments for scientific rigor. :triangular_flag_on_post:**News** (2023.10) We added an implementation of [iTransformer](https://arxiv.org/abs/2310.06625), which is the state-of-the-art model for long-term forecasting. 
The official code and complete scripts of iTransformer can be found [here](https://github.com/thuml/iTransformer). :triangular_flag_on_post:**News** (2023.09) We added a detailed [tutorial](https://github.com/thuml/Time-Series-Library/blob/main/tutorial/TimesNet_tutorial.ipynb) for [TimesNet](https://openreview.net/pdf?id=ju_Uqw384Oq) and this library, which is quite friendly to beginners of deep time series analysis. :triangular_flag_on_post:**News** (2023.02) We released TSlib as a comprehensive benchmark and code base for time series models, which is extended from our previous GitHub repository [Autoformer](https://github.com/thuml/Autoformer). ## Leaderboard for Time Series Analysis As of March 2024, the top three models for five different tasks are: | Model
Ranking | Long-term
Forecasting
Look-Back-96 | Long-term
Forecasting
Look-Back-Searching | Short-term
Forecasting | Imputation | Classification | Anomaly
Detection | | ---------------- | ----------------------------------------------------- | ----------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------------------------------------------------- | | 🥇 1st | [TimeXer](https://arxiv.org/abs/2402.19072) | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | | 🥈 2nd | [iTransformer](https://arxiv.org/abs/2310.06625) | [PatchTST](https://github.com/yuqinie98/PatchTST) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [FEDformer](https://github.com/MAZiqing/FEDformer) | | 🥉 3rd | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [DLinear](https://arxiv.org/pdf/2205.13504.pdf) | [FEDformer](https://github.com/MAZiqing/FEDformer) | [Autoformer](https://github.com/thuml/Autoformer) | [Informer](https://github.com/zhouhaoyi/Informer2020) | [Autoformer](https://github.com/thuml/Autoformer) | **Note: We will keep updating this leaderboard.** If you have proposed advanced and awesome models, you can send us your paper/code link or raise a pull request. We will add them to this repo and update the leaderboard as soon as possible. **Compared models of this leaderboard.** ☑ means that their codes have already been included in this repo. - [x] **TimeXer** - TimeXer: Empowering Transformers for Time Series Forecasting with Exogenous Variables [[NeurIPS 2024]](https://arxiv.org/abs/2402.19072) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeXer.py) - [x] **TimeMixer** - TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting [[ICLR 2024]](https://openreview.net/pdf?id=7oLshfEIC2) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMixer.py). - [x] **TSMixer** - TSMixer: An All-MLP Architecture for Time Series Forecasting [[arXiv 2023]](https://arxiv.org/pdf/2303.06053.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TSMixer.py) - [x] **iTransformer** - iTransformer: Inverted Transformers Are Effective for Time Series Forecasting [[ICLR 2024]](https://arxiv.org/abs/2310.06625) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/iTransformer.py). - [x] **PatchTST** - A Time Series is Worth 64 Words: Long-term Forecasting with Transformers [[ICLR 2023]](https://openreview.net/pdf?id=Jbdc0vTOcol) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/PatchTST.py). 
- [x] **TimesNet** - TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis [[ICLR 2023]](https://openreview.net/pdf?id=ju_Uqw384Oq) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesNet.py). - [x] **DLinear** - Are Transformers Effective for Time Series Forecasting? [[AAAI 2023]](https://arxiv.org/pdf/2205.13504.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/DLinear.py). - [x] **LightTS** - Less Is More: Fast Multivariate Time Series Forecasting with Light Sampling-oriented MLP Structures [[arXiv 2022]](https://arxiv.org/abs/2207.01186) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/LightTS.py). - [x] **ETSformer** - ETSformer: Exponential Smoothing Transformers for Time-series Forecasting [[arXiv 2022]](https://arxiv.org/abs/2202.01381) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/ETSformer.py). - [x] **Non-stationary Transformer** - Non-stationary Transformers: Exploring the Stationarity in Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/pdf?id=ucNDIDRNjjv) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Nonstationary_Transformer.py). - [x] **FEDformer** - FEDformer: Frequency Enhanced Decomposed Transformer for Long-term Series Forecasting [[ICML 2022]](https://proceedings.mlr.press/v162/zhou22g.html) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FEDformer.py). - [x] **Pyraformer** - Pyraformer: Low-complexity Pyramidal Attention for Long-range Time Series Modeling and Forecasting [[ICLR 2022]](https://openreview.net/pdf?id=0EXmFzUn5I) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Pyraformer.py). 
- [x] **Autoformer** - Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting [[NeurIPS 2021]](https://openreview.net/pdf?id=I55UqU-M11y) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Autoformer.py). - [x] **Informer** - Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting [[AAAI 2021]](https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Informer.py). - [x] **Reformer** - Reformer: The Efficient Transformer [[ICLR 2020]](https://openreview.net/forum?id=rkgNKkHtvB) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Reformer.py). - [x] **Transformer** - Attention is All You Need [[NeurIPS 2017]](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Transformer.py). See our latest paper [[TimesNet]](https://arxiv.org/abs/2210.02186) for the comprehensive benchmark. We will release a real-time updated online version soon. **Newly added baselines.** We will add them to the leaderboard after a comprehensive evaluation. 
- [x] **MambaSL** - MambaSL: Exploring Single-Layer Mamba for Time Series Classification [[ICLR 2026]](https://openreview.net/forum?id=YDl4vqQqGP) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MambaSingleLayer.py) - [x] **TimeFilter** - TimeFilter: Patch-Specific Spatial-Temporal Graph Filtration for Time Series Forecasting [[ICML 2025]](https://arxiv.org/abs/2501.13041) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeFilter.py) - [x] **KAN-AD** - KAN-AD: Time Series Anomaly Detection with Kolmogorov-Arnold Networks [[ICML 2025]](https://arxiv.org/abs/2411.00278) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/KANAD.py) - [x] **MultiPatchFormer** - A multiscale model for multivariate time series forecasting [[Scientific Reports 2025]](https://www.nature.com/articles/s41598-024-82417-4) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MultiPatchFormer.py) - [x] **WPMixer** - WPMixer: Efficient Multi-Resolution Mixing for Long-Term Time Series Forecasting [[AAAI 2025]](https://arxiv.org/abs/2412.17176) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/WPMixer.py) - [x] **MSGNet** - MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting [[AAAI 2024]](https://dl.acm.org/doi/10.1609/aaai.v38i10.28991) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MSGNet.py) - [x] **PAttn** - Are Language Models Actually Useful for Time Series Forecasting? 
[[NeurIPS 2024]](https://arxiv.org/pdf/2406.16964) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/PAttn.py) - [x] **Mamba** - Mamba: Linear-Time Sequence Modeling with Selective State Spaces [[arXiv 2023]](https://arxiv.org/abs/2312.00752) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py) - [x] **SegRNN** - SegRNN: Segment Recurrent Neural Network for Long-Term Time Series Forecasting [[arXiv 2023]](https://arxiv.org/abs/2308.11200.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/SegRNN.py). - [x] **Koopa** - Koopa: Learning Non-stationary Time Series Dynamics with Koopman Predictors [[NeurIPS 2023]](https://arxiv.org/pdf/2305.18803.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Koopa.py). - [x] **FreTS** - Frequency-domain MLPs are More Effective Learners in Time Series Forecasting [[NeurIPS 2023]](https://arxiv.org/pdf/2311.06184.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FreTS.py). - [x] **MICN** - MICN: Multi-scale Local and Global Context Modeling for Long-term Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=zt53IDUR1U)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MICN.py). - [x] **Crossformer** - Crossformer: Transformer Utilizing Cross-Dimension Dependency for Multivariate Time Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=vSVLM2j9eie)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Crossformer.py). - [x] **TiDE** - Long-term Forecasting with TiDE: Time-series Dense Encoder [[arXiv 2023]](https://arxiv.org/pdf/2304.08424.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiDE.py). 
- [x] **SCINet** - SCINet: Time Series Modeling and Forecasting with Sample Convolution and Interaction [[NeurIPS 2022]](https://openreview.net/pdf?id=AyajSjTAzmg)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/SCINet.py). - [x] **FiLM** - FiLM: Frequency improved Legendre Memory Model for Long-term Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/forum?id=zTQdHSQUQWc)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FiLM.py). - [x] **TFT** - Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting [[arXiv 2019]](https://arxiv.org/abs/1912.09363)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TemporalFusionTransformer.py). **Newly added Large Time Series Models.** This library also supports the zero-shot evaluation of the following LTSMs. - [x] **Chronos2** - Chronos-2: From Univariate to Universal Forecasting [[arXiv 2025]](https://arxiv.org/abs/2510.15821) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos2.py) - [x] **TiRex** - TiRex: Zero-Shot Forecasting Across Long and Short Horizons with Enhanced In-Context Learning [[NeurIPS 2025]](https://arxiv.org/pdf/2505.23719) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiRex.py) - [x] **Sundial** - Sundial: A Family of Highly Capable Time Series Foundation Models [[ICML 2025]](https://arxiv.org/pdf/2502.00816) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Sundial.py) - [x] **Time-MoE** - Time-MoE: Billion-Scale Time Series Foundation Models with Mixture of Experts [[ICLR 2025]](https://arxiv.org/pdf/2409.16040) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMoE.py) - [x] **Toto** - Toto: Time Series Optimized Transformer for Observability [[arXiv 2024]](https://arxiv.org/pdf/2407.07874) - [x] **Chronos** - Chronos: Learning the Language of Time Series [[TMLR 2024]](https://arxiv.org/pdf/2403.07815) 
[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos.py) - [x] **Moirai** - Unified Training of Universal Time Series Forecasting Transformers [[ICML 2024]](https://arxiv.org/pdf/2402.02592) - [x] **TimesFM** - A decoder-only foundation model for time-series forecasting [[ICML 2024]](https://arxiv.org/abs/2310.10688) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesFM.py) ## Getting Started ### Prepare Data You can obtain the well-preprocessed datasets from [[Google Drive]](https://drive.google.com/drive/folders/13Cg1KYOlzM5C7K8gK8NfC-F3EYxkM3D2?usp=sharing), [[Baidu Drive]](https://pan.baidu.com/s/1r3KhGd0Q9PJIUZdfEYoymg?pwd=i9iy) or [[Hugging Face]](https://huggingface.co/datasets/thuml/Time-Series-Library). Then place the downloaded data in the folder `./dataset`. ### Installation 1. Clone this repository. ```bash git clone https://github.com/thuml/Time-Series-Library.git cd Time-Series-Library ``` 2. Create a new Conda environment. ```bash conda create -n tslib python=3.11 conda activate tslib ``` 3. Install Core Dependencies > ⚠️ **CUDA Compatibility Notice** > The torch prebuilt package is **CUDA-version specific**. (See https://pytorch.org/get-started/previous-versions/) > Please make sure to install the package that matches your local CUDA version (e.g., `cu118` or `cu121`). > Recommended: torch==2.5.1 ```bash pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121 pip install -r requirements.txt ``` 4. Install Dependencies for Mamba Model (Required for Time-Series-Library/models/Mamba.py) > ⚠️ **Linux only** > ⚠️ **CUDA Compatibility Notice** > The prebuilt Mamba wheel is **CUDA-version specific**. > Please make sure to install the wheel that matches your local CUDA version > (e.g., `cu11` or `cu12`). Installing a mismatched version may result in > runtime errors or import failures. 
Example for **CUDA 12**: ```bash pip install https://github.com/state-spaces/mamba/releases/download/v2.2.6.post3/mamba_ssm-2.2.6.post3+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl ``` 5. Install Dependencies for Moirai Model (Required for Time-Series-Library/models/Moirai.py) ```bash pip install uni2ts --no-deps ``` ### Docker Deployment ```bash # Build and start the Docker container in detached mode docker compose -f 'Time-Series-Library/docker-compose.yml' up -d --build # Download / place the dataset into a newly created folder ./dataset at the repository root mkdir -p dataset # create the dataset directory # Copy the local dataset into the container at /workspace/dataset docker cp ./dataset tslib:/workspace/dataset # Enter the running container to continue training / evaluation docker exec -it tslib bash # Switch to the workspace directory inside the container cd /workspace # Run zero-shot forecasting with the pre-trained Moirai model python -u run.py \ --task_name zero_shot_forecast \ # task type: zero-shot forecasting --is_training 0 \ # 0 = inference only (no training) --root_path ./dataset/ETT-small/ \ # root directory of the dataset --data_path ETTh1.csv \ # dataset file name --model_id ETTh1_512_96 \ # experiment/model identifier --model Moirai \ # model name (TimesFM / Moirai) --data ETTh1 \ # dataset name --features M \ # multivariate forecasting --seq_len 512 \ # input sequence length --pred_len 96 \ # prediction horizon --enc_in 7 \ # number of input variables --des 'Exp' \ # experiment description --itr 1 # number of runs ``` ### Quick Test Quick test for all 5 tasks (1 epoch each): ```bash # Run quick tests for all 5 tasks export CUDA_VISIBLE_DEVICES=0 # 1. 
Long-term forecasting python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_long --model DLinear --data ETTh1 --features M --seq_len 96 --pred_len 96 --enc_in 7 --dec_in 7 --c_out 7 --train_epochs 1 --num_workers 2 # 2. Short-term forecasting (using ETT dataset with shorter prediction length) python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_short --model TimesNet --data ETTh1 --features M --seq_len 24 --label_len 12 --pred_len 24 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 5 --train_epochs 1 --num_workers 2 # 3. Imputation python -u run.py --task_name imputation --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_imp --model TimesNet --data ETTh1 --features M --seq_len 96 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 3 --train_epochs 1 --num_workers 2 --label_len 0 --pred_len 0 --mask_rate 0.125 --learning_rate 0.001 # 4. Anomaly detection python -u run.py --task_name anomaly_detection --is_training 1 --root_path ./dataset/PSM --model_id test_ad --model TimesNet --data PSM --features M --seq_len 100 --pred_len 0 --d_model 64 --d_ff 64 --e_layers 2 --enc_in 25 --c_out 25 --anomaly_ratio 1.0 --top_k 3 --train_epochs 1 --batch_size 128 --num_workers 2 # 5. Classification python -u run.py --task_name classification --is_training 1 --root_path ./dataset/Heartbeat/ --model_id Heartbeat --model TimesNet --data UEA --e_layers 2 --d_layers 1 --factor 3 --d_model 64 --d_ff 128 --top_k 3 --train_epochs 1 --batch_size 16 --learning_rate 0.001 --num_workers 0 ``` ### Train and Evaluate We provide the experiment scripts for all benchmarks under the folder `./scripts/`. 
You can reproduce the experiment results with the following examples: > ⚠️ Some scripts have `CUDA_VISIBLE_DEVICES` set by default. Please modify or remove this setting according to your actual GPU configuration; otherwise it may prevent GPU usage. ```bash # long-term forecast bash ./scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh # short-term forecast bash ./scripts/short_term_forecast/TimesNet_M4.sh # imputation bash ./scripts/imputation/ETT_script/TimesNet_ETTh1.sh # anomaly detection bash ./scripts/anomaly_detection/PSM/TimesNet.sh # classification bash ./scripts/classification/TimesNet.sh ``` ### Develop Your Own Model - Add the model file to the folder `./models`. You can use `./models/Transformer.py` as a reference. - Create the corresponding scripts under the folder `./scripts`. ### Note: (1) About classification: Since we include all five tasks in a unified code base, the accuracy of each subtask may fluctuate but the average performance can be reproduced (even a bit better). We have provided the reproduced checkpoints [here](https://github.com/thuml/Time-Series-Library/issues/494). (2) About anomaly detection: Some discussion about the adjustment strategy in anomaly detection can be found [here](https://github.com/thuml/Anomaly-Transformer/issues/14). The key point is that the adjustment strategy corresponds to an event-level metric. 
### Inspect the project structure: ``` Time-Series-Library/ ├── README.md # Official README with tasks, leaderboard, usage ├── requirements.txt # pip dependency list for quick environment setup ├── LICENSE / CONTRIBUTING.md # Upstream license and contribution guide ├── run.py # Unified entry that parses args and dispatches tasks ├── exp/ # Task pipelines wrapping train/val/test │ ├── exp_basic.py # Experiment base class, registers models, builds flows │ ├── exp_long_term_forecasting.py # Long-term forecasting logic │ ├── exp_short_term_forecasting.py # Short-term forecasting logic │ ├── exp_imputation.py # Missing-value imputation │ ├── exp_anomaly_detection.py # Anomaly detection │ ├── exp_classification.py # Classification │ └── exp_zero_shot_forecasting.py # LTSM zero-shot evaluation ├── data_provider/ # Dataset loaders and splits │ ├── data_factory.py # Chooses the proper DataLoader per task │ ├── data_loader.py # Generic TS reader with sliding-window logic │ ├── uea.py / m4.py # Parsers for UEA, M4 and other formats │ └── __init__.py # Exposes factory interfaces upward ├── models/ # All model implementations │ ├── TimesNet.py, TimeMixer.py # Main forecasting models │ ├── Chronos2.py, TiRex.py # LTSM zero-shot models │ └── __init__.py # Enables name-based instantiation inside exp ├── layers/ # Reusable attention / conv / embedding blocks │ ├── Transformer_EncDec.py # Transformer stacks │ ├── AutoCorrelation.py # Auto-correlation operator │ ├── MultiWaveletCorrelation.py# Frequency-domain unit │ └── Embed.py etc. 
# Shared primitives ├── utils/ # Utility toolbox │ ├── metrics.py # MSE / MAE / DTW and other metrics │ ├── tools.py # General helpers such as EarlyStopping │ ├── augmentation.py # Augmentations for classification / detection │ ├── print_args.py # Unified argument printer │ └── masking.py / losses.py # Task-specific helpers ├── scripts/ # Bash recipes for reproducible experiments │ ├── long_term_forecast/ # Long-term forecasting per dataset/model │ ├── short_term_forecast/ # M4 and other short-term scripts │ ├── imputation/ # Imputation scripts │ ├── anomaly_detection/ # SMD / SMAP / SWAT detection scripts │ ├── classification/ # UEA classification scripts │ └── exogenous_forecast/ # TimeXer exogenous forecasting flow ├── tutorial/ # TimesNet tutorial notebook and figures └── pic/ # README figures (dataset overview, etc.) ``` ### Understand the project architecture: - **E2E flow**: configure experiments via `scripts/*.sh` → run `python run.py ...` → `run.py` parses arguments and selects the proper `Exp_*` via `task_name` → the experiment builds datasets through `data_provider`, instantiates networks from `models`, and drives train/val/test with utilities in `utils` → metrics and checkpoints are written to `./checkpoints`. - **Experiment layer (`exp/`)**: `Exp_Basic` registers models and devices; subclasses implement `_get_data`, `train`, and `test` to encapsulate task-specific differences so the same model can be reused. - **Model & layer layer (`models/` + `layers/`)**: model files define architectures, while reusable attention/conv/frequency components live in `layers/` to minimize duplication. - **Data layer (`data_provider/`)**: `data_factory` returns the correct `Dataset/DataLoader`; `data_loader` handles windowing, masking, and sampling, with arguments controlling window length, missing ratio, anomaly ratio, etc. 
- **Script layer (`scripts/`)**: bash scripts capture paper configurations (dataset, window, model, GPU) for reproducibility and serve as templates for custom runs. - **Utility layer (`utils/`)**: `metrics` centralizes evaluation, `tools` bundles essentials like `EarlyStopping` and `adjust_learning_rate`, while `augmentation`/`masking` cover task-specific preprocessing. - **Learning path**: recommended reading order is `scripts -> run.py -> exp/exp_basic.py -> corresponding Exp subclass -> data_provider -> models`, using `tutorial/TimesNet_tutorial.ipynb` as a guided walkthrough before diving deeper. ## Citation If you find this repo useful, please cite our papers. ``` @inproceedings{wu2023timesnet, title={TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis}, author={Haixu Wu and Tengge Hu and Yong Liu and Hang Zhou and Jianmin Wang and Mingsheng Long}, booktitle={International Conference on Learning Representations}, year={2023}, } @article{wang2024tssurvey, title={Deep Time Series Models: A Comprehensive Survey and Benchmark}, author={Yuxuan Wang and Haixu Wu and Jiaxiang Dong and Yong Liu and Mingsheng Long and Jianmin Wang}, journal={arXiv preprint arXiv:2407.13278}, year={2024}, } ``` ## Contact If you have any questions or suggestions, feel free to contact our maintenance team: Current: - Haixu Wu (Ph.D., wuhaixu98@gmail.com) - Yuxuan Wang (Ph.D. student, wangyuxu22@mails.tsinghua.edu.cn) - Yong Liu (Ph.D. student, liuyong21@mails.tsinghua.edu.cn) - Ailuntz (Student from Open-source Community, ailuntz@icloud.com) Previous: - Huikun Weng (Undergraduate, wenghk22@mails.tsinghua.edu.cn) - Tengge Hu (Master student, htg21@mails.tsinghua.edu.cn) - Haoran Zhang (Master student, z-hr20@mails.tsinghua.edu.cn) - Jiawei Guo (Undergraduate, guo-jw21@mails.tsinghua.edu.cn) Alternatively, describe them in Issues. ## Acknowledgement This library is constructed based on the following repos: - Forecasting: https://github.com/thuml/Autoformer. 
- Anomaly Detection: https://github.com/thuml/Anomaly-Transformer. - Classification: https://github.com/thuml/Flowformer. All the experiment datasets are public, and we obtain them from the following links: - Long-term Forecasting and Imputation: https://github.com/thuml/Autoformer. - Short-term Forecasting: https://github.com/ServiceNow/N-BEATS. - Anomaly Detection: https://github.com/thuml/Anomaly-Transformer. - Classification: https://www.timeseriesclassification.com/. ## All Thanks To Our Contributors ================================================ FILE: README_zh.md ================================================ # 时间序列库(TSLib) TSLib 是一个面向深度学习研究者的开源库,特别适用于深度时间序列分析。 > **English README**:[README.md](./README.md) 我们提供了一个整洁的代码库,用于评测先进的深度时间序列模型或开发自定义模型,覆盖 **长短期预测、插补、异常检测和分类** 等五大主流任务。 :triangular_flag_on_post:**最新动态**(2025.12)非常感谢 [ailuntz](https://github.com/thuml/Time-Series-Library/pull/805) 的杰出贡献,提供了更新的依赖要求和 Docker 部署,以及完善的文档。这对本项目和初学者都很有意义。 :triangular_flag_on_post:**最新动态**(2025.11)鉴于大型时间序列模型(LTSM)的快速发展,我们在 TSLib 中新增了[[零样本预测]](https://github.com/thuml/Time-Series-Library/blob/main/exp/exp_zero_shot_forecasting.py)功能,可参考 [此脚本](https://github.com/thuml/Time-Series-Library/blob/main/scripts/long_term_forecast/ETT_script/LTSM.sh) 评测 LTSM。 :triangular_flag_on_post:**最新动态**(2025.10)针对近期研究者在标准基准上追求微小提升而产生的困惑,我们提出了[[精度定律]](https://arxiv.org/abs/2510.02729),以刻画深度时间序列预测任务的目标,并可据此识别已饱和的数据集。 :triangular_flag_on_post:**最新动态**(2024.10)我们已纳入 [[TimeXer]](https://arxiv.org/abs/2402.19072),其定义了一个实用的预测范式:带外生变量的预测。考虑到实用性与计算效率,我们认为 TimeXer 所定义的新范式将成为未来研究的“正确”任务。 :triangular_flag_on_post:**最新动态**(2024.10)实验室已开源 [[OpenLTM]](https://github.com/thuml/OpenLTM),提供了有别于 TSLib 的预训练 - 微调范式。如果您对大型时间序列模型感兴趣,该仓库值得参考。 :triangular_flag_on_post:**最新动态**(2024.07)我们撰写了关于[[深度时间序列模型]](https://arxiv.org/abs/2407.13278)的综述,并基于 TSLib 构建了严谨的基准。论文总结了当前时间序列模型的设计原则,并通过深入实验验证,期望对未来研究有所帮助。 :triangular_flag_on_post:**最新动态**(2024.04)感谢 [frecklebars](https://github.com/thuml/Time-Series-Library/pull/378) 
的贡献,著名的序列模型 [Mamba](https://arxiv.org/abs/2312.00752) 已加入本库。参见[该文件](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py),需要先用 pip 安装 `mamba_ssm`。 :triangular_flag_on_post:**最新动态**(2024.03)鉴于各论文使用的回溯窗口长度不一致,我们将排行榜中的长期预测拆分为 Look-Back-96 与 Look-Back-Searching 两类。建议阅读 [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2),其实验同时包含两种窗口设置,更具科学性。 :triangular_flag_on_post:**最新动态**(2023.10)我们添加了 [iTransformer](https://arxiv.org/abs/2310.06625) 的实现,这是长期预测领域的最新 SOTA。官方代码与完整脚本参见 [此处](https://github.com/thuml/iTransformer)。 :triangular_flag_on_post:**最新动态**(2023.09)我们为 [TimesNet](https://openreview.net/pdf?id=ju_Uqw384Oq) 及本库添加了详细[教程](https://github.com/thuml/Time-Series-Library/blob/main/tutorial/TimesNet_tutorial.ipynb),对时间序列初学者十分友好。 :triangular_flag_on_post:**最新动态**(2023.02)我们发布了 TSlib,作为一个面向时间序列模型的综合基准与代码库,扩展自此前的 [Autoformer](https://github.com/thuml/Autoformer) 仓库。 ## 时间序列分析排行榜 截至 2024 年 3 月,各任务排行榜前三名如下: | 模型
排名 | 长期预测
Look-Back-96 | 长期预测
Look-Back-Searching | 短期预测 | 插补 | 分类 | 异常检测 | | ------------ | ------------------------ | -------------------------------- | -------- | ---- | ---- | -------- | | 🥇 第一名 | [TimeXer](https://arxiv.org/abs/2402.19072) | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | | 🥈 第二名 | [iTransformer](https://arxiv.org/abs/2310.06625) | [PatchTST](https://github.com/yuqinie98/PatchTST) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [FEDformer](https://github.com/MAZiqing/FEDformer) | | 🥉 第三名 | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [DLinear](https://arxiv.org/pdf/2205.13504.pdf) | [FEDformer](https://github.com/MAZiqing/FEDformer) | [Autoformer](https://github.com/thuml/Autoformer) | [Informer](https://github.com/zhouhaoyi/Informer2020) | [Autoformer](https://github.com/thuml/Autoformer) | **说明:排行榜会持续更新。** 如果您提出了先进的模型,可通过发送论文或代码链接、或提交 PR 与我们联系,我们会尽快将其加入仓库并更新排行榜。 **排行榜中的对比模型**(☑ 表示代码已收录)。 - [x] **TimeXer** - TimeXer: Empowering Transformers for Time Series Forecasting with Exogenous Variables [[NeurIPS 2024]](https://arxiv.org/abs/2402.19072) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeXer.py) - [x] **TimeMixer** - TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting [[ICLR 2024]](https://openreview.net/pdf?id=7oLshfEIC2) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMixer.py) - [x] **TSMixer** - TSMixer: An All-MLP Architecture for Time Series Forecasting [[arXiv 2023]](https://arxiv.org/pdf/2303.06053.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TSMixer.py) - [x] **iTransformer** - iTransformer: Inverted Transformers Are Effective for Time Series Forecasting [[ICLR 2024]](https://arxiv.org/abs/2310.06625) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/iTransformer.py) - [x] **PatchTST** - A Time Series is Worth 64 Words: Long-term Forecasting with Transformers [[ICLR 2023]](https://openreview.net/pdf?id=Jbdc0vTOcol) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/PatchTST.py) - [x] **TimesNet** - TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis [[ICLR 2023]](https://openreview.net/pdf?id=ju_Uqw384Oq) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesNet.py) - [x] **DLinear** - Are Transformers Effective for Time 
Series Forecasting? [[AAAI 2023]](https://arxiv.org/pdf/2205.13504.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/DLinear.py) - [x] **LightTS** - Less Is More: Fast Multivariate Time Series Forecasting with Light Sampling-oriented MLP Structures [[arXiv 2022]](https://arxiv.org/abs/2207.01186) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/LightTS.py) - [x] **ETSformer** - ETSformer: Exponential Smoothing Transformers for Time-series Forecasting [[arXiv 2022]](https://arxiv.org/abs/2202.01381) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/ETSformer.py) - [x] **Non-stationary Transformer** - Non-stationary Transformers: Exploring the Stationarity in Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/pdf?id=ucNDIDRNjjv) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Nonstationary_Transformer.py) - [x] **FEDformer** - FEDformer: Frequency Enhanced Decomposed Transformer for Long-term Series Forecasting [[ICML 2022]](https://proceedings.mlr.press/v162/zhou22g.html) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/FEDformer.py) - [x] **Pyraformer** - Pyraformer: Low-complexity Pyramidal Attention for Long-range Time Series Modeling and Forecasting [[ICLR 2022]](https://openreview.net/pdf?id=0EXmFzUn5I) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Pyraformer.py) - [x] **Autoformer** - Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting [[NeurIPS 2021]](https://openreview.net/pdf?id=I55UqU-M11y) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Autoformer.py) - [x] **Informer** - Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting [[AAAI 2021]](https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Informer.py) - [x] **Reformer** - Reformer: The Efficient 
Transformer [[ICLR 2020]](https://openreview.net/forum?id=rkgNKkHtvB) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Reformer.py) - [x] **Transformer** - Attention is All You Need [[NeurIPS 2017]](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Transformer.py) 更多详情可参考我们关于 [[TimesNet]](https://arxiv.org/abs/2210.02186) 的最新论文,实时在线版本即将发布。 **新增基线模型**(综合评测后将加入排行榜)。 - [x] **MambaSL** - MambaSL: Exploring Single-Layer Mamba for Time Series Classification [[ICLR 2026]](https://openreview.net/forum?id=YDl4vqQqGP) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MambaSingleLayer.py) - [x] **TimeFilter** - TimeFilter: Patch-Specific Spatial-Temporal Graph Filtration for Time Series Forecasting [[ICML 2025]](https://arxiv.org/abs/2501.13041) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeFilter.py) - [x] **KAN-AD** - KAN-AD: Time Series Anomaly Detection with Kolmogorov-Arnold Networks [[ICML 2025]](https://arxiv.org/abs/2411.00278) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/KANAD.py) - [x] **MultiPatchFormer** - A multiscale model for multivariate time series forecasting [[Scientific Reports 2025]](https://www.nature.com/articles/s41598-024-82417-4) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/MultiPatchFormer.py) - [x] **WPMixer** - WPMixer: Efficient Multi-Resolution Mixing for Long-Term Time Series Forecasting [[AAAI 2025]](https://arxiv.org/abs/2412.17176) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/WPMixer.py) - [x] **MSGNet** - MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting [[AAAI 2024]](https://dl.acm.org/doi/10.1609/aaai.v38i10.28991) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/MSGNet.py) - [x] **PAttn** - Are Language Models Actually Useful 
for Time Series Forecasting? [[NeurIPS 2024]](https://arxiv.org/pdf/2406.16964) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/PAttn.py) - [x] **Mamba** - Mamba: Linear-Time Sequence Modeling with Selective State Spaces [[arXiv 2023]](https://arxiv.org/abs/2312.00752) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py) - [x] **SegRNN** - SegRNN: Segment Recurrent Neural Network for Long-Term Time Series Forecasting [[arXiv 2023]](https://arxiv.org/abs/2308.11200.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/SegRNN.py) - [x] **Koopa** - Koopa: Learning Non-stationary Time Series Dynamics with Koopman Predictors [[NeurIPS 2023]](https://arxiv.org/pdf/2305.18803.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Koopa.py) - [x] **FreTS** - Frequency-domain MLPs are More Effective Learners in Time Series Forecasting [[NeurIPS 2023]](https://arxiv.org/pdf/2311.06184.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/FreTS.py) - [x] **MICN** - MICN: Multi-scale Local and Global Context Modeling for Long-term Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=zt53IDUR1U) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/MICN.py) - [x] **Crossformer** - Crossformer: Transformer Utilizing Cross-Dimension Dependency for Multivariate Time Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=vSVLM2j9eie) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Crossformer.py) - [x] **TiDE** - Long-term Forecasting with TiDE: Time-series Dense Encoder [[arXiv 2023]](https://arxiv.org/pdf/2304.08424.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiDE.py) - [x] **SCINet** - SCINet: Time Series Modeling and Forecasting with Sample Convolution and Interaction [[NeurIPS 2022]](https://openreview.net/pdf?id=AyajSjTAzmg) 
[[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/SCINet.py) - [x] **FiLM** - FiLM: Frequency improved Legendre Memory Model for Long-term Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/forum?id=zTQdHSQUQWc) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/FiLM.py) - [x] **TFT** - Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting [[arXiv 2019]](https://arxiv.org/abs/1912.09363) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TemporalFusionTransformer.py) **新增大型时间序列模型**。本库同样支持以下 LTSM 的零样本评测: - [x] **Chronos2** - Chronos-2: From Univariate to Universal Forecasting [[arXiv 2025]](https://arxiv.org/abs/2510.15821) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos2.py) - [x] **TiRex** - TiRex: Zero-Shot Forecasting Across Long and Short Horizons with Enhanced In-Context Learning [[NeurIPS 2025]](https://arxiv.org/pdf/2505.23719) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiRex.py) - [x] **Sundial** - Sundial: A Family of Highly Capable Time Series Foundation Models [[ICML 2025]](https://arxiv.org/pdf/2502.00816) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Sundial.py) - [x] **Time-MoE** - Time-MoE: Billion-Scale Time Series Foundation Models with Mixture of Experts [[ICLR 2025]](https://arxiv.org/pdf/2409.16040) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMoE.py) - [x] **Toto** - Toto: Time Series Optimized Transformer for Observability [[arXiv 2024]](https://arxiv.org/pdf/2407.07874) - [x] **Chronos** - Chronos: Learning the Language of Time Series [[TMLR 2024]](https://arxiv.org/pdf/2403.07815) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos.py) - [x] **Moirai** - Unified Training of Universal Time Series Forecasting Transformers [[ICML 2024]](https://arxiv.org/pdf/2402.02592) - [x] **TimesFM** - TimesFM: A decoder-only 
foundation model for time-series forecasting [[ICML 2024]](https://arxiv.org/abs/2310.10688) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesFM.py) ## 快速开始 ### 准备数据 可从 [[Google Drive]](https://drive.google.com/drive/folders/13Cg1KYOlzM5C7K8gK8NfC-F3EYxkM3D2?usp=sharing)、[[Baidu Drive]](https://pan.baidu.com/s/1r3KhGd0Q9PJIUZdfEYoymg?pwd=i9iy) 或 [[Hugging Face]](https://huggingface.co/datasets/thuml/Time-Series-Library) 下载预处理数据,并置于 `./dataset` 目录。 ### 安装 1. 克隆本仓库 ```bash git clone https://github.com/thuml/Time-Series-Library.git cd Time-Series-Library ``` 2. 创建新的 Conda 环境 ```bash conda create -n tslib python=3.11 conda activate tslib ``` 3. 安装核心依赖 > ⚠️ **CUDA 兼容性提示** > torch 预编译包与 **CUDA 版本强相关**。(查看 https://pytorch.org/get-started/previous-versions/ ) > 请确保torch安装与本地 CUDA 版本匹配的包(如 `cu118` 或 `cu121`)。 > 推荐torch==2.5.1 ```bash pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121 pip install -r requirements.txt ``` 4. 安装 Mamba 模型依赖(models/Mamba.py 需要) > ⚠️ **只有linux版本** > ⚠️ **CUDA 兼容性提示** > Mamba 预编译包与 **CUDA 版本强相关**。 > 请确保安装与本地 CUDA 版本匹配的包(如 `cu11` 或 `cu12`)。 > 版本不匹配可能导致运行时错误或导入失败。 **CUDA 12** 示例: ```bash pip install https://github.com/state-spaces/mamba/releases/download/v2.2.6.post3/mamba_ssm-2.2.6.post3+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl ``` 5. 
安装 Moirai 模型依赖(models/Moirai.py 需要) ```bash pip install uni2ts --no-deps ``` ### Docker 部署 ```bash # 构建并以后台模式启动容器 docker compose -f 'Time-Series-Library/docker-compose.yml' up -d --build # 在仓库根目录创建 ./dataset 并下载/放置数据集 mkdir -p dataset # 将本地数据集复制到容器内 /workspace/dataset docker cp ./dataset tslib:/workspace/dataset # 进入运行中的容器 docker exec -it tslib bash # 切换到容器内的工作目录 cd /workspace # 使用预训练 Moirai 模型进行零样本预测 python -u run.py \ --task_name zero_shot_forecast \ # 任务类型:零样本预测 --is_training 0 \ # 0 = 仅推理 --root_path ./dataset/ETT-small/ \ # 数据集根路径 --data_path ETTh1.csv \ # 数据文件名 --model_id ETTh1_512_96 \ # 实验/模型标识 --model Moirai \ # 模型名称(TimesFM / Moirai) --data ETTh1 \ # 数据集名称 --features M \ # 多变量预测 --seq_len 512 \ # 输入序列长度 --pred_len 96 \ # 预测步长 --enc_in 7 \ # 输入变量数 --des 'Exp' \ # 实验描述 --itr 1 # 运行次数 ``` ### 快速测试 5个任务快速测试(每个任务1个epoch): ```bash # 执行所有5个任务的快速测试 export CUDA_VISIBLE_DEVICES=0 # 1. 长期预测 python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_long --model DLinear --data ETTh1 --features M --seq_len 96 --pred_len 96 --enc_in 7 --dec_in 7 --c_out 7 --train_epochs 1 --num_workers 2 # 2. 短期预测(使用ETT数据集,较短预测长度) python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_short --model TimesNet --data ETTh1 --features M --seq_len 24 --label_len 12 --pred_len 24 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 5 --train_epochs 1 --num_workers 2 # 3. 插补 python -u run.py --task_name imputation --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_imp --model TimesNet --data ETTh1 --features M --seq_len 96 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 3 --train_epochs 1 --num_workers 2 --label_len 0 --pred_len 0 --mask_rate 0.125 --learning_rate 0.001 # 4. 
异常检测 python -u run.py --task_name anomaly_detection --is_training 1 --root_path ./dataset/PSM --model_id test_ad --model TimesNet --data PSM --features M --seq_len 100 --pred_len 0 --d_model 64 --d_ff 64 --e_layers 2 --enc_in 25 --c_out 25 --anomaly_ratio 1.0 --top_k 3 --train_epochs 1 --batch_size 128 --num_workers 2 # 5. 分类 python -u run.py --task_name classification --is_training 1 --root_path ./dataset/Heartbeat/ --model_id Heartbeat --model TimesNet --data UEA --e_layers 2 --d_layers 1 --factor 3 --d_model 64 --d_ff 128 --top_k 3 --train_epochs 1 --batch_size 16 --learning_rate 0.001 --num_workers 0 ``` ### 训练与评测 `./scripts/` 目录下提供了全部基准的实验脚本,可参考下列示例复现实验: > ⚠️ 部分脚本中默认设置了 `CUDA_VISIBLE_DEVICES`,请根据实际 GPU 配置修改或删除该设置,否则可能导致无法使用 GPU。 ```bash # 长期预测 bash ./scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh # 短期预测 bash ./scripts/short_term_forecast/TimesNet_M4.sh # 插补 bash ./scripts/imputation/ETT_script/TimesNet_ETTh1.sh # 异常检测 bash ./scripts/anomaly_detection/PSM/TimesNet.sh # 分类 bash ./scripts/classification/TimesNet.sh ``` ### 开发自定义模型 - 将模型文件放入 `./models`,可参考 `./models/Transformer.py`。 - 在 `./scripts` 下创建对应的运行脚本。 ### 注意事项: (1) 关于分类:由于我们在统一代码库中涵盖五大任务,各子任务的精度可能略有波动,但平均性能可复现(甚至略高)。复现用 checkpoint 可在 [此处](https://github.com/thuml/Time-Series-Library/issues/494) 下载。 (2) 关于异常检测:有关异常检测调整策略的讨论见[这里](https://github.com/thuml/Anomaly-Transformer/issues/14),核心是该调整策略对应事件级指标。 ### 查看项目文件结构: ``` Time-Series-Library/ ├── README.md # 官方README,包含任务、榜单、使用方法 ├── requirements.txt # pip依赖列表,直接pip install复现环境 ├── LICENSE / CONTRIBUTING.md # 原项目许可与贡献指南 ├── run.py # 单入口脚本,解析参数并调度各任务 ├── exp/ # 各任务实验管线,封装训练/验证/测试 │ ├── exp_basic.py # 实验基类,注册所有模型,统一构建流程 │ ├── exp_long_term_forecasting.py # 长期预测实验逻辑 │ ├── exp_short_term_forecasting.py # 短期预测实验逻辑 │ ├── exp_imputation.py # 缺失值填充实验 │ ├── exp_anomaly_detection.py # 异常检测实验 │ ├── exp_classification.py # 分类实验 │ └── exp_zero_shot_forecasting.py # LTSM零样本预测评估 ├── data_provider/ # 数据入口,负责数据集载入与切分 │ ├── data_factory.py # 根据任务选择对应DataLoader │ ├── 
data_loader.py # 通用时序数据读取与滑窗逻辑 │ ├── uea.py / m4.py # UEA、M4等特定数据格式处理 │ └── __init__.py # 暴露上层可用的数据工厂接口 ├── models/ # 所有模型实现,文件名即模型名 │ ├── TimesNet.py、TimeMixer.py 等 # 主流预测模型 │ ├── Chronos2.py、TiRex.py # LTSM零样本模型 │ └── __init__.py # 统一导出供实验模块按名称实例化 ├── layers/ # 复用层/块,如注意力、卷积、嵌入 │ ├── Transformer_EncDec.py # Transformer编解码堆栈 │ ├── AutoCorrelation.py # 自相关算子 │ ├── MultiWaveletCorrelation.py# 频域单元 │ └── Embed.py 等 # 各模型共享基元 ├── utils/ # 工具集合 │ ├── metrics.py # MSE/MAE/DTW等评估指标 │ ├── tools.py # 训练通用工具,比如EarlyStopping │ ├── augmentation.py # 分类/检测任务增强策略 │ ├── print_args.py # 统一打印参数 │ └── masking.py / losses.py # 任务相关辅助函数 ├── scripts/ # 复现实验的bash脚本 │ ├── long_term_forecast/ # 按数据集/模型划分的长期预测脚本 │ ├── short_term_forecast/ # M4等短期预测脚本 │ ├── imputation/ # 多数据集缺失填充脚本 │ ├── anomaly_detection/ # SMD/SMAP/SWAT等检测脚本 │ ├── classification/ # UEA分类脚本 │ └── exogenous_forecast/ # TimeXer外生变量预测流程 ├── tutorial/ # 官方TimesNet教学notebook与插图 └── pic/ # README插图(数据集分布等) ``` ### 理解项目架构: - **整体流程**:通过 `scripts/*.sh` 设定实验参数 → 调用 `python run.py ...` → `run.py` 解析参数并根据 `task_name` 选择对应 `Exp_*` 类 → `Exp_*` 内部利用 `data_provider` 构造数据加载器、`models` 实例化网络、`utils` 中的工具完成训练/验证/测试 → 结果与模型参数写入 `./checkpoints`。 - **实验层(exp/)**:`Exp_Basic` 负责注册模型与设备,子类实现 `_get_data/train/test`,将不同任务的差异隔离,方便模型在多任务间复用。 - **模型与层(models/ + layers/)**:模型文件集中定义各网络结构,公用的注意力、卷积、频域块等沉淀在 `layers/`,减少重复实现。 - **数据层(data_provider/)**:`data_factory` 按任务返回 Dataset/DataLoader,`data_loader` 封装序列裁剪、滑动窗口、掩码策略,不同任务通过参数控制窗口长度、缺失率、异常比例。 - **脚本层(scripts/)**:提供与论文一致的复现实验脚本,涵盖各种数据集/模型/GPU 配置,便于批量跑榜,也可作为自定义实验的起点。 - **辅助层(utils/)**:`metrics` 统一评估指标,`tools` 中的 `EarlyStopping`、`adjust_learning_rate` 等负责训练调度;`augmentation`/`masking` 等用于任务特定的数据增强或预处理。 - **学习建议**:阅读顺序推荐 `scripts -> run.py -> exp/exp_basic.py -> 对应 Exp 子类 -> data_provider -> models`,并结合 `tutorial/TimesNet_tutorial.ipynb` 快速熟悉整体调用链,再按需深入模型或层级实现。 ## 引用 如果本仓库对您有帮助,请引用以下论文: ``` @inproceedings{wu2023timesnet, title={TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis}, 
author={Haixu Wu and Tengge Hu and Yong Liu and Hang Zhou and Jianmin Wang and Mingsheng Long}, booktitle={International Conference on Learning Representations}, year={2023}, } @article{wang2024tssurvey, title={Deep Time Series Models: A Comprehensive Survey and Benchmark}, author={Yuxuan Wang and Haixu Wu and Jiaxiang Dong and Yong Liu and Mingsheng Long and Jianmin Wang}, booktitle={arXiv preprint arXiv:2407.13278}, year={2024}, } ``` ## 联系方式 如有问题或建议,欢迎联系维护团队: 现任: - Haixu Wu(博士,wuhaixu98@gmail.com) - Yuxuan Wang(博士生,wangyuxu22@mails.tsinghua.edu.cn) - Yong Liu(博士生,liuyong21@mails.tsinghua.edu.cn) - Ailuntz(开源社区学生,ailuntz@icloud.com) 往届: - Huikun Weng(本科生,wenghk22@mails.tsinghua.edu.cn) - Tengge Hu(硕士,htg21@mails.tsinghua.edu.cn) - Haoran Zhang(硕士,z-hr20@mails.tsinghua.edu.cn) - Jiawei Guo(本科生,guo-jw21@mails.tsinghua.edu.cn) 也欢迎在 Issues 中反馈。 ## 致谢 本库参考了以下仓库: - 预测:https://github.com/thuml/Autoformer - 异常检测:https://github.com/thuml/Anomaly-Transformer - 分类:https://github.com/thuml/Flowformer 实验所用数据集均为公开数据,来源如下: - 长期预测与插补:https://github.com/thuml/Autoformer - 短期预测:https://github.com/ServiceNow/N-BEATS - 异常检测:https://github.com/thuml/Anomaly-Transformer - 分类:https://www.timeseriesclassification.com/ ## 感谢所有贡献者 ================================================ FILE: data_provider/__init__.py ================================================ ================================================ FILE: data_provider/data_factory.py ================================================ from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \ MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader from data_provider.uea import collate_fn from torch.utils.data import DataLoader data_dict = { 'ETTh1': Dataset_ETT_hour, 'ETTh2': Dataset_ETT_hour, 'ETTm1': Dataset_ETT_minute, 'ETTm2': Dataset_ETT_minute, 'custom': Dataset_Custom, 'm4': Dataset_M4, 'PSM': PSMSegLoader, 'MSL': MSLSegLoader, 'SMAP': SMAPSegLoader, 
def data_provider(args, flag):
    """Build the dataset and its DataLoader for one split of the configured task.

    Args:
        args: Experiment namespace. Always reads ``data``, ``embed``,
            ``batch_size``, ``freq``, ``task_name``, ``root_path``,
            ``seq_len`` and ``num_workers``. The forecasting/imputation
            branch additionally reads ``data_path``, ``label_len``,
            ``pred_len``, ``features``, ``target`` and ``seasonal_patterns``.
        flag: Split selector forwarded to the dataset class. ``'test'`` and
            ``'TEST'`` disable shuffling; every other value shuffles.

    Returns:
        Tuple ``(data_set, data_loader)``.

    Raises:
        KeyError: If ``args.data`` is not a key of ``data_dict``.
    """
    # Imported locally so this block needs no new file-level import.
    from functools import partial

    Data = data_dict[args.data]
    timeenc = 0 if args.embed != 'timeF' else 1
    shuffle_flag = flag not in ('test', 'TEST')
    drop_last = False  # no task branch ever drops the trailing partial batch
    batch_size = args.batch_size
    freq = args.freq

    if args.task_name == 'anomaly_detection':
        data_set = Data(
            args=args,
            root_path=args.root_path,
            win_size=args.seq_len,
            flag=flag,
        )
        print(flag, len(data_set))
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last)
        return data_set, data_loader

    if args.task_name == 'classification':
        data_set = Data(
            args=args,
            root_path=args.root_path,
            flag=flag,
        )
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last,
            # A partial of a module-level function can be pickled, a lambda
            # cannot; worker processes started with "spawn" need to pickle it.
            collate_fn=partial(collate_fn, max_len=args.seq_len),
        )
        return data_set, data_loader

    # Every remaining task uses the forecasting-style constructor.
    data_set = Data(
        args=args,
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq,
        seasonal_patterns=args.seasonal_patterns
    )
    print(flag, len(data_set))
    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=args.num_workers,
        drop_last=drop_last)
    return data_set, data_loader
class Dataset_ETT_hour(Dataset):
    """Sliding-window forecasting dataset over an hourly ETT CSV.

    Rows are split chronologically at fixed borders: 12 blocks of ``30 * 24``
    rows for training, followed by 4 blocks each for validation and test.
    The validation and test ranges start ``seq_len`` rows early so their
    first window has a complete look-back.
    """

    def __init__(self, args, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            args: Experiment namespace; ``augmentation_ratio`` is read for the
                training split.
            root_path: Directory that holds ``data_path``.
            flag: One of ``'train'``, ``'val'``, ``'test'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: ``'M'``/``'MS'`` use every column after the first,
                ``'S'`` uses only ``target``.
            data_path: CSV file name relative to ``root_path``.
            target: Name of the target column.
            scale: Standardise with statistics of the training range if true.
            timeenc: ``0`` for integer calendar fields, ``1`` for
                ``time_features``.
            freq: Frequency string handed to ``time_features``.
            seasonal_patterns: Accepted but not used by this class.
        """
        # size [seq_len, label_len, pred_len]
        self.args = args
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.label_len, self.pred_len = size[0], size[1], size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 4)`` integer array of month, day, weekday, hour.

        Args:
            dates: pandas Series holding datetimes.
        """
        fields = ('month', 'day', 'weekday', 'hour')
        return np.stack([getattr(dates.dt, name).to_numpy() for name in fields], axis=1)

    def __read_data__(self):
        """Read the CSV (or its Hugging Face copy), scale it and cut the split."""
        self.scaler = StandardScaler()
        local_fp = os.path.join(self.root_path, self.data_path)
        if os.path.exists(local_fp):
            df_raw = pd.read_csv(local_fp)
        else:
            # No local file: the hub config name is the file stem.
            cfg_name = os.path.splitext(os.path.basename(self.data_path))[0]
            ds = load_dataset(HUGGINGFACE_REPO, name=cfg_name)
            df_raw = ds["train"].to_pandas()

        rows_per_block = 30 * 24
        train_end = 12 * rows_per_block
        val_end = train_end + 4 * rows_per_block
        test_end = train_end + 8 * rows_per_block
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            # The scaler is fitted on the training range only.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Built with .dt accessors: DataFrame.drop(labels, 1) is a
            # TypeError on pandas >= 2.0.
            data_stamp = self._calendar_features(dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

        if self.set_type == 0 and self.args.augmentation_ratio > 0:
            self.data_x, self.data_y, _ = run_augmentation_single(self.data_x, self.data_y, self.args)

        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows; ``seq_y`` starts ``label_len`` rows
        before the end of ``seq_x`` and spans ``label_len + pred_len`` rows.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit ``seq_len + pred_len`` rows."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the standardisation applied in ``__read_data__``."""
        return self.scaler.inverse_transform(data)
class Dataset_ETT_minute(Dataset):
    """Sliding-window forecasting dataset over a 15-minute ETT CSV.

    Rows are split chronologically at fixed borders: 12 blocks of
    ``30 * 24 * 4`` rows for training, followed by 4 blocks each for
    validation and test. The validation and test ranges start ``seq_len``
    rows early so their first window has a complete look-back.
    """

    def __init__(self, args, root_path, flag='train', size=None,
                 features='S', data_path='ETTm1.csv',
                 target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            args: Experiment namespace; ``augmentation_ratio`` is read for the
                training split.
            root_path: Directory that holds ``data_path``.
            flag: One of ``'train'``, ``'val'``, ``'test'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: ``'M'``/``'MS'`` use every column after the first,
                ``'S'`` uses only ``target``.
            data_path: CSV file name relative to ``root_path``.
            target: Name of the target column.
            scale: Standardise with statistics of the training range if true.
            timeenc: ``0`` for integer calendar fields, ``1`` for
                ``time_features``.
            freq: Frequency string handed to ``time_features``.
            seasonal_patterns: Accepted but not used by this class.
        """
        # size [seq_len, label_len, pred_len]
        self.args = args
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.label_len, self.pred_len = size[0], size[1], size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 5)`` integer array: month, day, weekday, hour, quarter-hour.

        Args:
            dates: pandas Series holding datetimes.
        """
        stamp = dates.dt
        columns = [
            stamp.month.to_numpy(),
            stamp.day.to_numpy(),
            stamp.weekday.to_numpy(),
            stamp.hour.to_numpy(),
            stamp.minute.to_numpy() // 15,  # bucket minutes into 15-minute slots
        ]
        return np.stack(columns, axis=1)

    def __read_data__(self):
        """Read the CSV (or its Hugging Face copy), scale it and cut the split."""
        self.scaler = StandardScaler()
        local_fp = os.path.join(self.root_path, self.data_path)
        if os.path.exists(local_fp):
            df_raw = pd.read_csv(local_fp)
        else:
            # No local file: the hub config name is the file stem.
            cfg_name = os.path.splitext(os.path.basename(self.data_path))[0]
            ds = load_dataset(HUGGINGFACE_REPO, name=cfg_name)
            df_raw = ds["train"].to_pandas()

        rows_per_block = 30 * 24 * 4
        train_end = 12 * rows_per_block
        val_end = train_end + 4 * rows_per_block
        test_end = train_end + 8 * rows_per_block
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            # The scaler is fitted on the training range only.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Built with .dt accessors: DataFrame.drop(labels, 1) is a
            # TypeError on pandas >= 2.0.
            data_stamp = self._calendar_features(dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

        if self.set_type == 0 and self.args.augmentation_ratio > 0:
            self.data_x, self.data_y, _ = run_augmentation_single(self.data_x, self.data_y, self.args)

        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows; ``seq_y`` starts ``label_len`` rows
        before the end of ``seq_x`` and spans ``label_len + pred_len`` rows.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit ``seq_len + pred_len`` rows."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the standardisation applied in ``__read_data__``."""
        return self.scaler.inverse_transform(data)
class Dataset_Custom(Dataset):
    """Sliding-window forecasting dataset over a CSV with a ``date`` column.

    Columns are reordered to ``date``, the remaining features, then the
    target. Rows are split chronologically into 70% training, 20% test and
    the rest for validation; the validation and test ranges start
    ``seq_len`` rows early so their first window has a complete look-back.
    """

    def __init__(self, args, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            args: Experiment namespace; ``augmentation_ratio`` is read for the
                training split.
            root_path: Directory that holds ``data_path``.
            flag: One of ``'train'``, ``'val'``, ``'test'``.
            size: ``[seq_len, label_len, pred_len]``; ``None`` selects
                ``[384, 96, 96]``.
            features: ``'M'``/``'MS'`` use every non-date column, ``'S'`` uses
                only ``target``.
            data_path: CSV file name relative to ``root_path``.
            target: Name of the target column.
            scale: Standardise with statistics of the training range if true.
            timeenc: ``0`` for integer calendar fields, ``1`` for
                ``time_features``.
            freq: Frequency string handed to ``time_features``.
            seasonal_patterns: Accepted but not used by this class.
        """
        # size [seq_len, label_len, pred_len]
        self.args = args
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len, self.label_len, self.pred_len = size[0], size[1], size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    @staticmethod
    def _calendar_features(dates):
        """Return an ``(n, 4)`` integer array of month, day, weekday, hour.

        Args:
            dates: pandas Series holding datetimes.
        """
        fields = ('month', 'day', 'weekday', 'hour')
        return np.stack([getattr(dates.dt, name).to_numpy() for name in fields], axis=1)

    def _split_borders(self, n_rows):
        """Return ``(border1s, border2s)`` start/stop rows for train, val, test."""
        num_train = int(n_rows * 0.7)
        num_test = int(n_rows * 0.2)
        num_vali = n_rows - num_train - num_test
        border1s = [0, num_train - self.seq_len, n_rows - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, n_rows]
        return border1s, border2s

    def __read_data__(self):
        """Read the CSV (or its Hugging Face copy), scale it and cut the split."""
        self.scaler = StandardScaler()
        local_fp = os.path.join(self.root_path, self.data_path)
        if os.path.exists(local_fp):
            df_raw = pd.read_csv(local_fp)
        else:
            # No local file: the hub config name is the file stem.
            cfg_name = os.path.splitext(os.path.basename(self.data_path))[0]
            ds = load_dataset(HUGGINGFACE_REPO, name=cfg_name)
            split_name = "train" if "train" in ds else list(ds.keys())[0]
            df_raw = ds[split_name].to_pandas()

        # Reorder to: ['date', ...(other features), target feature]
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        border1s, border2s = self._split_borders(len(df_raw))
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            # The scaler is fitted on the training range only.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Built with .dt accessors: DataFrame.drop(labels, 1) is a
            # TypeError on pandas >= 2.0.
            data_stamp = self._calendar_features(dates)
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

        if self.set_type == 0 and self.args.augmentation_ratio > 0:
            self.data_x, self.data_y, _ = run_augmentation_single(self.data_x, self.data_y, self.args)

        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows; ``seq_y`` starts ``label_len`` rows
        before the end of ``seq_x`` and spans ``label_len + pred_len`` rows.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit ``seq_len + pred_len`` rows."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the standardisation applied in ``__read_data__``."""
        return self.scaler.inverse_transform(data)
class Dataset_M4(Dataset):
    """Randomly windowed dataset over the M4 series of one seasonal pattern.

    Each item is drawn from a single series: a cut point is sampled near the
    end of the series, the ``seq_len`` values before it form the input and
    the values around it form the output. Both are zero-padded and returned
    with masks that mark the real values.
    """

    def __init__(self, args, root_path, flag='pred', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=False, inverse=False, timeenc=0, freq='15min',
                 seasonal_patterns='Yearly'):
        """Store the configuration and load the series.

        Args:
            args: Accepted for a uniform constructor signature; not read here.
            root_path: Directory handed to ``M4Dataset.load``.
            flag: ``'train'`` loads the training part, anything else the test part.
            size: ``[seq_len, label_len, pred_len]``; required despite the
                ``None`` default.
            features, target, scale, inverse, timeenc: Stored on the instance only.
            data_path, freq: Accepted but not used by this class.
            seasonal_patterns: Key into ``M4Meta.history_size`` and the group
                of series to keep.
        """
        # size [seq_len, label_len, pred_len]
        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.root_path = root_path

        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]

        self.seasonal_patterns = seasonal_patterns
        self.history_size = M4Meta.history_size[seasonal_patterns]
        # Cut points are sampled from at most this many trailing values.
        self.window_sampling_limit = int(self.history_size * self.pred_len)
        self.flag = flag

        self.__read_data__()

    @staticmethod
    def _strip_nan_padding(values):
        """Return a list with the NaN entries removed from every series.

        A plain list is returned on purpose: the cleaned series have different
        lengths, and ``np.array`` over ragged sequences raises ``ValueError``
        on NumPy >= 1.24.
        """
        return [series[~np.isnan(series)] for series in values]

    def __read_data__(self):
        """Load the M4 part selected by ``flag`` and keep one seasonal pattern."""
        dataset = M4Dataset.load(training=(self.flag == 'train'), dataset_file=self.root_path)
        selected = dataset.groups == self.seasonal_patterns  # split different frequencies
        self.ids = np.array([i for i in dataset.ids[selected]])
        self.timeseries = self._strip_nan_padding(dataset.values[selected])

    def __getitem__(self, index):
        """Return ``(insample, outsample, insample_mask, outsample_mask)``.

        ``insample`` has shape ``(seq_len, 1)`` and is right-aligned;
        ``outsample`` has shape ``(label_len + pred_len, 1)`` and is
        left-aligned. Masks are 1.0 where a real value was copied.
        """
        insample = np.zeros((self.seq_len, 1))
        insample_mask = np.zeros((self.seq_len, 1))
        outsample = np.zeros((self.pred_len + self.label_len, 1))
        outsample_mask = np.zeros((self.pred_len + self.label_len, 1))  # m4 dataset

        sampled_timeseries = self.timeseries[index]
        cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit),
                                      high=len(sampled_timeseries),
                                      size=1)[0]

        insample_window = sampled_timeseries[max(0, cut_point - self.seq_len):cut_point]
        insample[-len(insample_window):, 0] = insample_window
        insample_mask[-len(insample_window):, 0] = 1.0
        outsample_window = sampled_timeseries[
                           max(0, cut_point - self.label_len):min(len(sampled_timeseries), cut_point + self.pred_len)]
        outsample[:len(outsample_window), 0] = outsample_window
        outsample_mask[:len(outsample_window), 0] = 1.0
        return insample, outsample, insample_mask, outsample_mask

    def __len__(self):
        """Number of series kept for the seasonal pattern."""
        return len(self.timeseries)

    def inverse_transform(self, data):
        """Undo scaling; returns ``data`` unchanged when no scaler was fitted.

        This class never fits a scaler itself, so the unguarded lookup of
        ``self.scaler`` used to raise ``AttributeError``.
        """
        scaler = getattr(self, 'scaler', None)
        if scaler is None:
            return data
        return scaler.inverse_transform(data)

    def last_insample_window(self):
        """
        The last window of insample size of all timeseries.
        This function does not support batching and does not reshuffle timeseries.

        :return: Last insample window of all timeseries. Shape "timeseries, insample size"
        """
        insample = np.zeros((len(self.timeseries), self.seq_len))
        insample_mask = np.zeros((len(self.timeseries), self.seq_len))
        for i, ts in enumerate(self.timeseries):
            ts_last_window = ts[-self.seq_len:]
            # Right-align; shorter series leave leading zeros with mask 0.
            insample[i, -len(ts_last_window):] = ts_last_window
            insample_mask[i, -len(ts_last_window):] = 1.0
        return insample, insample_mask
class PSMSegLoader(Dataset):
    """Sliding-window loader for the PSM anomaly-detection CSV files.

    Train and test frames are standardised with statistics fitted on the
    training frame; the last 20% of the training rows double as the
    validation split.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        """Load, scale and split the PSM data.

        Args:
            args: Accepted for a uniform constructor signature; not read here.
            root_path: Directory searched for ``train.csv``, ``test.csv`` and
                ``test_label.csv``; the Hugging Face copy is used if any is missing.
            win_size: Number of rows per window.
            step: Stride between consecutive windows.
            flag: ``'train'``, ``'val'`` or ``'test'``; any other value selects
                non-overlapping windows over the test data.
        """
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_path = os.path.join(root_path, "train.csv")
        test_path = os.path.join(root_path, "test.csv")
        label_path = os.path.join(root_path, "test_label.csv")
        if all(os.path.exists(p) for p in (train_path, test_path, label_path)):
            train_df = pd.read_csv(train_path)
            test_df = pd.read_csv(test_path)
            test_label_df = pd.read_csv(label_path)
        else:
            ds_data = load_dataset(HUGGINGFACE_REPO, name="PSM-data")
            ds_label = load_dataset(HUGGINGFACE_REPO, name="PSM-label")
            train_df = ds_data["train"].to_pandas()
            test_df = ds_data["test"].to_pandas()
            test_label_df = ds_label[next(iter(ds_label))].to_pandas()

        # The first column of every frame is dropped; NaNs become zeros.
        data = np.nan_to_num(train_df.values[:, 1:])
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.nan_to_num(test_df.values[:, 1:])
        self.test = self.scaler.transform(test_data)
        self.train = data
        data_len = len(self.train)
        self.val = self.train[int(data_len * 0.8):]
        self.test_labels = test_label_df.values[:, 1:]
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows in the active split; 0 if it is shorter than a window."""
        if self.flag == "train":
            n_rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            n_rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            n_rows, stride = self.test.shape[0], self.step
        else:
            n_rows, stride = self.test.shape[0], self.win_size
        # Clamp: a negative value would make len() raise ValueError.
        return max((n_rows - self.win_size) // stride + 1, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        Train and validation windows carry the first ``win_size`` test labels
        as a placeholder; test windows carry their own labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            return np.float32(self.train[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'val':
            return np.float32(self.val[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'test':
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
        else:
            # Non-overlapping windows: window i starts at row i * win_size.
            start = index * self.win_size
            stop = start + self.win_size
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
class MSLSegLoader(Dataset):
    """Sliding-window loader for the MSL anomaly-detection arrays.

    Train and test arrays are standardised with statistics fitted on the
    training array; the last 20% of the training rows double as the
    validation split.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        """Load, scale and split the MSL data.

        Args:
            args: Accepted for a uniform constructor signature; not read here.
            root_path: Directory searched for the ``MSL_*.npy`` files; the
                Hugging Face copies are used if any is missing.
            win_size: Number of rows per window.
            step: Stride between consecutive windows.
            flag: ``'train'``, ``'val'`` or ``'test'``; any other value selects
                non-overlapping windows over the test data.
        """
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        names = ("MSL_train.npy", "MSL_test.npy", "MSL_test_label.npy")
        paths = [os.path.join(root_path, name) for name in names]
        if not all(os.path.exists(p) for p in paths):
            paths = [hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="MSL/" + name, repo_type="dataset")
                     for name in names]
        train_data, test_data, test_label = (np.load(p) for p in paths)

        self.scaler.fit(train_data)
        self.train = self.scaler.transform(train_data)
        self.test = self.scaler.transform(test_data)
        self.test_labels = test_label
        data_len = len(self.train)
        self.val = self.train[int(data_len * 0.8):]
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows in the active split; 0 if it is shorter than a window."""
        if self.flag == "train":
            n_rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            n_rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            n_rows, stride = self.test.shape[0], self.step
        else:
            n_rows, stride = self.test.shape[0], self.win_size
        # Clamp: a negative value would make len() raise ValueError.
        return max((n_rows - self.win_size) // stride + 1, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        Train and validation windows carry the first ``win_size`` test labels
        as a placeholder; test windows carry their own labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            return np.float32(self.train[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'val':
            return np.float32(self.val[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'test':
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
        else:
            # Non-overlapping windows: window i starts at row i * win_size.
            start = index * self.win_size
            stop = start + self.win_size
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
class SMAPSegLoader(Dataset):
    """Sliding-window loader for the SMAP anomaly-detection arrays.

    Train and test arrays are standardised with statistics fitted on the
    training array; the last 20% of the training rows double as the
    validation split.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        """Load, scale and split the SMAP data.

        Args:
            args: Accepted for a uniform constructor signature; not read here.
            root_path: Directory searched for the ``SMAP_*.npy`` files; the
                Hugging Face copies are used if any is missing.
            win_size: Number of rows per window.
            step: Stride between consecutive windows.
            flag: ``'train'``, ``'val'`` or ``'test'``; any other value selects
                non-overlapping windows over the test data.
        """
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        names = ("SMAP_train.npy", "SMAP_test.npy", "SMAP_test_label.npy")
        paths = [os.path.join(root_path, name) for name in names]
        if not all(os.path.exists(p) for p in paths):
            paths = [hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMAP/" + name, repo_type="dataset")
                     for name in names]
        train_data, test_data, test_label = (np.load(p) for p in paths)

        # Standardise with training statistics.
        self.scaler.fit(train_data)
        self.train = self.scaler.transform(train_data)
        self.test = self.scaler.transform(test_data)
        self.test_labels = test_label
        data_len = len(self.train)
        self.val = self.train[int(data_len * 0.8):]
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows in the active split; 0 if it is shorter than a window."""
        if self.flag == "train":
            n_rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            n_rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            n_rows, stride = self.test.shape[0], self.step
        else:
            n_rows, stride = self.test.shape[0], self.win_size
        # Clamp: a negative value would make len() raise ValueError.
        return max((n_rows - self.win_size) // stride + 1, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        Train and validation windows carry the first ``win_size`` test labels
        as a placeholder; test windows carry their own labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            return np.float32(self.train[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'val':
            return np.float32(self.val[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'test':
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
        else:
            # Non-overlapping windows: window i starts at row i * win_size.
            start = index * self.win_size
            stop = start + self.win_size
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
class SMDSegLoader(Dataset):
    """Sliding-window loader for the SMD anomaly-detection arrays.

    Train and test arrays are standardised with statistics fitted on the
    training array; the last 20% of the training rows double as the
    validation split. Unlike its sibling loaders the default stride is 100.
    """

    def __init__(self, args, root_path, win_size, step=100, flag="train"):
        """Load, scale and split the SMD data.

        Args:
            args: Accepted for a uniform constructor signature; not read here.
            root_path: Directory searched for the ``SMD_*.npy`` files; the
                Hugging Face copies are used if any is missing.
            win_size: Number of rows per window.
            step: Stride between consecutive windows.
            flag: ``'train'``, ``'val'`` or ``'test'``; any other value selects
                non-overlapping windows over the test data.
        """
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        names = ("SMD_train.npy", "SMD_test.npy", "SMD_test_label.npy")
        paths = [os.path.join(root_path, name) for name in names]
        if not all(os.path.exists(p) for p in paths):
            paths = [hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMD/" + name, repo_type="dataset")
                     for name in names]
        train_data, test_data, test_label = (np.load(p) for p in paths)

        self.scaler.fit(train_data)
        self.train = self.scaler.transform(train_data)
        self.test = self.scaler.transform(test_data)
        data_len = len(self.train)
        self.val = self.train[int(data_len * 0.8):]
        self.test_labels = test_label
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows in the active split; 0 if it is shorter than a window."""
        if self.flag == "train":
            n_rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            n_rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            n_rows, stride = self.test.shape[0], self.step
        else:
            n_rows, stride = self.test.shape[0], self.win_size
        # Clamp: a negative value would make len() raise ValueError.
        return max((n_rows - self.win_size) // stride + 1, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        Train and validation windows carry the first ``win_size`` test labels
        as a placeholder; test windows carry their own labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            return np.float32(self.train[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'val':
            return np.float32(self.val[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'test':
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
        else:
            # Non-overlapping windows: window i starts at row i * win_size.
            start = index * self.win_size
            stop = start + self.win_size
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
class SWATSegLoader(Dataset):
    """Sliding-window loader for the SWaT anomaly-detection CSV files.

    The last column of each frame is split off (it supplies the test labels);
    the remaining columns are standardised with statistics fitted on the
    training frame. The last 20% of the training rows double as the
    validation split.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        """Load, scale and split the SWaT data.

        Args:
            args: Accepted for a uniform constructor signature; not read here.
            root_path: Directory searched for ``swat_train2.csv`` and
                ``swat2.csv``; the Hugging Face copy is used if either is missing.
            win_size: Number of rows per window.
            step: Stride between consecutive windows.
            flag: ``'train'``, ``'val'`` or ``'test'``; any other value selects
                non-overlapping windows over the test data.
        """
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train2_path = os.path.join(root_path, "swat_train2.csv")
        test_path = os.path.join(root_path, "swat2.csv")
        if all(os.path.exists(p) for p in (train2_path, test_path)):
            train_data = pd.read_csv(train2_path)
            test_data = pd.read_csv(test_path)
        else:
            ds = load_dataset(HUGGINGFACE_REPO, name="SWaT")
            train_data = ds["train"].to_pandas()
            test_data = ds["test"].to_pandas()

        labels = test_data.values[:, -1:]
        train_data = train_data.values[:, :-1]
        test_data = test_data.values[:, :-1]

        self.scaler.fit(train_data)
        self.train = self.scaler.transform(train_data)
        self.test = self.scaler.transform(test_data)
        data_len = len(self.train)
        self.val = self.train[int(data_len * 0.8):]
        self.test_labels = labels
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows in the active split; 0 if it is shorter than a window."""
        if self.flag == "train":
            n_rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            n_rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            n_rows, stride = self.test.shape[0], self.step
        else:
            n_rows, stride = self.test.shape[0], self.win_size
        # Clamp: a negative value would make len() raise ValueError.
        return max((n_rows - self.win_size) // stride + 1, 0)

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        Train and validation windows carry the first ``win_size`` test labels
        as a placeholder; test windows carry their own labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            return np.float32(self.train[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'val':
            return np.float32(self.val[start:stop]), np.float32(self.test_labels[0:self.win_size])
        elif self.flag == 'test':
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
        else:
            # Non-overlapping windows: window i starts at row i * win_size.
            start = index * self.win_size
            stop = start + self.win_size
            return np.float32(self.test[start:stop]), np.float32(self.test_labels[start:stop])
class UEAloader(Dataset):
    """
    Dataset class for datasets included in:
        Time Series Classification Archive (www.timeseriesclassification.com)
    Argument:
        limit_size: float in (0, 1) for debug
    Attributes:
        all_df: (num_samples * seq_len, num_columns) dataframe indexed by integer indices, with multiple rows corresponding
            to the same index (sample).
            Each row is a time step; Each column contains either metadata (e.g. timestamp) or a feature.
        feature_df: (num_samples * seq_len, feat_dim) dataframe; contains the subset of columns of `all_df` which correspond to selected features
        feature_names: names of columns contained in `feature_df` (same as feature_df.columns)
        all_IDs: (num_samples,) series of IDs contained in `all_df`/`feature_df` (same as all_df.index.unique() )
        labels_df: (num_samples, num_labels) pd.DataFrame of label(s) for each sample
        max_seq_len: maximum sequence (time series) length, set while loading the .ts file.
    """

    def __init__(self, args, root_path, file_list=None, limit_size=None, flag=None):
        """Load one split of a UEA dataset and normalise its features.

        Args:
            args: Experiment namespace; ``model_id`` names the dataset and
                ``augmentation_ratio`` is read in ``__getitem__``.
            root_path: Directory searched for ``<model_id>_<SPLIT>.ts``.
            file_list: Forwarded to ``load_all``, which does not use it.
            limit_size: Keep only the first samples: a value > 1 is a count,
                a value <= 1 a proportion of all samples.
            flag: Split name; a value containing "train" (any case) selects
                the TRAIN file, anything else the TEST file.
        """
        self.args = args
        self.root_path = root_path
        self.flag = flag
        self.all_df, self.labels_df = self.load_all(root_path, file_list=file_list, flag=flag)
        self.all_IDs = self.all_df.index.unique()  # all sample IDs (integer indices 0 ... num_samples-1)

        if limit_size is not None:
            if limit_size > 1:
                limit_size = int(limit_size)
            else:  # interpret as proportion if in (0, 1]
                limit_size = int(limit_size * len(self.all_IDs))
            self.all_IDs = self.all_IDs[:limit_size]
            self.all_df = self.all_df.loc[self.all_IDs]

        # use all features
        self.feature_names = self.all_df.columns
        self.feature_df = self.all_df

        # pre_process
        normalizer = Normalizer()
        self.feature_df = normalizer.normalize(self.feature_df)
        print(len(self.all_IDs))

    @staticmethod
    def _elementwise(df, func):
        """Apply ``func`` to every cell of ``df``.

        Uses ``DataFrame.map`` where pandas provides it and falls back to
        ``DataFrame.applymap`` (deprecated since pandas 2.1) otherwise. The
        method is looked up on the class so a column named "map" cannot
        shadow it.
        """
        mapper = pd.DataFrame.map if hasattr(pd.DataFrame, 'map') else pd.DataFrame.applymap
        return mapper(df, func)

    def _resolve_ts_path(self, root_path, dataset_name, flag):
        """Return the local .ts path if it exists, else download it from the hub."""
        split = "TRAIN" if "train" in str(flag).lower() else "TEST"
        fname = f"{dataset_name}_{split}.ts"
        local = os.path.join(root_path, fname)
        if os.path.exists(local):
            return local
        return hf_hub_download(HUGGINGFACE_REPO, filename=f"{dataset_name}/{fname}", repo_type="dataset")

    def load_all(self, root_path, file_list=None, flag=None):
        """
        Loads the .ts file of one split into a dataframe.
        Args:
            root_path: directory searched for the dataset's .ts file
            file_list: accepted for interface compatibility; not used
            flag: split name, see `_resolve_ts_path`; None selects the training file
        Returns:
            all_df: a single dataframe with all data of the selected file
            labels_df: dataframe containing label(s) for each sample
        """
        # The dataset name is taken from the experiment's model_id.
        dataset_name = self.args.model_id
        ts_path = self._resolve_ts_path(root_path, dataset_name, flag or "train")
        all_df, labels_df = self.load_single(ts_path)
        return all_df, labels_df

    def load_single(self, filepath):
        """Parse one .ts file into ``(features_df, labels_df)``.

        Also sets ``class_names`` and ``max_seq_len`` on the instance.
        """
        df, labels = load_from_tsfile_to_dataframe(filepath, return_separate_X_and_y=True,
                                                   replace_missing_vals_with='NaN')
        labels = pd.Series(labels, dtype="category")
        self.class_names = labels.cat.categories
        # NOTE(review): the original comment said "int8-32 gives an error when
        # using nn.CrossEntropyLoss" although int8 is what is stored here —
        # confirm where the cast to a wider type happens.
        labels_df = pd.DataFrame(labels.cat.codes, dtype=np.int8)

        lengths = self._elementwise(df, len).values  # (num_samples, num_dimensions) array containing the length of each series

        horiz_diffs = np.abs(lengths - np.expand_dims(lengths[:, 0], -1))

        if np.sum(horiz_diffs) > 0:  # if any row (sample) has varying length across dimensions
            df = self._elementwise(df, subsample)

        lengths = self._elementwise(df, len).values
        vert_diffs = np.abs(lengths - np.expand_dims(lengths[0, :], 0))
        if np.sum(vert_diffs) > 0:  # if any column (dimension) has varying length across samples
            self.max_seq_len = int(np.max(lengths[:, 0]))
        else:
            self.max_seq_len = lengths[0, 0]

        # First create a (seq_len, feat_dim) dataframe for each sample, indexed by a single integer ("ID" of the sample)
        # Then concatenate into a (num_samples * seq_len, feat_dim) dataframe, with multiple rows corresponding to the
        # sample index (i.e. the same scheme as all datasets in this project)
        df = pd.concat((pd.DataFrame({col: df.loc[row, col] for col in df.columns}).reset_index(drop=True).set_index(
            pd.Series(lengths[row, 0] * [row])) for row in range(df.shape[0])), axis=0)

        # Replace NaN values
        grp = df.groupby(by=df.index)
        df = grp.transform(interpolate_missing)

        return df, labels_df

    def instance_norm(self, case):
        """Normalise one sample; only applied when ``root_path`` contains 'EthanolConcentration'."""
        if self.root_path.count('EthanolConcentration') > 0:  # special process for numerical stability
            mean = case.mean(0, keepdim=True)
            case = case - mean
            stdev = torch.sqrt(torch.var(case, dim=1, keepdim=True, unbiased=False) + 1e-5)
            case /= stdev
            return case
        else:
            return case

    def __getitem__(self, ind):
        """Return ``(features, labels)`` tensors for the sample at position ``ind``."""
        batch_x = self.feature_df.loc[self.all_IDs[ind]].values
        labels = self.labels_df.loc[self.all_IDs[ind]].values
        if self.flag == "TRAIN" and self.args.augmentation_ratio > 0:
            num_samples = len(self.all_IDs)
            num_columns = self.feature_df.shape[1]
            # Rows per sample, computed as total rows divided by sample count.
            seq_len = int(self.feature_df.shape[0] / num_samples)
            batch_x = batch_x.reshape((1, seq_len, num_columns))
            batch_x, labels, _ = run_augmentation_single(batch_x, labels, self.args)

            batch_x = batch_x.reshape((1 * seq_len, num_columns))

        return self.instance_norm(torch.from_numpy(batch_x)), \
            torch.from_numpy(labels)

    def __len__(self):
        """Number of samples."""
        return len(self.all_IDs)
HUGGINGFACE_REPO = "thuml/Time-Series-Library"


def _ensure_m4_triplet(root_dir="./dataset/m4", repo_id=HUGGINGFACE_REPO):
    """Make sure the three cached M4 files are present in ``root_dir``.

    Missing files are fetched from the Hugging Face dataset repository
    ``repo_id`` (remote paths ``m4/<name>``).  Files that already exist are
    left untouched.

    Args:
        root_dir: Directory that must contain ``M4-info.csv``,
            ``training.npz`` and ``test.npz``.  Created if absent.
        repo_id: Hugging Face repository to download from.
    """
    import shutil  # only needed on the download path

    root_dir = os.path.abspath(root_dir)
    os.makedirs(root_dir, exist_ok=True)
    files = {
        "M4-info.csv": "m4/M4-info.csv",
        "training.npz": "m4/training.npz",
        "test.npz": "m4/test.npz",
    }
    # The remote names live under "m4/", so downloading into the parent of
    # root_dir puts them straight into root_dir when it is itself named "m4"
    # (the default layout).  Previously the target was always "./dataset",
    # which silently ignored any other root_dir.
    local_dir = os.path.dirname(root_dir)
    for name, remote in files.items():
        dst = os.path.join(root_dir, name)
        if os.path.exists(dst):
            continue
        path = hf_hub_download(
            repo_id=repo_id,
            filename=remote,
            repo_type="dataset",
            local_dir=local_dir,
            local_dir_use_symlinks=False
        )
        # root_dir not named "m4": the download landed elsewhere, copy it over.
        if not os.path.exists(dst):
            shutil.copyfile(path, dst)


def url_file_name(url: str) -> str:
    """
    Extract file name from url.

    :param url: URL to extract file name from.
    :return: File name (text after the last '/'), or '' for an empty URL.
    """
    return url.split('/')[-1] if len(url) > 0 else ''


def download(url: str, file_path: str) -> None:
    """
    Download a file to the given path.

    Nothing is downloaded when ``file_path`` already exists; in both cases the
    file size is logged.

    :param url: URL to download
    :param file_path: Where to download the content.
    """

    def progress(count, block_size, total_size):
        # total_size is 0 or negative when the server does not report a
        # length; fall back to a byte counter instead of dividing by it.
        if total_size > 0:
            progress_pct = float(count * block_size) / float(total_size) * 100.0
            sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct))
        else:
            sys.stdout.write('\rDownloading {} to {} {} bytes'.format(url, file_path, count * block_size))
        sys.stdout.flush()

    if not os.path.isfile(file_path):
        opener = request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        request.install_opener(opener)
        pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
        f, _ = request.urlretrieve(url, file_path, progress)
        sys.stdout.write('\n')
        sys.stdout.flush()
        file_info = os.stat(f)
        logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.')
    else:
        file_info = os.stat(file_path)
        logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.')


@dataclass()
class M4Dataset:
    """Container for the M4 metadata columns and the cached series values."""
    ids: np.ndarray
    groups: np.ndarray
    frequencies: np.ndarray
    horizons: np.ndarray
    values: np.ndarray

    @staticmethod
    def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset':
        """
        Load cached dataset.

        :param training: Load training part if training is True, test part otherwise.
        :param dataset_file: Directory holding ``M4-info.csv``, ``training.npz``
            and ``test.npz``; missing files are downloaded first.
        :return: Populated M4Dataset.
        """
        _ensure_m4_triplet(dataset_file, repo_id=HUGGINGFACE_REPO)
        info_file = os.path.join(dataset_file, 'M4-info.csv')
        train_cache_file = os.path.join(dataset_file, 'training.npz')
        test_cache_file = os.path.join(dataset_file, 'test.npz')
        m4_info = pd.read_csv(info_file)
        # NOTE(review): allow_pickle=True executes pickled objects from a
        # downloaded file; only safe while the source repository is trusted.
        values = np.load(train_cache_file if training else test_cache_file,
                         allow_pickle=True)
        return M4Dataset(ids=m4_info.M4id.values,
                         groups=m4_info.SP.values,
                         frequencies=m4_info.Frequency.values,
                         horizons=m4_info.Horizon.values,
                         values=values)


@dataclass()
class M4Meta:
    """Static lookup tables keyed by M4 seasonal pattern."""
    seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']
    horizons = [6, 8, 18, 13, 14, 48]
    frequencies = [1, 4, 12, 1, 1, 24]
    horizons_map = {
        'Yearly': 6,
        'Quarterly': 8,
        'Monthly': 18,
        'Weekly': 13,
        'Daily': 14,
        'Hourly': 48
    }  # different predict length
    frequency_map = {
        'Yearly': 1,
        'Quarterly': 4,
        'Monthly': 12,
        'Weekly': 1,
        'Daily': 1,
        'Hourly': 24
    }
    history_size = {
        'Yearly': 1.5,
        'Quarterly': 1.5,
        'Monthly': 1.5,
        'Weekly': 10,
        'Daily': 10,
        'Hourly': 10
    }  # from interpretable.gin


def load_m4_info() -> pd.DataFrame:
    """
    Load M4Info file.

    NOTE: currently a stub -- the body is disabled and the function returns
    None despite its annotation.

    :return: Pandas DataFrame of M4Info.
    """
    # return pd.read_csv(INFO_FILE_PATH)
def padding_mask(lengths, max_len=None):
    """
    Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths,
    where 1 means keep element at this position (time step)

    Args:
        lengths: 1-D integer tensor with one sequence length per batch element.
        max_len: width of the mask; defaults to the largest value in `lengths`.
    """
    batch_size = lengths.numel()
    # Tensors have no `max_val()`; `max()` is the reduction that was intended.
    max_len = max_len or int(lengths.max())
    return (torch.arange(0, max_len, device=lengths.device)
            .type_as(lengths)
            .repeat(batch_size, 1)
            .lt(lengths.unsqueeze(1)))


class Normalizer(object):
    """
    Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization.
    """

    def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None):
        """
        Args:
            norm_type: choose from:
                "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps)
                "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. across only its own rows)
            mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values
        """
        self.norm_type = norm_type
        self.mean = mean
        self.std = std
        self.min_val = min_val
        self.max_val = max_val

    def normalize(self, df):
        """
        Args:
            df: input dataframe
        Returns:
            df: normalized dataframe
        Raises:
            NameError: if `norm_type` is not one of the supported methods.
        """
        eps = np.finfo(float).eps  # guards every division against a zero denominator

        if self.norm_type == "standardization":
            if self.mean is None:
                # statistics are computed once and cached on the instance
                self.mean = df.mean()
                self.std = df.std()
            return (df - self.mean) / (self.std + eps)

        elif self.norm_type == "minmax":
            if self.max_val is None:
                self.max_val = df.max()
                self.min_val = df.min()
            return (df - self.min_val) / (self.max_val - self.min_val + eps)

        elif self.norm_type == "per_sample_std":
            grouped = df.groupby(by=df.index)
            # eps added for consistency with the other branches: a constant
            # sample has std 0 and would otherwise produce NaN (0 / 0).
            return (df - grouped.transform('mean')) / (grouped.transform('std') + eps)

        elif self.norm_type == "per_sample_minmax":
            grouped = df.groupby(by=df.index)
            min_vals = grouped.transform('min')
            return (df - min_vals) / (grouped.transform('max') - min_vals + eps)

        else:
            raise (NameError(f'Normalize method "{self.norm_type}" not implemented'))


def interpolate_missing(y):
    """
    Replaces NaN values in pd.Series `y` using linear interpolation
    (in both directions, so leading and trailing NaNs are filled as well).
    """
    if y.isna().any():
        y = y.interpolate(method='linear', limit_direction='both')
    return y


def subsample(y, limit=256, factor=2):
    """
    If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor
    (with a fresh 0..n-1 index); otherwise returns `y` unchanged.
    """
    if len(y) > limit:
        return y[::factor].reset_index(drop=True)
    return y
class Exp_Anomaly_Detection(Exp_Basic):
    """Anomaly-detection experiment driven by reconstruction error.

    The model is trained to reproduce its input (MSE loss); at test time the
    per-timestep reconstruction error is thresholded at a percentile derived
    from ``args.anomaly_ratio``.
    """

    def __init__(self, args):
        super(Exp_Anomaly_Detection, self).__init__(args)

    def _build_model(self):
        """Instantiate ``args.model`` and optionally wrap it in DataParallel."""
        model = self.model_dict[self.args.model](self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` for the split named by ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        criterion = nn.MSELoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion):
        """Return the mean reconstruction loss over ``vali_loader``.

        The model is put in eval mode for the pass and back in train mode
        before returning.
        """
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, _) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)

                outputs = self.model(batch_x, None, None, None)

                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, :, f_dim:]
                pred = outputs.detach()
                true = batch_x.detach()

                loss = criterion(pred, true)
                total_loss.append(loss.item())
        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss

    def train(self, setting):
        """Train with early stopping and reload the best checkpoint.

        Checkpoints are written under ``args.checkpoints/<setting>``.

        Returns:
            The model with the best-validation weights loaded.
        """
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        os.makedirs(path, exist_ok=True)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)

                outputs = self.model(batch_x, None, None, None)

                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, :, f_dim:]
                loss = criterion(outputs, batch_x)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    # throughput since the last report, extrapolated to the remaining iterations
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1, self.args)

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def test(self, setting, test=0):
        """Score the test split and append metrics to result_anomaly_detection.txt.

        Args:
            setting: experiment identifier used for checkpoint / result paths.
            test: when truthy, weights are first loaded from
                ``args.checkpoints/<setting>/checkpoint.pth``.
        """
        test_data, test_loader = self._get_data(flag='test')
        train_data, train_loader = self._get_data(flag='train')
        if test:
            print('loading model')
            # Same location train() writes to (was a hard-coded './checkpoints/').
            self.model.load_state_dict(
                torch.load(os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')))

        attens_energy = []
        folder_path = './test_results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        self.model.eval()
        # reduction='none' is the supported spelling of the deprecated reduce=False
        self.anomaly_criterion = nn.MSELoss(reduction='none')

        # (1) statistic on the train set
        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(train_loader):
                batch_x = batch_x.float().to(self.device)
                # reconstruction
                outputs = self.model(batch_x, None, None, None)
                # criterion: error averaged over the last axis
                score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1)
                score = score.detach().cpu().numpy()
                attens_energy.append(score)

        attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1)
        train_energy = np.array(attens_energy)

        # (2) find the threshold
        attens_energy = []
        test_labels = []
        # inference only: no autograd graph is needed here either
        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                # reconstruction
                outputs = self.model(batch_x, None, None, None)
                # criterion
                score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1)
                score = score.detach().cpu().numpy()
                attens_energy.append(score)
                test_labels.append(batch_y)

        attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1)
        test_energy = np.array(attens_energy)
        combined_energy = np.concatenate([train_energy, test_energy], axis=0)
        threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio)
        print("Threshold :", threshold)

        # (3) evaluation on the test set
        pred = (test_energy > threshold).astype(int)
        test_labels = np.concatenate(test_labels, axis=0).reshape(-1)
        test_labels = np.array(test_labels)
        gt = test_labels.astype(int)

        print("pred:   ", pred.shape)
        print("gt:     ", gt.shape)

        # (4) detection adjustment
        gt, pred = adjustment(gt, pred)

        pred = np.array(pred)
        gt = np.array(gt)
        print("pred: ", pred.shape)
        print("gt:   ", gt.shape)

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary')
        print("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format(
            accuracy, precision,
            recall, f_score))

        # context manager guarantees the handle is closed even if a write fails
        with open("result_anomaly_detection.txt", 'a') as f:
            f.write(setting + "  \n")
            f.write("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format(
                accuracy, precision,
                recall, f_score))
            f.write('\n')
            f.write('\n')
        return
class Exp_Basic(object):
    """Base class for experiments: discovers models, picks a device, builds the model.

    Subclasses must override ``_build_model`` and normally also ``_get_data``,
    ``vali``, ``train`` and ``test``.
    """

    def __init__(self, args):
        self.args = args

        # -------------------------------------------------------
        #  Automatically generate model map
        # -------------------------------------------------------
        model_map = self._scan_models_directory()

        # Use smart dictionary
        self.model_dict = LazyModelDict(model_map)

        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _scan_models_directory(self):
        """
        Automatically scan all .py files in the models folder.

        The folder is looked up relative to the current working directory.

        Returns:
            dict mapping module name to import path,
            e.g. {'Transformer': 'models.Transformer'}; empty when there is
            no 'models' directory.
        """
        model_map = {}
        models_dir = 'models'

        # isdir (not exists): a plain file named 'models' must not reach listdir
        if os.path.isdir(models_dir):
            for filename in os.listdir(models_dir):
                # Ignore __init__.py and non-.py files
                if filename.endswith('.py') and filename != '__init__.py':
                    # Remove .py extension to get module name
                    module_name = filename[:-3]

                    # Build full import path
                    full_path = f"{models_dir}.{module_name}"

                    # loading dict: {'Transformer': 'models.Transformer'}
                    model_map[module_name] = full_path

        return model_map

    def _build_model(self):
        """Create the model; must be implemented by subclasses."""
        raise NotImplementedError

    def _acquire_device(self):
        """Choose the torch device from ``args.use_gpu`` / ``args.gpu_type``.

        For CUDA this also sets ``CUDA_VISIBLE_DEVICES``.
        """
        if self.args.use_gpu and self.args.gpu_type == 'cuda':
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        elif self.args.use_gpu and self.args.gpu_type == 'mps':
            device = torch.device('mps')
            print('Use GPU: mps')
        else:
            device = torch.device('cpu')
            print('Use CPU')
        return device

    # Hooks below are placeholders that subclasses override.
    def _get_data(self):
        pass

    def vali(self):
        pass

    def train(self):
        pass

    def test(self):
        pass
class Exp_Classification(Exp_Basic):
    """Time-series classification experiment (cross-entropy loss, accuracy metric)."""

    def __init__(self, args):
        super(Exp_Classification, self).__init__(args)

    def _build_model(self):
        """Derive data-dependent args from the datasets, then build the model.

        ``seq_len``, ``pred_len``, ``enc_in`` and ``num_class`` on ``self.args``
        are overwritten from the TRAIN / TEST datasets before the model is
        instantiated.
        """
        # model input depends on data
        train_data, train_loader = self._get_data(flag='TRAIN')
        test_data, test_loader = self._get_data(flag='TEST')
        self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len)
        self.args.pred_len = 0
        self.args.enc_in = train_data.feature_df.shape[1]
        self.args.num_class = len(train_data.class_names)
        # model init
        model = self.model_dict[self.args.model](self.args).float()
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` for the split named by ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        # model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        criterion = nn.CrossEntropyLoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion):
        """Evaluate on ``vali_loader``.

        Returns:
            ``(mean_loss, accuracy)``.  The model is left in train mode.
        """
        total_loss = []
        preds = []
        trues = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, label, padding_mask) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)
                padding_mask = padding_mask.float().to(self.device)
                label = label.to(self.device)

                outputs = self.model(batch_x, padding_mask, None, None)

                pred = outputs.detach()
                # squeeze only the last axis (as train() does): a bare squeeze()
                # turns a batch of one into a 0-d target and breaks the loss.
                loss = criterion(pred, label.long().squeeze(-1))
                total_loss.append(loss.item())

                preds.append(outputs.detach())
                trues.append(label)

        total_loss = np.average(total_loss)

        preds = torch.cat(preds, 0)
        trues = torch.cat(trues, 0)
        # (total_samples, num_classes) est. prob. for each class and sample
        probs = torch.nn.functional.softmax(preds, dim=1)
        # (total_samples,) int class index for each sample
        predictions = torch.argmax(probs, dim=1).cpu().numpy()
        trues = trues.flatten().cpu().numpy()
        accuracy = cal_accuracy(predictions, trues)

        self.model.train()
        return total_loss, accuracy

    def train(self, setting):
        """Train with gradient clipping and accuracy-based early stopping.

        The TEST split is used for both the validation and the test pass.
        Checkpoints are written under ``args.checkpoints/<setting>``.

        Returns:
            The model with the best checkpoint loaded.
        """
        train_data, train_loader = self._get_data(flag='TRAIN')
        vali_data, vali_loader = self._get_data(flag='TEST')
        test_data, test_loader = self._get_data(flag='TEST')

        path = os.path.join(self.args.checkpoints, setting)
        os.makedirs(path, exist_ok=True)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()

            for i, (batch_x, label, padding_mask) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)
                padding_mask = padding_mask.float().to(self.device)
                label = label.to(self.device)

                outputs = self.model(batch_x, padding_mask, None, None)
                loss = criterion(outputs, label.long().squeeze(-1))
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    # throughput since the last report, extrapolated to the remaining iterations
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0)
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion)
            test_loss, test_accuracy = self.vali(test_data, test_loader, criterion)

            print(
                "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f} Test Loss: {5:.3f} Test Acc: {6:.3f}"
                .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy, test_loss, test_accuracy))
            # negated so that a higher accuracy counts as an improvement
            early_stopping(-val_accuracy, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def test(self, setting, test=0):
        """Compute test accuracy and append it to ./results/<setting>/result_classification.txt.

        Args:
            setting: experiment identifier used for checkpoint / result paths.
            test: when truthy, weights are first loaded from
                ``args.checkpoints/<setting>/checkpoint.pth``.
        """
        test_data, test_loader = self._get_data(flag='TEST')
        if test:
            print('loading model')
            self.model.load_state_dict(torch.load(os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')))

        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, label, padding_mask) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                padding_mask = padding_mask.float().to(self.device)
                label = label.to(self.device)

                outputs = self.model(batch_x, padding_mask, None, None)

                preds.append(outputs.detach())
                trues.append(label)

        preds = torch.cat(preds, 0)
        trues = torch.cat(trues, 0)
        print('test shape:', preds.shape, trues.shape)

        # (total_samples, num_classes) est. prob. for each class and sample
        probs = torch.nn.functional.softmax(preds, dim=1)
        # (total_samples,) int class index for each sample
        predictions = torch.argmax(probs, dim=1).cpu().numpy()
        trues = trues.flatten().cpu().numpy()
        accuracy = cal_accuracy(predictions, trues)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        print('accuracy:{}'.format(accuracy))
        file_name = 'result_classification.txt'
        # context manager guarantees the handle is closed even if a write fails
        with open(os.path.join(folder_path, file_name), 'a') as f:
            f.write(setting + "  \n")
            f.write('accuracy:{}'.format(accuracy))
            f.write('\n')
            f.write('\n')
        return
seq len N = number of features """ mask = torch.rand((B, T, N)).to(self.device) mask[mask <= self.args.mask_rate] = 0 # masked mask[mask > self.args.mask_rate] = 1 # remained inp = batch_x.masked_fill(mask == 0, 0) outputs = self.model(inp, batch_x_mark, None, None, mask) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, :, f_dim:] # add support for MS batch_x = batch_x[:, :, f_dim:] mask = mask[:, :, f_dim:] pred = outputs.detach() true = batch_x.detach() mask = mask.detach() loss = criterion(pred[mask == 0], true[mask == 0]) total_loss.append(loss.item()) total_loss = np.average(total_loss) self.model.train() return total_loss def train(self, setting): train_data, train_loader = self._get_data(flag='train') vali_data, vali_loader = self._get_data(flag='val') test_data, test_loader = self._get_data(flag='test') path = os.path.join(self.args.checkpoints, setting) if not os.path.exists(path): os.makedirs(path) time_now = time.time() train_steps = len(train_loader) early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) model_optim = self._select_optimizer() criterion = self._select_criterion() for epoch in range(self.args.train_epochs): iter_count = 0 train_loss = [] self.model.train() epoch_time = time.time() for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): iter_count += 1 model_optim.zero_grad() batch_x = batch_x.float().to(self.device) batch_x_mark = batch_x_mark.float().to(self.device) # random mask B, T, N = batch_x.shape mask = torch.rand((B, T, N)).to(self.device) mask[mask <= self.args.mask_rate] = 0 # masked mask[mask > self.args.mask_rate] = 1 # remained inp = batch_x.masked_fill(mask == 0, 0) outputs = self.model(inp, batch_x_mark, None, None, mask) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, :, f_dim:] # add support for MS batch_x = batch_x[:, :, f_dim:] mask = mask[:, :, f_dim:] loss = criterion(outputs[mask == 0], batch_x[mask == 0]) 
train_loss.append(loss.item()) if (i + 1) % 100 == 0: print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) speed = (time.time() - time_now) / iter_count left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) iter_count = 0 time_now = time.time() loss.backward() model_optim.step() print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) train_loss = np.average(train_loss) vali_loss = self.vali(vali_data, vali_loader, criterion) test_loss = self.vali(test_data, test_loader, criterion) print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( epoch + 1, train_steps, train_loss, vali_loss, test_loss)) early_stopping(vali_loss, self.model, path) if early_stopping.early_stop: print("Early stopping") break adjust_learning_rate(model_optim, epoch + 1, self.args) best_model_path = path + '/' + 'checkpoint.pth' self.model.load_state_dict(torch.load(best_model_path)) return self.model def test(self, setting, test=0): test_data, test_loader = self._get_data(flag='test') if test: print('loading model') self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) preds = [] trues = [] masks = [] folder_path = './test_results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) self.model.eval() with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): batch_x = batch_x.float().to(self.device) batch_x_mark = batch_x_mark.float().to(self.device) # random mask B, T, N = batch_x.shape mask = torch.rand((B, T, N)).to(self.device) mask[mask <= self.args.mask_rate] = 0 # masked mask[mask > self.args.mask_rate] = 1 # remained inp = batch_x.masked_fill(mask == 0, 0) # imputation outputs = self.model(inp, batch_x_mark, None, None, mask) # eval f_dim = -1 if self.args.features == 'MS' else 0 
outputs = outputs[:, :, f_dim:] # add support for MS batch_x = batch_x[:, :, f_dim:] mask = mask[:, :, f_dim:] outputs = outputs.detach().cpu().numpy() pred = outputs true = batch_x.detach().cpu().numpy() preds.append(pred) trues.append(true) masks.append(mask.detach().cpu()) if i % 20 == 0: filled = true[0, :, -1].copy() filled = filled * mask[0, :, -1].detach().cpu().numpy() + \ pred[0, :, -1] * (1 - mask[0, :, -1].detach().cpu().numpy()) visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf')) preds = np.concatenate(preds, 0) trues = np.concatenate(trues, 0) masks = np.concatenate(masks, 0) print('test shape:', preds.shape, trues.shape) # result save folder_path = './results/' + setting + '/' if not os.path.exists(folder_path): os.makedirs(folder_path) mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0]) print('mse:{}, mae:{}'.format(mse, mae)) f = open("result_imputation.txt", 'a') f.write(setting + " \n") f.write('mse:{}, mae:{}'.format(mse, mae)) f.write('\n') f.write('\n') f.close() np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) np.save(folder_path + 'pred.npy', preds) np.save(folder_path + 'true.npy', trues) return ================================================ FILE: exp/exp_long_term_forecasting.py ================================================ from data_provider.data_factory import data_provider from exp.exp_basic import Exp_Basic from utils.tools import EarlyStopping, adjust_learning_rate, visual from utils.metrics import metric import torch import torch.nn as nn from torch import optim import os import time import warnings import numpy as np from utils.dtw_metric import dtw, accelerated_dtw from utils.augmentation import run_augmentation, run_augmentation_single warnings.filterwarnings('ignore') class Exp_Long_Term_Forecast(Exp_Basic): def __init__(self, args): super(Exp_Long_Term_Forecast, self).__init__(args) def _build_model(self): model = 
self.model_dict[self.args.model](self.args).float() if self.args.use_multi_gpu and self.args.use_gpu: model = nn.DataParallel(model, device_ids=self.args.device_ids) return model def _get_data(self, flag): data_set, data_loader = data_provider(self.args, flag) return data_set, data_loader def _select_optimizer(self): model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) return model_optim def _select_criterion(self): criterion = nn.MSELoss() return criterion def vali(self, vali_data, vali_loader, criterion): total_loss = [] self.model.eval() with torch.no_grad(): for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): batch_x = batch_x.float().to(self.device) batch_y = batch_y.float() batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) pred = outputs.detach() true = batch_y.detach() loss = criterion(pred, true) total_loss.append(loss.item()) total_loss = np.average(total_loss) self.model.train() return total_loss def train(self, setting): train_data, train_loader = self._get_data(flag='train') vali_data, vali_loader = self._get_data(flag='val') test_data, test_loader = self._get_data(flag='test') path = os.path.join(self.args.checkpoints, setting) if not os.path.exists(path): os.makedirs(path) time_now = time.time() train_steps = len(train_loader) early_stopping = EarlyStopping(patience=self.args.patience, 
verbose=True) model_optim = self._select_optimizer() criterion = self._select_criterion() if self.args.use_amp: scaler = torch.cuda.amp.GradScaler() for epoch in range(self.args.train_epochs): iter_count = 0 train_loss = [] self.model.train() epoch_time = time.time() for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): iter_count += 1 model_optim.zero_grad() batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) loss = criterion(outputs, batch_y) train_loss.append(loss.item()) else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) loss = criterion(outputs, batch_y) train_loss.append(loss.item()) if (i + 1) % 100 == 0: print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) speed = (time.time() - time_now) / iter_count left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) iter_count = 0 time_now = time.time() if self.args.use_amp: scaler.scale(loss).backward() scaler.step(model_optim) scaler.update() else: loss.backward() model_optim.step() print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 
def test(self, setting, test=0):
    """Evaluate the model on the test split and persist predictions and metrics.

    Args:
        setting: Experiment identifier; used as the sub-directory name for
            checkpoints, plots and saved arrays.
        test: When truthy, reload ``checkpoint.pth`` for ``setting`` before
            evaluating (otherwise the in-memory weights are used).

    Side effects:
        Writes forecast plots under ``./test_results/<setting>/``, appends a
        summary to ``result_long_term_forecast.txt`` and saves
        ``metrics.npy``, ``pred.npy`` and ``true.npy`` under
        ``./results/<setting>/``.
    """
    test_data, test_loader = self._get_data(flag='test')
    if test:
        print('loading model')
        # Use the same root that train() saves to, so a non-default
        # --checkpoints directory is honoured; map_location lets a checkpoint
        # written on another device be restored on the current one.
        checkpoint_path = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
        self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))

    preds = []
    trues = []
    folder_path = './test_results/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    pred_len = self.args.pred_len
    # 'MS' keeps only the last channel; every other mode keeps all channels.
    f_dim = -1 if self.args.features == 'MS' else 0

    self.model.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
            batch_x = batch_x.float().to(self.device)
            batch_y = batch_y.float().to(self.device)
            batch_x_mark = batch_x_mark.float().to(self.device)
            batch_y_mark = batch_y_mark.float().to(self.device)

            # decoder input: the first label_len steps followed by zeros for the horizon
            dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :]).float()
            dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

            # encoder - decoder
            if self.args.use_amp:
                with torch.cuda.amp.autocast():
                    outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            else:
                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

            outputs = outputs[:, -pred_len:, :].detach().cpu().numpy()
            batch_y = batch_y[:, -pred_len:, :].detach().cpu().numpy()

            undo_scaling = test_data.scale and self.args.inverse
            if undo_scaling:
                shape = batch_y.shape
                if outputs.shape[-1] != batch_y.shape[-1]:
                    # Tile the prediction across channels so the scaler, which
                    # is applied to full-width rows, can be inverted.
                    outputs = np.tile(outputs, [1, 1, int(batch_y.shape[-1] / outputs.shape[-1])])
                outputs = test_data.inverse_transform(outputs.reshape(shape[0] * shape[1], -1)).reshape(shape)
                batch_y = test_data.inverse_transform(batch_y.reshape(shape[0] * shape[1], -1)).reshape(shape)

            pred = outputs[:, :, f_dim:]
            true = batch_y[:, :, f_dim:]
            preds.append(pred)
            trues.append(true)

            if i % 20 == 0:
                # Plot the last channel of the first sample every 20 batches.
                history = batch_x.detach().cpu().numpy()
                if undo_scaling:
                    shape = history.shape
                    history = test_data.inverse_transform(history.reshape(shape[0] * shape[1], -1)).reshape(shape)
                gt_curve = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
                pred_curve = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
                visual(gt_curve, pred_curve, os.path.join(folder_path, str(i) + '.pdf'))

    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)
    print('test shape:', preds.shape, trues.shape)
    preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
    trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
    print('test shape:', preds.shape, trues.shape)

    # result save
    folder_path = './results/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    # dtw calculation
    if self.args.use_dtw:
        dtw_list = []
        manhattan_distance = lambda x, y: np.abs(x - y)
        for i in range(preds.shape[0]):
            x = preds[i].reshape(-1, 1)
            y = trues[i].reshape(-1, 1)
            if i % 100 == 0:
                print("calculating dtw iter:", i)
            d, _, _, _ = accelerated_dtw(x, y, dist=manhattan_distance)
            dtw_list.append(d)
        dtw_score = np.array(dtw_list).mean()
    else:
        dtw_score = 'Not calculated'

    mae, mse, rmse, mape, mspe = metric(preds, trues)
    print('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw_score))
    # Context manager guarantees the log file is closed even if a write fails.
    with open("result_long_term_forecast.txt", 'a') as f:
        f.write(setting + " \n")
        f.write('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw_score))
        f.write('\n')
        f.write('\n')

    np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
    np.save(folder_path + 'pred.npy', preds)
    np.save(folder_path + 'true.npy', trues)

    return
early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) model_optim = self._select_optimizer() criterion = self._select_criterion(self.args.loss) mse = nn.MSELoss() for epoch in range(self.args.train_epochs): iter_count = 0 train_loss = [] self.model.train() epoch_time = time.time() for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): iter_count += 1 model_optim.zero_grad() batch_x = batch_x.float().to(self.device) batch_y = batch_y.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) outputs = self.model(batch_x, None, dec_inp, None) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, f_dim:] batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) batch_y_mark = batch_y_mark[:, -self.args.pred_len:, f_dim:].to(self.device) loss_value = criterion(batch_x, self.args.frequency_map, outputs, batch_y, batch_y_mark) loss_sharpness = mse((outputs[:, 1:, :] - outputs[:, :-1, :]), (batch_y[:, 1:, :] - batch_y[:, :-1, :])) loss = loss_value # + loss_sharpness * 1e-5 train_loss.append(loss.item()) if (i + 1) % 100 == 0: print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) speed = (time.time() - time_now) / iter_count left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) iter_count = 0 time_now = time.time() loss.backward() model_optim.step() print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) train_loss = np.average(train_loss) vali_loss = self.vali(train_loader, vali_loader, criterion) test_loss = vali_loss print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( epoch + 1, 
def test(self, setting, test=0):
    """Forecast every series from its last in-sample window and export the result.

    Args:
        setting: Experiment identifier; used as the sub-directory name for
            checkpoints and plots.
        test: When truthy, reload ``checkpoint.pth`` for ``setting`` before
            forecasting.

    Side effects:
        Writes sample plots under ``./test_results/<setting>/`` and a
        ``<seasonal_patterns>_forecast.csv`` under ``./m4_results/<model>/``.
        Once the forecast files of all six seasonal patterns are present in
        that folder, the M4 summary metrics are computed and printed.
    """
    _, train_loader = self._get_data(flag='train')
    _, test_loader = self._get_data(flag='test')
    x, _ = train_loader.dataset.last_insample_window()
    y = test_loader.dataset.timeseries
    x = torch.tensor(x, dtype=torch.float32).to(self.device)
    x = x.unsqueeze(-1)

    if test:
        print('loading model')
        # Use the same root that train() saves to, so a non-default
        # --checkpoints directory is honoured.
        checkpoint_path = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
        self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))

    folder_path = './test_results/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    pred_len = self.args.pred_len

    self.model.eval()
    with torch.no_grad():
        B, _, C = x.shape
        dec_inp = torch.zeros((B, pred_len, C)).float().to(self.device)
        dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float()
        # encoder - decoder, one series per forward pass
        outputs = torch.zeros((B, pred_len, C)).float().to(self.device)
        for start in range(B):
            stop = start + 1
            outputs[start:stop, :, :] = self.model(x[start:stop], None, dec_inp[start:stop], None)
            if start % 1000 == 0:
                print(start)

        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, -pred_len:, f_dim:]
        preds = outputs.detach().cpu().numpy()
        trues = y
        x = x.detach().cpu().numpy()

        # Plot roughly ten evenly spaced series. The step is clamped to 1 so
        # that fewer than ten series no longer yields range(..., step=0).
        plot_step = max(1, preds.shape[0] // 10)
        for i in range(0, preds.shape[0], plot_step):
            gt_curve = np.concatenate((x[i, :, 0], trues[i]), axis=0)
            pred_curve = np.concatenate((x[i, :, 0], preds[i, :, 0]), axis=0)
            visual(gt_curve, pred_curve, os.path.join(folder_path, str(i) + '.pdf'))

    print('test shape:', preds.shape)

    # result save
    folder_path = './m4_results/' + self.args.model + '/'
    os.makedirs(folder_path, exist_ok=True)

    forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(pred_len)])
    forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]]
    forecasts_df.index.name = 'id'
    # NOTE(review): this replaces the 'id' index with the V1 column, so the
    # ids are not written to the CSV. Presumably the summary reader expects
    # that layout - confirm against M4Summary before changing.
    forecasts_df.set_index(forecasts_df.columns[0], inplace=True)
    forecasts_df.to_csv(folder_path + self.args.seasonal_patterns + '_forecast.csv')

    print(self.args.model)
    file_path = './m4_results/' + self.args.model + '/'
    required_files = {
        f'{pattern}_forecast.csv'
        for pattern in ('Weekly', 'Monthly', 'Yearly', 'Daily', 'Hourly', 'Quarterly')
    }
    # List the directory once instead of once per seasonal pattern.
    if required_files.issubset(os.listdir(file_path)):
        m4_summary = M4Summary(file_path, self.args.root_path)
        smape_results, owa_results, mape, mase = m4_summary.evaluate()
        print('smape:', smape_results)
        print('mape:', mape)
        print('mase:', mase)
        print('owa:', owa_results)
    else:
        print('After all 6 tasks are finished, you can calculate the averaged index')
    return
batch_y.float().to(self.device) batch_x_mark = batch_x_mark.float().to(self.device) batch_y_mark = batch_y_mark.float().to(self.device) # decoder input dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # encoder - decoder if self.args.use_amp: with torch.cuda.amp.autocast(): outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) else: outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) # print("Test cost time: {}".format(time.time() - start_time)) f_dim = -1 if self.args.features == 'MS' else 0 outputs = outputs[:, -self.args.pred_len:, :] batch_y = batch_y[:, -self.args.pred_len:, :].to(self.device) outputs = outputs.detach().cpu().numpy() batch_y = batch_y.detach().cpu().numpy() if test_data.scale and self.args.inverse: shape = batch_y.shape if outputs.shape[-1] != batch_y.shape[-1]: outputs = np.tile(outputs, [1, 1, int(batch_y.shape[-1] / outputs.shape[-1])]) outputs = test_data.inverse_transform(outputs.reshape(shape[0] * shape[1], -1)).reshape(shape) batch_y = test_data.inverse_transform(batch_y.reshape(shape[0] * shape[1], -1)).reshape(shape) outputs = outputs[:, :, f_dim:] batch_y = batch_y[:, :, f_dim:] pred = outputs true = batch_y preds.append(pred) trues.append(true) if i % 20 == 0: input = batch_x.detach().cpu().numpy() if test_data.scale and self.args.inverse: shape = input.shape input = test_data.inverse_transform(input.reshape(shape[0] * shape[1], -1)).reshape(shape) gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) preds = np.concatenate(preds, axis=0) trues = np.concatenate(trues, axis=0) print('test shape:', preds.shape, trues.shape) preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) print('test 
def time_delay_agg_training(self, values, corr):
    """
    SpeedUp version of Autocorrelation (a batch-normalization style design)
    This is for the training phase.

    The top-k delays are chosen once from the correlation averaged over
    dims 1 and 2 and then over dim 0, so every sample in the batch is rolled
    by the same delays; only the softmax weights differ per sample.
    """
    seq_len = values.shape[3]
    # find top k
    k = int(self.factor * math.log(seq_len))
    per_sample_corr = corr.mean(dim=1).mean(dim=1)
    batch_corr = per_sample_corr.mean(dim=0)
    delays = torch.topk(batch_corr, k, dim=-1)[1]
    # update corr
    weights = torch.stack([per_sample_corr[:, delays[j]] for j in range(k)], dim=-1)
    delay_weights = torch.softmax(weights, dim=-1)
    # aggregation
    aggregated = torch.zeros_like(values).float()
    for j in range(k):
        shifted = torch.roll(values, -int(delays[j]), -1)
        # one weight per sample, broadcast over the remaining three dims
        sample_weight = delay_weights[:, j].reshape(-1, 1, 1, 1)
        aggregated = aggregated + shifted * sample_weight
    return aggregated
def forward(self, queries, keys, values, attn_mask):
    """Compute auto-correlation between queries and keys and aggregate values by delay.

    Args:
        queries: 4-D tensor unpacked as ``(B, L, H, E)``.
        keys: 4-D tensor; padded with zeros or truncated along dim 1 to length ``L``.
        values: 4-D tensor unpacked as ``(_, S, _, D)``; aligned to length ``L``
            in the same way as ``keys``.
        attn_mask: Accepted for interface compatibility; not used here.

    Returns:
        ``(V, corr)`` when ``self.output_attention`` is set, otherwise
        ``(V, None)``. ``corr`` has length ``L`` along dim 1.
    """
    _, L, _, _ = queries.shape
    _, S, _, _ = values.shape
    if L > S:
        # Pad keys/values with zeros up to the query length.
        # NOTE(review): the padding is shaped like `queries`, so this assumes
        # keys/values share the query's trailing dims - confirm with callers.
        zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
        values = torch.cat([values, zeros], dim=1)
        keys = torch.cat([keys, zeros], dim=1)
    else:
        values = values[:, :L, :, :]
        keys = keys[:, :L, :, :]

    # period-based dependencies
    q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
    k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
    res = q_fft * torch.conj(k_fft)
    # Pass n=L explicitly: irfft defaults to an even output length, which
    # would return L-1 samples (and a wrong correlation) for odd L.
    corr = torch.fft.irfft(res, n=L, dim=-1)

    # time delay agg
    aligned_values = values.permute(0, 2, 3, 1).contiguous()
    if self.training:
        V = self.time_delay_agg_training(aligned_values, corr).permute(0, 3, 1, 2)
    else:
        V = self.time_delay_agg_inference(aligned_values, corr).permute(0, 3, 1, 2)

    if self.output_attention:
        return (V.contiguous(), corr.permute(0, 3, 1, 2))
    else:
        return (V.contiguous(), None)
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series

    The input is edge-padded along dim 1 by ``kernel_size - 1`` steps in
    total before average pooling, so with ``stride=1`` the output has the
    same length as the input for both odd and even ``kernel_size``.
    """

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Return the moving average of ``x`` along dim 1.

        ``x`` is indexed as a 3-D tensor whose dim 1 is the axis being
        smoothed; it is permuted so that axis is last for ``AvgPool1d``.
        """
        # padding on the both ends of time series
        # Split kernel_size - 1 across the two ends. For odd kernels both
        # sides get (kernel_size - 1) // 2, as before; for even kernels the
        # front gets the extra step so the output is no longer one step short.
        pad_total = self.kernel_size - 1
        pad_end = pad_total // 2
        pad_front = pad_total - pad_end
        front = x[:, 0:1, :].repeat(1, pad_front, 1)
        end = x[:, -1:, :].repeat(1, pad_end, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x
class EncoderLayer(nn.Module):
    """
    Autoformer encoder layer with the progressive decomposition architecture

    Attention and a two-layer pointwise convolution are each followed by a
    series decomposition; only the first component returned by each
    decomposition is carried forward.
    """

    def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        hidden_dim = d_ff or 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden_dim, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        """Return ``(res, attn)``: the layer output and the attention module's second result."""
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask)
        seasonal, _ = self.decomp1(x + self.dropout(attended))

        # pointwise feed-forward; Conv1d wants the channel axis at dim 1
        hidden = self.conv1(seasonal.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        projected = self.dropout(self.conv2(hidden).transpose(-1, 1))

        res, _ = self.decomp2(seasonal + projected)
        return res, attn
class DecoderLayer(nn.Module):
    """
    Autoformer decoder layer with the progressive decomposition architecture

    Self-attention, cross-attention and a pointwise feed-forward are each
    followed by a series decomposition. The three second components are
    summed and projected to ``c_out`` channels.
    """

    def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
                 moving_avg=25, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        hidden_dim = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden_dim, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=hidden_dim, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.decomp3 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
                                    padding_mode='circular', bias=False)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Return ``(x, residual_trend)`` for the decoder input ``x`` and encoder output ``cross``."""
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x, trend1 = self.decomp1(x + self.dropout(self_out))

        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x, trend2 = self.decomp2(x + self.dropout(cross_out))

        # pointwise feed-forward; Conv1d wants the channel axis at dim 1
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        projected = self.dropout(self.conv2(hidden).transpose(-1, 1))
        x, trend3 = self.decomp3(x + projected)

        residual_trend = trend1 + trend2 + trend3
        residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
        return x, residual_trend
class Inception_Block_V1(nn.Module):
    """Average of ``num_kernels`` parallel square 2-D convolutions.

    Branch ``i`` uses kernel size ``2 * i + 1`` with padding ``i``, so every
    branch preserves the spatial size and the outputs can be averaged.
    """

    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
        super(Inception_Block_V1, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_kernels = num_kernels
        self.kernels = nn.ModuleList(
            nn.Conv2d(in_channels, out_channels, kernel_size=2 * idx + 1, padding=idx)
            for idx in range(self.num_kernels)
        )
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal init for every Conv2d weight; biases are set to zero."""
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Apply every branch to ``x`` and return the element-wise mean."""
        branch_outputs = [self.kernels[idx](x) for idx in range(self.num_kernels)]
        return torch.stack(branch_outputs, dim=-1).mean(-1)
class SegMerging(nn.Module):
    """Merge every ``win_size`` adjacent segments into one.

    Segments are grouped along dim 2, concatenated along the feature axis,
    normalised and projected back to ``d_model`` features.
    """

    def __init__(self, d_model, win_size, norm_layer=nn.LayerNorm):
        super().__init__()
        self.d_model = d_model
        self.win_size = win_size
        merged_dim = win_size * d_model
        self.linear_trans = nn.Linear(merged_dim, d_model)
        self.norm = norm_layer(merged_dim)

    def forward(self, x):
        """Return ``x`` with dim 2 reduced by a factor of ``win_size`` (rounded up)."""
        seg_num = x.shape[2]
        remainder = seg_num % self.win_size
        if remainder != 0:
            # Repeat the trailing segments so the count divides evenly.
            missing = self.win_size - remainder
            x = torch.cat((x, x[:, :, -missing:, :]), dim=-2)

        # Slice i::win_size picks the i-th member of every window.
        window_members = [x[:, :, offset::self.win_size, :] for offset in range(self.win_size)]
        merged = torch.cat(window_members, -1)
        return self.linear_trans(self.norm(merged))
class Decoder(nn.Module):
    """Run the decoder layers in order and sum their per-layer predictions.

    Layer ``i`` receives the running decoder state and ``cross[i]``; it returns
    the updated state and a prediction. The predictions are accumulated and
    finally rearranged from ``b (out_d seg_num) seg_len`` to
    ``b (seg_num seg_len) out_d``, with ``out_d`` taken from ``x.shape[1]``.
    """

    def __init__(self, layers):
        super().__init__()
        self.decode_layers = nn.ModuleList(layers)

    def forward(self, x, cross):
        # Read before the loop: x is rebound by every layer.
        ts_d = x.shape[1]
        final_predict = None
        for idx, layer in enumerate(self.decode_layers):
            x, layer_predict = layer(x, cross[idx])
            final_predict = layer_predict if final_predict is None else final_predict + layer_predict

        final_predict = rearrange(final_predict, 'b (out_d seg_num) seg_len -> b (seg_num seg_len) out_d', out_d=ts_d)
        return final_predict
class Decomposition(nn.Module):
    """Wavelet decomposition / reconstruction wrapper around DWT1DForward and DWT1DInverse.

    ``transform`` turns a ``(batch, channel, seq)`` tensor into an approximation
    tensor ``yl`` and a list of detail tensors ``yh``; ``inv_transform`` maps
    them back. When ``no_decomposition`` is truthy both are pass-throughs
    (``yh`` is an empty list).

    ``input_w_dim`` / ``pred_w_dim`` hold the last-axis length of ``yl``
    followed by the lengths of every entry of ``yh`` for a sequence of length
    ``input_length`` / ``pred_length``.

    NOTE: the list defaults in the signature are placeholders kept for
    interface compatibility; real values must be supplied by the caller.
    """

    def __init__(self,
                 input_length=[],
                 pred_length=[],
                 wavelet_name=[],
                 level=[],
                 batch_size=[],
                 channel=[],
                 d_model=[],
                 tfactor=[],
                 dfactor=[],
                 device=[],
                 no_decomposition=[],
                 use_amp=[]):
        super(Decomposition, self).__init__()
        self.input_length = input_length
        self.pred_length = pred_length
        self.wavelet_name = wavelet_name
        self.level = level
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.device = device
        self.no_decomposition = no_decomposition
        self.use_amp = use_amp
        self.eps = 1e-5

        # Move the transforms to the exact device that _dummy_forward will put
        # its probe tensor on. A bare .cuda() targets the current CUDA device,
        # which is not necessarily self.device (e.g. cuda:1).
        self.dwt = DWT1DForward(wave=self.wavelet_name, J=self.level,
                                use_amp=self.use_amp).to(self.device)
        self.idwt = DWT1DInverse(wave=self.wavelet_name,
                                 use_amp=self.use_amp).to(self.device)

        # length of the input seq after decompose
        self.input_w_dim = self._dummy_forward(self.input_length) if not self.no_decomposition else [
            self.input_length]
        # required length of the pred seq after decom
        self.pred_w_dim = self._dummy_forward(self.pred_length) if not self.no_decomposition else [
            self.pred_length]

        self.tfactor = tfactor
        self.dfactor = dfactor
        #################################
        self.affine = False
        #################################

        if self.affine:
            self._init_params()

    def transform(self, x):
        """Decompose ``x`` (batch, channel, seq) into ``(yl, yh)``."""
        if not self.no_decomposition:
            yl, yh = self._wavelet_decompose(x)
        else:
            yl, yh = x, []  # no decompose: returning the same value in yl
        return yl, yh

    def inv_transform(self, yl, yh):
        """Rebuild the series from ``(yl, yh)``; returns ``yl`` unchanged when decomposition is off."""
        if not self.no_decomposition:
            x = self._wavelet_reverse_decompose(yl, yh)
        else:
            x = yl  # no decompose: returning the same value in x
        return x

    def _dummy_forward(self, input_length):
        """Run the DWT on a tensor of ones and return the last-axis length of every band."""
        dummy_x = torch.ones((self.batch_size, self.channel, input_length)).to(self.device)
        yl, yh = self.dwt(dummy_x)
        lengths = [yl.shape[-1]]
        lengths.extend(band.shape[-1] for band in yh)
        return lengths

    def _init_params(self):
        """Create per-band, per-channel affine parameters (row 0 is the approximation band)."""
        self.affine_weight = nn.Parameter(torch.ones((self.level + 1, self.channel)))
        self.affine_bias = nn.Parameter(torch.zeros((self.level + 1, self.channel)))

    def _wavelet_decompose(self, x):
        # input: x shape: batch, channel, seq
        yl, yh = self.dwt(x)

        if self.affine:
            yl = yl.transpose(1, 2)  # batch, seq, channel
            yl = yl * self.affine_weight[0]
            yl = yl + self.affine_bias[0]
            yl = yl.transpose(1, 2)  # batch, channel, seq
            for i in range(self.level):
                yh_ = yh[i].transpose(1, 2)  # batch, seq, channel
                yh_ = yh_ * self.affine_weight[i + 1]
                yh_ = yh_ + self.affine_bias[i + 1]
                yh[i] = yh_.transpose(1, 2)  # batch, channel, seq

        return yl, yh

    def _wavelet_reverse_decompose(self, yl, yh):
        if self.affine:
            # Work on a copy so the caller's list is not overwritten.
            yh = list(yh)
            yl = yl.transpose(1, 2)  # batch, seq, channel
            yl = yl - self.affine_bias[0]
            yl = yl / (self.affine_weight[0] + self.eps)
            yl = yl.transpose(1, 2)  # batch, channel, seq
            for i in range(self.level):
                yh_ = yh[i].transpose(1, 2)  # batch, seq, channel
                yh_ = yh_ - self.affine_bias[i + 1]
                yh_ = yh_ / (self.affine_weight[i + 1] + self.eps)
                yh[i] = yh_.transpose(1, 2)  # batch, channel, seq

        x = self.idwt((yl, yh))
        return x  # shape: batch, channel, seq
class DWT1DForward(nn.Module):
    """ Multi-level 1d DWT forward decomposition of a (N, C, L) signal

    Args:
        J (int): Number of levels of decomposition
        wave (str or pywt.Wavelet or tuple(ndarray)): Which wavelet to use.
            Can be:
            1) a string to pass to pywt.Wavelet constructor
            2) a pywt.Wavelet class
            3) a tuple of numpy arrays (h0, h1)
        mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. The
            padding scheme
        use_amp (bool): forwarded unchanged to AFB1D on every level
    """

    def __init__(self, J=1, wave='db1', mode='zero', use_amp=False):
        super().__init__()
        self.J = J
        self.mode = mode
        self.use_amp = use_amp

        wavelet = pywt.Wavelet(wave) if isinstance(wave, str) else wave
        if isinstance(wavelet, pywt.Wavelet):
            h0, h1 = wavelet.dec_lo, wavelet.dec_hi
        else:
            assert len(wavelet) == 2
            h0, h1 = wavelet[0], wavelet[1]

        # Filters live in buffers so they follow the module across devices.
        filts = prep_filt_afb1d(h0, h1)
        self.register_buffer('h0', filts[0])
        self.register_buffer('h1', filts[1])

    def forward(self, x):
        """ Forward pass of the DWT.

        Args:
            x (tensor): Input of shape :math:`(N, C_{in}, L_{in})`

        Returns:
            (yl, yh)
                tuple of lowpass (yl) and bandpass (yh) coefficients.
                yh is a list of length J with the first entry
                being the finest scale coefficients.
        """
        assert x.ndim == 3, "Can only handle 3d inputs (N, C, L)"
        mode = mode_to_int(self.mode)

        # Each level re-analyses the lowpass output of the previous one.
        low, highs = x, []
        for _ in range(self.J):
            low, high = AFB1D.apply(low, self.h0, self.h1, mode, self.use_amp)
            highs.append(high)

        return low, highs
def roll(x, n, dim, make_even=False):
    """Circularly shift ``x`` by ``n`` positions along ``dim``.

    The result is ``cat(x[-n:], x[:-n + end])`` taken along ``dim``. A negative
    ``n`` is first converted to ``x.shape[dim] + n``.

    Works for any tensor rank and any valid (positive or negative) ``dim``.
    An out-of-range ``dim`` raises ``IndexError``.

    Inputs:
        x (tensor): tensor to shift
        n (int): shift amount
        dim (int): axis to shift along
        make_even (bool): when True and the length along ``dim`` is odd,
            ``end`` is 1, so the second chunk keeps one extra element.
    """
    length = x.shape[dim]
    if n < 0:
        n = length + n

    end = 1 if make_even and length % 2 == 1 else 0

    # Slice objects reproduce the x[..., -n:] / x[..., :-n + end] indexing for
    # an arbitrary axis instead of hard-coding the four 4-D cases.
    index = [slice(None)] * x.dim()
    index[dim] = slice(-n, None)
    tail = x[tuple(index)]
    index[dim] = slice(None, -n + end)
    head = x[tuple(index)]
    return torch.cat((tail, head), dim=dim)
def afb1d(x, h0, h1, use_amp=False, mode='zero', dim=-1):
    """ 1D analysis filter bank (along one dimension only) of an image

    Inputs:
        x (tensor): 4D input with the last two dimensions the spatial input
        h0 (tensor): 4D input for the lowpass filter. Should have shape (1, 1,
            h, 1) or (1, 1, 1, w)
        h1 (tensor): 4D input for the highpass filter. Should have shape (1, 1,
            h, 1) or (1, 1, 1, w)
        use_amp (bool): run the convolution inside
            ``torch.cuda.amp.autocast()``. Defaults to False so that callers
            which do not pass it (the 2D helpers in this file call
            ``afb1d(x, h0, h1, mode=..., dim=...)``) keep working.
        mode (str): padding method
        dim (int) - dimension of filtering. d=2 is for a vertical filter (called
            column filtering but filters across the rows). d=3 is for a
            horizontal filter, (called row filtering but filters across the
            columns).

    Returns:
        lohi: lowpass and highpass subbands concatenated along the channel
            dimension (for input channel c, output channel 2c is lowpass and
            2c + 1 is highpass)

    Raises:
        ValueError: if ``mode`` is not a supported padding type
    """
    C = x.shape[1]
    # Convert the dim to positive
    d = dim % 4
    s = (2, 1) if d == 2 else (1, 2)
    N = x.shape[d]
    # If h0, h1 are not tensors, make them. If they are, then assume that they
    # are in the right order
    if not isinstance(h0, torch.Tensor):
        h0 = torch.tensor(np.copy(np.array(h0).ravel()[::-1]),
                          dtype=torch.float, device=x.device)
    if not isinstance(h1, torch.Tensor):
        h1 = torch.tensor(np.copy(np.array(h1).ravel()[::-1]),
                          dtype=torch.float, device=x.device)
    L = h0.numel()
    L2 = L // 2
    shape = [1, 1, 1, 1]
    shape[d] = L
    # If h aren't in the right shape, make them so
    if h0.shape != tuple(shape):
        h0 = h0.reshape(*shape)
    if h1.shape != tuple(shape):
        h1 = h1.reshape(*shape)
    # One (lo, hi) filter pair per input channel, applied as a grouped conv.
    h = torch.cat([h0, h1] * C, dim=0)

    def _conv(inp, padding=0):
        # Single place for the strided grouped convolution, optionally under
        # mixed precision.
        if use_amp:
            with torch.cuda.amp.autocast():  # for mixed precision
                return F.conv2d(inp, h, padding=padding, stride=s, groups=C)
        return F.conv2d(inp, h, padding=padding, stride=s, groups=C)

    if mode == 'per' or mode == 'periodization':
        if x.shape[dim] % 2 == 1:
            # Odd length: repeat the last sample so the length becomes even.
            if d == 2:
                x = torch.cat((x, x[:, :, -1:]), dim=2)
            else:
                x = torch.cat((x, x[:, :, :, -1:]), dim=3)
            N += 1
        x = roll(x, -L2, dim=d)
        pad = (L - 1, 0) if d == 2 else (0, L - 1)
        lohi = _conv(x, padding=pad)
        N2 = N // 2
        # Fold the overhang beyond N2 back onto the start, then crop to N2.
        if d == 2:
            lohi[:, :, :L2] = lohi[:, :, :L2] + lohi[:, :, N2:N2 + L2]
            lohi = lohi[:, :, :N2]
        else:
            lohi[:, :, :, :L2] = lohi[:, :, :, :L2] + lohi[:, :, :, N2:N2 + L2]
            lohi = lohi[:, :, :, :N2]
    else:
        # Calculate the pad size
        outsize = pywt.dwt_coeff_len(N, L, mode=mode)
        p = 2 * (outsize - 1) - N + L
        if mode == 'zero':
            # Sadly, pytorch only allows for same padding before and after, if
            # we need to do more padding after for odd length signals, have to
            # prepad
            if p % 2 == 1:
                pad = (0, 0, 0, 1) if d == 2 else (0, 1, 0, 0)
                x = F.pad(x, pad)
            pad = (p // 2, 0) if d == 2 else (0, p // 2)
            # Calculate the high and lowpass
            lohi = _conv(x, padding=pad)
        elif mode == 'symmetric' or mode == 'reflect' or mode == 'periodic':
            pad = (0, 0, p // 2, (p + 1) // 2) if d == 2 else (p // 2, (p + 1) // 2, 0, 0)
            x = mypad(x, pad=pad, mode=mode)
            lohi = _conv(x)
        else:
            raise ValueError("Unkown pad type: {}".format(mode))

    return lohi
def sfb1d(lo, hi, g0, g1, use_amp=False, mode='zero', dim=-1):
    """ 1D synthesis filter bank of an image tensor

    Inputs:
        lo (tensor): 4D lowpass coefficients
        hi (tensor): 4D highpass coefficients
        g0: lowpass synthesis filter (tensor, or array-like which is flattened
            and converted to a float tensor on ``lo.device``)
        g1: highpass synthesis filter, same conventions as ``g0``
        use_amp (bool): run the transposed convolutions inside
            ``torch.cuda.amp.autocast()``. Defaults to False so that callers
            which do not pass it (the 2D helpers in this file call
            ``sfb1d(lo, hi, g0, g1, mode=..., dim=...)``) keep working.
        mode (str): padding method that was used for the analysis
        dim (int): dimension of filtering; ``dim % 4 == 2`` filters along
            axis 2, anything else along axis 3

    Returns:
        y: the two bands upsampled by 2 along the filtered axis, filtered and
            summed

    Raises:
        ValueError: if ``mode`` is not a supported padding type
    """
    C = lo.shape[1]
    d = dim % 4
    # If g0, g1 are not tensors, make them. If they are, then assume that they
    # are in the right order
    if not isinstance(g0, torch.Tensor):
        g0 = torch.tensor(np.copy(np.array(g0).ravel()),
                          dtype=torch.float, device=lo.device)
    if not isinstance(g1, torch.Tensor):
        g1 = torch.tensor(np.copy(np.array(g1).ravel()),
                          dtype=torch.float, device=lo.device)
    L = g0.numel()
    shape = [1, 1, 1, 1]
    shape[d] = L
    N = 2 * lo.shape[d]
    # If g aren't in the right shape, make them so
    if g0.shape != tuple(shape):
        g0 = g0.reshape(*shape)
    if g1.shape != tuple(shape):
        g1 = g1.reshape(*shape)

    s = (2, 1) if d == 2 else (1, 2)
    # One copy of each filter per channel, applied as a grouped transposed conv.
    g0 = torch.cat([g0] * C, dim=0)
    g1 = torch.cat([g1] * C, dim=0)

    def _synthesize(padding=0):
        # Single place for the pair of transposed convolutions, optionally
        # under mixed precision.
        if use_amp:
            with torch.cuda.amp.autocast():
                return F.conv_transpose2d(lo, g0, stride=s, padding=padding, groups=C) + \
                    F.conv_transpose2d(hi, g1, stride=s, padding=padding, groups=C)
        return F.conv_transpose2d(lo, g0, stride=s, padding=padding, groups=C) + \
            F.conv_transpose2d(hi, g1, stride=s, padding=padding, groups=C)

    if mode == 'per' or mode == 'periodization':
        y = _synthesize()
        # Fold the overhang beyond N back onto the start, then crop to N.
        if d == 2:
            y[:, :, :L - 2] = y[:, :, :L - 2] + y[:, :, N:N + L - 2]
            y = y[:, :, :N]
        else:
            y[:, :, :, :L - 2] = y[:, :, :, :L - 2] + y[:, :, :, N:N + L - 2]
            y = y[:, :, :, :N]
        y = roll(y, 1 - L // 2, dim=dim)
    else:
        if mode == 'zero' or mode == 'symmetric' or mode == 'reflect' or \
                mode == 'periodic':
            pad = (L - 2, 0) if d == 2 else (0, L - 2)
            y = _synthesize(padding=pad)
        else:
            raise ValueError("Unkown pad type: {}".format(mode))

    return y
class AFB1D(Function):
    """ Does a single level 1d wavelet decomposition of an input.

    Needs to have the tensors in the right form. Because this function defines
    its own backward pass, saves on memory by not having to save the input
    tensors.

    Inputs:
        x (torch.Tensor): Input to decompose
        h0: lowpass
        h1: highpass
        mode (int): use mode_to_int to get the int code here
        use_amp (bool): forwarded to afb1d / sfb1d

    We encode the mode as an integer rather than a string as gradcheck causes an
    error when a string is provided.

    Returns:
        x0: Tensor of shape (N, C, L') - lowpass
        x1: Tensor of shape (N, C, L') - highpass
    """

    @staticmethod
    def forward(ctx, x, h0, h1, mode, use_amp):
        mode = int_to_mode(mode)

        # Make inputs 4d
        x = x[:, :, None, :]
        h0 = h0[:, :, None, :]
        h1 = h1[:, :, None, :]

        # Save for backwards
        ctx.save_for_backward(h0, h1)
        ctx.shape = x.shape[3]  # original length, used to crop the gradient
        ctx.mode = mode
        ctx.use_amp = use_amp

        lohi = afb1d(x, h0, h1, use_amp, mode=mode, dim=3)
        # Even channels are lowpass, odd channels are highpass.
        x0 = lohi[:, ::2, 0].contiguous()
        x1 = lohi[:, 1::2, 0].contiguous()
        return x0, x1

    @staticmethod
    def backward(ctx, dx0, dx1):
        dx = None
        if ctx.needs_input_grad[0]:
            mode = ctx.mode
            h0, h1 = ctx.saved_tensors
            use_amp = ctx.use_amp

            # Make grads 4d
            dx0 = dx0[:, :, None, :]
            dx1 = dx1[:, :, None, :]

            dx = sfb1d(dx0, dx1, h0, h1, use_amp, mode=mode, dim=3)[:, :, 0]

            # Check for odd input
            if dx.shape[2] > ctx.shape:
                dx = dx[:, :, :ctx.shape]

        # One gradient slot per forward input: x, h0, h1, mode, use_amp.
        return dx, None, None, None, None
def afb2d_atrous(x, filts, mode='periodization', dilation=1):
    """ Does a single level 2d wavelet decomposition of an input without
    downsampling. Does separate row and column filtering by two calls to
    afb1d_atrous.

    Inputs:
        x (torch.Tensor): Input to decompose
        filts (list of ndarray or torch.Tensor): If a list of tensors has been
            given, this function assumes they are in the right form (the form
            returned by prep_filt_afb2d). Otherwise, this function will prepare
            the filters to be of the right form by calling prep_filt_afb2d.
            Either two filters (h0, h1), shared by rows and columns, or four
            (h0_col, h1_col, h0_row, h1_row).
        mode (str): padding method handed to afb1d_atrous. 'per' and
            'periodization' are treated as 'periodic'.
        dilation (int): dilation factor for the filters. Should be 2**level

    Returns:
        y: Tensor with 4*C channels (each of the two filtering passes doubles
            the channel count)

    Raises:
        ValueError: if ``filts`` does not hold two or four filters
    """
    # afb1d_atrous pads through mypad, which has a 'periodic' branch but no
    # 'per'/'periodization' one; without this mapping the default mode raises
    # "Unkown pad type".
    if mode == 'per' or mode == 'periodization':
        mode = 'periodic'

    needs_prep = any(not isinstance(f, torch.Tensor) for f in filts)
    if len(filts) == 2:
        h0, h1 = filts
        if needs_prep:
            h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d(
                h0, h1, device=x.device)
        else:
            # Tensor filters are taken as column filters; rows are transposes.
            h0_col = h0
            h0_row = h0.transpose(2, 3)
            h1_col = h1
            h1_row = h1.transpose(2, 3)
    elif len(filts) == 4:
        if needs_prep:
            h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d(
                *filts, device=x.device)
        else:
            h0_col, h1_col, h0_row, h1_row = filts
    else:
        raise ValueError("Unknown form for input filts")

    lohi = afb1d_atrous(x, h0_row, h1_row, mode=mode, dim=3, dilation=dilation)
    y = afb1d_atrous(lohi, h0_col, h1_col, mode=mode, dim=2, dilation=dilation)

    return y
def sfb2d(ll, lh, hl, hh, filts, mode='zero'):
    """ Does a single level 2d wavelet reconstruction of wavelet coefficients.
    Does separate row and column filtering by three calls to sfb1d.

    Inputs:
        ll (torch.Tensor): lowpass coefficients
        lh (torch.Tensor): horizontal coefficients
        hl (torch.Tensor): vertical coefficients
        hh (torch.Tensor): diagonal coefficients
        filts (list of ndarray or torch.Tensor): If a list of tensors has been
            given, this function assumes they are in the right form (the form
            returned by prep_filt_sfb2d). Otherwise, this function will prepare
            the filters to be of the right form by calling prep_filt_sfb2d.
            Either two filters (g0, g1), shared by rows and columns, or four
            (g0_col, g1_col, g0_row, g1_row).
        mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. Which
            padding was used for the decomposition.

    Returns:
        y: the reconstructed tensor

    Raises:
        ValueError: if ``filts`` does not hold two or four filters
    """
    tensorize = [not isinstance(x, torch.Tensor) for x in filts]
    if len(filts) == 2:
        g0, g1 = filts
        if True in tensorize:
            # Build the filters on the coefficients' device, as afb2d does for
            # its input.
            g0_col, g1_col, g0_row, g1_row = prep_filt_sfb2d(
                g0, g1, device=ll.device)
        else:
            g0_col = g0
            g0_row = g0.transpose(2, 3)
            g1_col = g1
            g1_row = g1.transpose(2, 3)
    elif len(filts) == 4:
        if True in tensorize:
            g0_col, g1_col, g0_row, g1_row = prep_filt_sfb2d(
                *filts, device=ll.device)
        else:
            g0_col, g1_col, g0_row, g1_row = filts
    else:
        raise ValueError("Unknown form for input filts")

    # sfb1d takes use_amp as its fifth positional argument; this helper has no
    # mixed-precision switch, so pass False explicitly.
    lo = sfb1d(ll, lh, g0_col, g1_col, False, mode=mode, dim=2)
    hi = sfb1d(hl, hh, g0_col, g1_col, False, mode=mode, dim=2)
    y = sfb1d(lo, hi, g0_row, g1_row, False, mode=mode, dim=3)

    return y
class SFB1D(Function):
    """ Does a single level 1d wavelet reconstruction of an input.

    Needs to have the tensors in the right form. Because this function defines
    its own backward pass, saves on memory by not having to save the input
    tensors.

    Inputs:
        low (torch.Tensor): Lowpass to reconstruct of shape (N, C, L)
        high (torch.Tensor): Highpass to reconstruct of shape (N, C, L)
        g0: lowpass
        g1: highpass
        mode (int): use mode_to_int to get the int code here
        use_amp (bool): forwarded to sfb1d / afb1d

    We encode the mode as an integer rather than a string as gradcheck causes an
    error when a string is provided.

    Returns:
        y: Tensor of shape (N, C, L')
    """

    @staticmethod
    def forward(ctx, low, high, g0, g1, mode, use_amp):
        mode = int_to_mode(mode)
        # Make into a 2d tensor with 1 row
        low = low[:, :, None, :]
        high = high[:, :, None, :]
        g0 = g0[:, :, None, :]
        g1 = g1[:, :, None, :]
        ctx.mode = mode
        ctx.save_for_backward(g0, g1)
        ctx.use_amp = use_amp

        return sfb1d(low, high, g0, g1, use_amp, mode=mode, dim=3)[:, :, 0]

    @staticmethod
    def backward(ctx, dy):
        dlow, dhigh = None, None
        need_low = ctx.needs_input_grad[0]
        need_high = ctx.needs_input_grad[1]
        # Checking only the lowpass flag would drop the highpass gradient
        # whenever `low` itself does not require grad.
        if need_low or need_high:
            mode = ctx.mode
            use_amp = ctx.use_amp
            g0, g1, = ctx.saved_tensors
            dy = dy[:, :, None, :]

            dx = afb1d(dy, g0, g1, use_amp, mode=mode, dim=3)

            # Even channels are the lowpass gradient, odd channels highpass.
            if need_low:
                dlow = dx[:, ::2, 0].contiguous()
            if need_high:
                dhigh = dx[:, 1::2, 0].contiguous()
        # One gradient slot per forward input: low, high, g0, g1, mode, use_amp.
        return dlow, dhigh, None, None, None, None
def prep_filt_afb2d_nonsep(h0_col, h1_col, h0_row=None, h1_row=None,
                           device=None):
    """
    Build the stacked 2d analysis kernels expected by afb2d_nonsep.

    Every kernel is the outer product of one column filter with one row
    filter (a 2d point spread function), mirrored along both spatial axes in
    preparation for torch.conv2d.

    Inputs:
        h0_col (array-like): low pass column filter bank
        h1_col (array-like): high pass column filter bank
        h0_row (array-like): low pass row filter bank. If None, the column
            low pass filter is reused
        h1_row (array-like): high pass row filter bank. If None, the column
            high pass filter is reused
        device: which device to put the tensors on to

    Returns:
        filts: (4, 1, h, w) tensor with the ll, lh, hl and hh kernels (in that
            order) in the default torch dtype
    """
    col_lo = np.array(h0_col).ravel()
    col_hi = np.array(h1_col).ravel()
    row_lo = col_lo if h0_row is None else h0_row
    row_hi = col_hi if h1_row is None else h1_row

    # (column filter, row filter) pairs in subband order ll, lh, hl, hh
    pairs = (
        (col_lo, row_lo),
        (col_hi, row_lo),
        (col_lo, row_hi),
        (col_hi, row_hi),
    )
    mirrored = [np.outer(col, row)[None, ::-1, ::-1] for col, row in pairs]

    # np.stack copies, so the negative strides from the flips are gone by the
    # time the data is handed to torch.
    stacked = np.stack(mirrored, axis=0)
    return torch.tensor(stacked, dtype=torch.get_default_dtype(),
                        device=device)
def prep_filt_sfb1d(g0, g1, device=None):
    """
    Turn a pair of synthesis filters into tensors shaped for sfb1d.

    The taps are flattened and reshaped to (1, 1, L); they are NOT mirrored,
    because the synthesis side uses a transposed convolution which already
    acts like a normal convolution.

    Inputs:
        g0 (array-like): low pass filter bank
        g1 (array-like): high pass filter bank
        device: which device to put the tensors on to

    Returns:
        (g0, g1): two tensors of shape (1, 1, L) in the default torch dtype
    """
    dtype = torch.get_default_dtype()

    def _as_filter(taps):
        # Flatten whatever array-like we were given, then add the two
        # leading singleton dimensions.
        flat = np.array(taps).ravel()
        return torch.tensor(flat, device=device, dtype=dtype).reshape((1, 1, -1))

    return _as_filter(g0), _as_filter(g1)
def prep_filt_afb1d(h0, h1, device=None):
    """
    Prepares the filters to be of the right form for the afb1d/afb2d
    functions. In particular, makes the tensors the right shape. It takes
    mirror images of them as the analysis side uses a plain convolution call,
    which acts like a correlation.

    The taps are flattened *before* being reversed, so row vectors, column
    vectors and plain sequences are all mirrored the same way.

    Inputs:
        h0 (array-like): low pass column filter bank
        h1 (array-like): high pass column filter bank
        device: which device to put the tensors on to

    Returns:
        (h0, h1): two tensors of shape (1, 1, L) in the default torch dtype
    """
    # .copy() materialises the reversed view: torch cannot ingest NumPy
    # arrays with negative strides.
    h0 = np.array(h0).ravel()[::-1].copy()
    h1 = np.array(h1).ravel()[::-1].copy()
    t = torch.get_default_dtype()
    h0 = torch.tensor(h0, device=device, dtype=t).reshape((1, 1, -1))
    h1 = torch.tensor(h1, device=device, dtype=t).reshape((1, 1, -1))
    return h0, h1


def reflect(x, minx, maxx):
    """Reflect the values in matrix *x* about the scalar values *minx* and
    *maxx*. Hence a vector *x* containing a long linearly increasing series is
    converted into a waveform which ramps linearly up and down between *minx*
    and *maxx*. If *x* contains integers and *minx* and *maxx* are (integers +
    0.5), the ramps will have repeated max and min samples.

    The result is returned as a new array with the same dtype as *x*.

    .. codeauthor:: Rich Wareham, Aug 2013
    .. codeauthor:: Nick Kingsbury, Cambridge University, January 1999.

    """
    x = np.asanyarray(x)
    rng = maxx - minx
    rng_by_2 = 2 * rng  # one full up-and-down period
    mod = np.fmod(x - minx, rng_by_2)
    # np.fmod keeps the sign of the dividend; shift negatives into [0, period)
    normed_mod = np.where(mod < 0, mod + rng_by_2, mod)
    # second half of the period is the descending ramp
    out = np.where(normed_mod >= rng, rng_by_2 - normed_mod, normed_mod) + minx
    return np.array(out, dtype=x.dtype)
class Feedforward(nn.Module):
    """Bias-free two-layer feed-forward block.

    Computes ``dropout2(linear2(dropout1(activation(linear1(x)))))``.

    :param d_model: size of the input and output feature dimension
    :param dim_feedforward: size of the hidden layer
    :param dropout: dropout probability used after the activation and after
        the output projection
    :param activation: name of a function in ``torch.nn.functional``
    """

    def __init__(self, d_model, dim_feedforward, dropout=0.1, activation='sigmoid'):
        super().__init__()
        # Registration order is kept as-is so parameter initialisation and
        # state_dict layout do not change.
        self.linear1 = nn.Linear(d_model, dim_feedforward, bias=False)
        self.dropout1 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model, bias=False)
        self.dropout2 = nn.Dropout(dropout)
        self.activation = getattr(F, activation)

    def forward(self, x):
        hidden = self.linear1(x)
        hidden = self.activation(hidden)
        hidden = self.dropout1(hidden)
        projected = self.linear2(hidden)
        return self.dropout2(projected)
class FourierLayer(nn.Module):
    """Seasonal extrapolation from the k strongest Fourier components.

    The input is transformed with a real FFT along time, the ``k`` bins with
    the largest magnitude are kept per (batch, feature) pair, and the
    corresponding cosines are evaluated over ``t + pred_len`` time steps.

    :param d_model: feature dimension (stored only)
    :param pred_len: number of future steps appended to the reconstruction
    :param k: number of frequency bins kept per series
    :param low_freq: index of the first frequency bin that may be selected
        (bins below it, e.g. the DC term, are discarded)
    """

    def __init__(self, d_model, pred_len, k=None, low_freq=1):
        super().__init__()
        self.d_model = d_model
        self.pred_len = pred_len
        self.k = k
        self.low_freq = low_freq

    def forward(self, x):
        """x: (b, t, d) -> (b, t + pred_len, d)"""
        b, t, d = x.shape
        x_freq = fft.rfft(x, dim=1)
        # Build the frequency grid on the input's device: it is indexed below
        # with index tensors that live there, and a CPU tensor cannot be
        # indexed with CUDA indices.
        freqs = fft.rfftfreq(t, device=x.device)

        # For even t the last rfft bin (Nyquist) is dropped as well.
        stop = -1 if t % 2 == 0 else None
        x_freq = x_freq[:, self.low_freq:stop]
        freqs = freqs[self.low_freq:stop]

        x_freq, index_tuple = self.topk_freq(x_freq)
        # Broadcast the 1-D grid to (b, n_bins, d) so the same index tuple
        # that selected the coefficients selects their frequencies.
        freqs = freqs.reshape(1, -1, 1).expand(x_freq.size(0), -1, x_freq.size(2))
        freqs = freqs[index_tuple].unsqueeze(2)  # (b, k, 1, d)

        return self.extrapolate(x_freq, freqs, t)

    def extrapolate(self, x_freq, f, t):
        """Evaluate the selected components over ``t + pred_len`` steps.

        :param x_freq: (b, k, d) complex coefficients
        :param f: (b, k, 1, d) frequencies matching ``x_freq``
        :param t: length of the original series (used for normalisation)
        :return: (b, t + pred_len, d) real tensor
        """
        # Add the conjugate / negative-frequency partners so the sum is real.
        x_freq = torch.cat([x_freq, x_freq.conj()], dim=1)
        f = torch.cat([f, -f], dim=1)
        t_val = torch.arange(t + self.pred_len, dtype=torch.float,
                             device=x_freq.device).view(1, 1, -1, 1)

        amp = (x_freq.abs() / t).unsqueeze(2)
        phase = x_freq.angle().unsqueeze(2)

        x_time = amp * torch.cos(2 * math.pi * f * t_val + phase)

        # Sum over the frequency axis: (b, f, t, d) -> (b, t, d)
        return x_time.sum(dim=1)

    def topk_freq(self, x_freq):
        """Keep the ``k`` largest-magnitude bins along dim 1.

        :param x_freq: (b, n_bins, d) complex tensor
        :return: the gathered (b, k, d) coefficients and the index tuple
            ``(batch_idx, bin_idx, feature_idx)`` used to gather them
        """
        b, _, d = x_freq.shape
        _, indices = torch.topk(x_freq.abs(), self.k, dim=1, largest=True, sorted=True)
        device = indices.device
        # (b, 1, d) companions for `indices`; same values as the former
        # meshgrid-based construction, created directly on the right device.
        batch_idx = torch.arange(b, device=device).view(b, 1, 1).expand(b, 1, d)
        feat_idx = torch.arange(d, device=device).view(1, 1, d).expand(b, 1, d)
        index_tuple = (batch_idx, indices, feat_idx)
        x_freq = x_freq[index_tuple]

        return x_freq, index_tuple
class EncoderLayer(nn.Module):
    """One ETSformer encoder layer: seasonal, growth and level extraction.

    The layer removes a seasonal estimate and a growth estimate from the
    residual, refines the residual with a feed-forward block, and updates the
    level from the incoming level, growth and season.

    :param d_model: feature dimension of the residual stream
    :param nhead: number of heads used by the growth layer
    :param c_out: number of output channels used by the level layer
    :param seq_len: input sequence length (stored only)
    :param pred_len: forecast horizon; the seasonal block returns this many
        extra steps, which are cut off before use inside the layer
    :param k: number of Fourier components kept by the seasonal layer
    :param dim_feedforward: hidden size of the feed-forward block; falls back
        to ``4 * d_model`` when falsy
    :param dropout: dropout probability shared by all sub-blocks
    :param activation: activation name forwarded to ``Feedforward``
    :param layer_norm_eps: epsilon for both layer norms
    """

    def __init__(self, d_model, nhead, c_out, seq_len, pred_len, k, dim_feedforward=None, dropout=0.1,
                 activation='sigmoid', layer_norm_eps=1e-5):
        super().__init__()
        if not dim_feedforward:
            dim_feedforward = 4 * d_model

        self.d_model = d_model
        self.nhead = nhead
        self.c_out = c_out
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.dim_feedforward = dim_feedforward

        # Sub-module creation order is preserved so that parameter
        # initialisation draws from the RNG in the same sequence.
        self.growth_layer = GrowthLayer(d_model, nhead, dropout=dropout)
        self.seasonal_layer = FourierLayer(d_model, pred_len, k=k)
        self.level_layer = LevelLayer(d_model, c_out, dropout=dropout)

        self.ff = Feedforward(d_model, dim_feedforward, dropout=dropout, activation=activation)
        self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, res, level, attn_mask=None):
        """Return ``(res, level, growth, season)``; ``attn_mask`` is unused."""
        season = self._season_block(res)
        # Only the in-sample part of the season is removed from the residual.
        in_sample_season = season[:, :-self.pred_len]
        deseasoned = res - in_sample_season

        growth = self._growth_block(deseasoned)
        # growth carries one extra leading step; drop it to align with res.
        detrended = self.norm1(deseasoned - growth[:, 1:])
        refined = self.norm2(detrended + self.ff(detrended))

        new_level = self.level_layer(level, growth[:, :-1], season[:, :-self.pred_len])
        return refined, new_level, growth, season

    def _growth_block(self, x):
        return self.dropout1(self.growth_layer(x))

    def _season_block(self, x):
        return self.dropout2(self.seasonal_layer(x))
class Decoder(nn.Module):
    """Sum the per-layer growth and seasonal horizons and project them.

    :param layers: non-empty list of decoder layers; ``d_model``, ``c_out``,
        ``pred_len`` and ``nhead`` are read from the first one. Each layer is
        called as ``layer(growth, season)`` and must return a
        ``(growth_horizon, seasonal_horizon)`` pair.
    """

    def __init__(self, layers):
        super().__init__()
        first = layers[0]
        self.d_model = first.d_model
        self.c_out = first.c_out
        self.pred_len = first.pred_len
        self.nhead = first.nhead

        self.layers = nn.ModuleList(layers)
        self.pred = nn.Linear(self.d_model, self.c_out)

    def forward(self, growths, seasons):
        """
        :param growths: per-layer growth representations, indexed like the layers
        :param seasons: per-layer seasonal representations, indexed like the layers
        :return: ``(pred(sum of growth horizons), pred(sum of seasonal horizons))``
        """
        # Start from 0 and accumulate, which is what sum() over a list does.
        growth_total = 0
        season_total = 0
        for idx, layer in enumerate(self.layers):
            growth_horizon, season_horizon = layer(growths[idx], seasons[idx])
            growth_total = growth_total + growth_horizon
            season_total = season_total + season_horizon
        # The same projection is shared by both components.
        return self.pred(growth_total), self.pred(season_total)
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding.

    A ``(1, max_len, d_model)`` table is precomputed once and stored as the
    non-trainable buffer ``pe``. Even feature columns hold sines, odd columns
    hold cosines. Odd values of ``d_model`` are supported: the last cosine
    term is simply left out.

    :param d_model: size of the encoding dimension
    :param max_len: largest sequence length that can be looked up
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        # A buffer is never trained, so no requires_grad handling is needed.
        pe = torch.zeros(max_len, d_model).float()

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float()
                    * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term

        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return the first ``x.size(1)`` positions, shape (1, x.size(1), d_model)."""
        return self.pe[:, :x.size(1)]
class TimeFeatureEmbedding(nn.Module):
    """Linear projection of time features to ``d_model``.

    The number of input features is looked up from the frequency code
    (``'h'`` -> 4, ``'t'`` -> 5, ``'s'`` -> 6, ``'m'``/``'a'`` -> 1,
    ``'w'`` -> 2, ``'d'``/``'b'`` -> 3); an unknown code raises ``KeyError``.

    :param d_model: output feature dimension
    :param embed_type: accepted for interface compatibility; not used here
    :param freq: frequency code selecting the input width
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        features_per_freq = {
            'h': 4,
            't': 5,
            's': 6,
            'm': 1,
            'a': 1,
            'w': 2,
            'd': 3,
            'b': 3,
        }
        n_features = features_per_freq[freq]
        self.embed = nn.Linear(n_features, d_model, bias=False)

    def forward(self, x):
        projected = self.embed(x)
        return projected
class PatchEmbedding(nn.Module):
    """Split each series into patches and embed every patch.

    The last axis is right-padded by replicating the final value, cut into
    windows of ``patch_len`` taken every ``stride`` steps, and each window is
    linearly projected to ``d_model`` and summed with a positional embedding.

    :param d_model: embedding size of a patch
    :param patch_len: number of time steps per patch
    :param stride: step between the starts of consecutive patches
    :param padding: number of replicated values appended on the right
    :param dropout: dropout probability applied to the embedded patches
    """

    def __init__(self, d_model, patch_len, stride, padding, dropout):
        super(PatchEmbedding, self).__init__()
        # Patching configuration
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch_layer = nn.ReplicationPad1d((0, padding))

        # Projects every patch (a patch_len vector) to d_model
        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)

        # Positional embedding over the patch axis
        self.position_embedding = PositionalEmbedding(d_model)

        # Residual dropout
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``(embedded_patches, n_vars)`` where ``n_vars = x.shape[1]``.

        The first two axes of the patched tensor are merged, so the embedded
        output has ``x.shape[0] * x.shape[1]`` rows.
        """
        n_vars = x.shape[1]

        padded = self.padding_patch_layer(x)
        patches = padded.unfold(dimension=-1, size=self.patch_len, step=self.stride)

        shape = patches.shape
        tokens = torch.reshape(patches, (shape[0] * shape[1], shape[2], shape[3]))

        embedded = self.value_embedding(tokens) + self.position_embedding(tokens)
        return self.dropout(embedded), n_vars
def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
    """
    Choose which frequency modes to keep, returned as a sorted list of indices.

    At most ``seq_len // 2`` modes are available:
    'random' shuffles the available indices with the global NumPy RNG and
    keeps the first ``modes`` of them;
    any other value keeps the ``modes`` lowest indices.
    """
    available = seq_len // 2
    n_keep = min(modes, available)

    if mode_select_method != 'random':
        # Lowest modes; already in ascending order.
        return list(range(0, n_keep))

    candidates = list(range(0, available))
    np.random.shuffle(candidates)
    return sorted(candidates[:n_keep])
class FourierCrossAttention(nn.Module):
    """
    1D Fourier Cross Attention layer. It does FFT, linear transform, attention mechanism and Inverse FFT.

    Queries and keys are moved to the frequency domain, a subset of modes is
    selected for each, attention is computed between the selected modes, the
    result is mixed with learned complex weights and transformed back to the
    time domain. ``policy`` and the ``mask`` argument of ``forward`` are
    accepted but not used.
    """

    def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random',
                 activation='tanh', policy=0, num_heads=8):
        super(FourierCrossAttention, self).__init__()
        print(' fourier enhanced cross attention used!')
        self.activation = activation
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Modes for queries first, then keys/values: the order matters for
        # the global NumPy RNG when mode_select_method is 'random'.
        self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method)
        self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method)

        print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q))
        print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv))

        self.scale = (1 / (in_channels * out_channels))
        weight_shape = (num_heads, in_channels // num_heads, out_channels // num_heads, len(self.index_q))
        # Real and imaginary parts of the per-mode mixing weights.
        self.weights1 = nn.Parameter(self.scale * torch.rand(*weight_shape, dtype=torch.float))
        self.weights2 = nn.Parameter(self.scale * torch.rand(*weight_shape, dtype=torch.float))

    # Complex multiplication
    def compl_mul1d(self, order, x, weights):
        """Einsum ``order`` over possibly-complex operands.

        Returns a real tensor only when both operands were real; otherwise
        the complex product assembled from four real einsums.
        """
        x_was_complex = torch.is_complex(x)
        w_was_complex = torch.is_complex(weights)
        if not x_was_complex:
            x = torch.complex(x, torch.zeros_like(x).to(x.device))
        if not w_was_complex:
            weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))

        if not (x_was_complex or w_was_complex):
            return torch.einsum(order, x.real, weights.real)

        real_part = torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag)
        imag_part = torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)
        return torch.complex(real_part, imag_part)

    def _select_modes(self, spectrum, index, lead_shape, device):
        """Copy the bins listed in ``index`` into a dense complex tensor.

        Bins that do not exist in ``spectrum`` are left at zero.
        """
        selected = torch.zeros(*lead_shape, len(index), device=device, dtype=torch.cfloat)
        n_bins = spectrum.shape[3]
        for slot, mode in enumerate(index):
            if mode < n_bins:
                selected[:, :, :, slot] = spectrum[:, :, :, mode]
        return selected

    def forward(self, q, k, v, mask):
        # size = [B, L, H, E]
        B, L, H, E = q.shape
        xq = q.permute(0, 2, 3, 1)  # size = [B, H, E, L]
        xk = k.permute(0, 2, 3, 1)
        # NOTE(review): the values are permuted but never consumed below; the
        # second product reuses the key spectrum. Presumably inherited from
        # the reference implementation - confirm before changing.
        xv = v.permute(0, 2, 3, 1)

        # Compute Fourier coefficients and keep the configured modes
        xq_ft_ = self._select_modes(torch.fft.rfft(xq, dim=-1), self.index_q, (B, H, E), xq.device)
        xk_ft_ = self._select_modes(torch.fft.rfft(xk, dim=-1), self.index_kv, (B, H, E), xq.device)

        # perform attention mechanism on frequency domain
        xqk_ft = self.compl_mul1d("bhex,bhey->bhxy", xq_ft_, xk_ft_)
        if self.activation == 'tanh':
            xqk_ft = torch.complex(xqk_ft.real.tanh(), xqk_ft.imag.tanh())
        elif self.activation == 'softmax':
            xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
            xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
        else:
            raise Exception('{} actiation function is not implemented'.format(self.activation))

        xqkv_ft = self.compl_mul1d("bhxy,bhey->bhex", xqk_ft, xk_ft_)
        mixing = torch.complex(self.weights1, self.weights2)
        xqkvw = self.compl_mul1d("bhex,heox->bhox", xqkv_ft, mixing)

        # Scatter the processed modes back to their original bins
        out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
        n_modes = xqkvw.shape[3]
        n_bins = out_ft.shape[3]
        for slot, mode in enumerate(self.index_q):
            if slot < n_modes and mode < n_bins:
                out_ft[:, :, :, mode] = xqkvw[:, :, :, slot]

        # Return to time domain
        out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1))
        return (out, None)
class Attention_Block(nn.Module):
    """Self-attention followed by a 1x1-convolution feed-forward block.

    Both sub-blocks are wrapped in a residual connection and a LayerNorm
    (applied after the addition).

    :param d_model: feature dimension of the input and output
    :param d_ff: hidden width of the feed-forward part; ``4 * d_model`` when falsy
    :param n_heads: number of attention heads
    :param dropout: dropout probability (one shared Dropout module)
    :param activation: ``"relu"`` selects ReLU, anything else selects GELU
    """

    def __init__(self, d_model, d_ff=None, n_heads=8, dropout=0.1, activation="relu"):
        super(Attention_Block, self).__init__()
        if not d_ff:
            d_ff = 4 * d_model

        self.attention = self_attention(FullAttention, d_model, n_heads=n_heads)
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        # Attention sub-block; the attention weights themselves are discarded.
        attended, _ = self.attention(x, x, x, attn_mask=attn_mask)
        normed = self.norm1(x + self.dropout(attended))

        # Feed-forward sub-block: Conv1d wants channels on dim 1, so swap the
        # last axis with dim 1 going in and swap back coming out.
        hidden = self.conv1(normed.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        projected = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(normed + projected)
out , attn class FullAttention(nn.Module): def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): super(FullAttention, self).__init__() self.scale = scale self.mask_flag = mask_flag self.output_attention = output_attention self.dropout = nn.Dropout(attention_dropout) def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape scale = self.scale or 1. / sqrt(E) scores = torch.einsum("blhe,bshe->bhls", queries, keys) if self.mask_flag: if attn_mask is None: attn_mask = TriangularCausalMask(B, L, device=queries.device) scores.masked_fill_(attn_mask.mask, -np.inf) A = self.dropout(torch.softmax(scale * scores, dim=-1)) V = torch.einsum("bhls,bshd->blhd", A, values) # return V.contiguous() if self.output_attention: return (V.contiguous(), A) else: return (V.contiguous(), None) class GraphBlock(nn.Module): def __init__(self, c_out , d_model , conv_channel, skip_channel, gcn_depth , dropout, propalpha ,seq_len , node_dim): super(GraphBlock, self).__init__() self.nodevec1 = nn.Parameter(torch.randn(c_out, node_dim), requires_grad=True) self.nodevec2 = nn.Parameter(torch.randn(node_dim, c_out), requires_grad=True) self.start_conv = nn.Conv2d(1, conv_channel, (d_model - c_out + 1, 1)) self.gconv1 = mixprop(conv_channel, skip_channel, gcn_depth, dropout, propalpha) self.gelu = nn.GELU() self.end_conv = nn.Conv2d(skip_channel, seq_len , (1, seq_len )) self.linear = nn.Linear(c_out, d_model) self.norm = nn.LayerNorm(d_model) # x in (B, T, d_model) # Here we use a mlp to fit a complex mapping f (x) def forward(self, x): adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1) out = x.unsqueeze(1).transpose(2, 3) out = self.start_conv(out) out = self.gelu(self.gconv1(out , adp)) out = self.end_conv(out).squeeze(-1) out = self.linear(out) return self.norm(x + out) class nconv(nn.Module): def __init__(self): super(nconv,self).__init__() def forward(self,x, A): x = 
torch.einsum('ncwl,vw->ncvl',(x,A)) # x = torch.einsum('ncwl,wv->nclv',(x,A) return x.contiguous() class linear(nn.Module): def __init__(self,c_in,c_out,bias=True): super(linear,self).__init__() self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0,0), stride=(1,1), bias=bias) def forward(self,x): return self.mlp(x) class mixprop(nn.Module): def __init__(self,c_in,c_out,gdep,dropout,alpha): super(mixprop, self).__init__() self.nconv = nconv() self.mlp = linear((gdep+1)*c_in,c_out) self.gdep = gdep self.dropout = dropout self.alpha = alpha def forward(self, x, adj): adj = adj + torch.eye(adj.size(0)).to(x.device) d = adj.sum(1) h = x out = [h] a = adj / d.view(-1, 1) for i in range(self.gdep): h = self.alpha*x + (1-self.alpha)*self.nconv(h,a) out.append(h) ho = torch.cat(out,dim=1) ho = self.mlp(ho) return ho class simpleVIT(nn.Module): def __init__(self, in_channels, emb_size, patch_size=2, depth=1, num_heads=4, dropout=0.1,init_weight =True): super(simpleVIT, self).__init__() self.emb_size = emb_size self.depth = depth self.to_patch = nn.Sequential( nn.Conv2d(in_channels, emb_size, 2 * patch_size + 1, padding= patch_size), Rearrange('b e (h) (w) -> b (h w) e'), ) self.layers = nn.ModuleList([]) for _ in range(self.depth): self.layers.append(nn.ModuleList([ nn.LayerNorm(emb_size), MultiHeadAttention(emb_size, num_heads, dropout), FeedForward(emb_size, emb_size) ])) if init_weight: self._initialize_weights() def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) def forward(self,x): B , N ,_ ,P = x.shape x = self.to_patch(x) # x = x.permute(0, 2, 3, 1).reshape(B,-1, N) for norm ,attn, ff in self.layers: x = attn(norm(x)) + x x = ff(x) + x x = x.transpose(1,2).reshape(B, self.emb_size ,-1, P) return x class MultiHeadAttention(nn.Module): def __init__(self, emb_size, num_heads, dropout): 
super().__init__() self.emb_size = emb_size self.num_heads = num_heads self.keys = nn.Linear(emb_size, emb_size) self.queries = nn.Linear(emb_size, emb_size) self.values = nn.Linear(emb_size, emb_size) self.att_drop = nn.Dropout(dropout) self.projection = nn.Linear(emb_size, emb_size) def forward(self, x: Tensor, mask: Tensor = None) -> Tensor: queries = rearrange(self.queries(x), "b n (h d) -> b h n d", h=self.num_heads) keys = rearrange(self.keys(x), "b n (h d) -> b h n d", h=self.num_heads) values = rearrange(self.values(x), "b n (h d) -> b h n d", h=self.num_heads) energy = torch.einsum('bhqd, bhkd -> bhqk', queries, keys) if mask is not None: fill_value = torch.finfo(torch.float32).min energy.mask_fill(~mask, fill_value) scaling = self.emb_size ** (1 / 2) att = F.softmax(energy, dim=-1) / scaling att = self.att_drop(att) # sum up over the third axis out = torch.einsum('bhal, bhlv -> bhav ', att, values) out = rearrange(out, "b h n d -> b n (h d)") out = self.projection(out) return out class FeedForward(nn.Module): def __init__(self, dim, hidden_dim): super().__init__() self.net = nn.Sequential( nn.LayerNorm(dim), nn.Linear(dim, hidden_dim), nn.GELU(), nn.Linear(hidden_dim, dim), ) def forward(self, x): return self.net(x) ================================================ FILE: layers/MambaBlock.py ================================================ import math import torch import torch.nn as nn import torch.nn.functional as F from einops import rearrange, repeat from mamba_ssm.ops.selective_scan_interface import selective_scan_fn try: from causal_conv1d import causal_conv1d_fn, causal_conv1d_update except ImportError: causal_conv1d_fn, causal_conv1d_update = None, None ### DELETED: selective_state_update is not used in this experiment since it does not support the use of timevariant dt, B, C flags. 
class Mamba_TimeVariant(nn.Module):
    """
    Mamba block with optional time-variant dt, B and C.

    The `timevariant_dt`, `timevariant_B` and `timevariant_C` flags choose,
    per quantity, between an input-dependent projection (via `x_proj`) and a
    constant.

    Differences from the original `modules.mamba_simple.Mamba` class:
    - `x_proj` is `None` when all three flags are off, so dt, B and C are
      only split from it when it exists.
    - With `timevariant_dt=False`, dt is a constant derived from
      `dt_proj.bias` alone.
    - With `d_conv=0`, the depthwise convolution is replaced by `nn.Identity`
      and `step` follows the plain `SiLU(x)` path.
    - The inference caches pick dtype/device from `in_proj` when `conv1d` has
      no weight, and are sized with `d_inner` so that fractional `expand`
      values work.
    - `use_fast_path` is stored but the fused kernel path is not implemented
      here.
    """

    def __init__(
        self,
        d_model,
        d_input=None,
        d_output=None,
        d_state=16,
        d_conv=4,
        expand=2,
        dt_rank="auto",
        dt_min=0.001,
        dt_max=0.1,
        dt_init="random",
        dt_scale=1.0,
        dt_init_floor=1e-4,
        conv_bias=True,
        bias=False,
        use_fast_path=False,
        layer_idx=None,
        device=None,
        dtype=None,
        timevariant_dt=True,
        timevariant_B=True,
        timevariant_C=True,
        use_D=True,
    ):
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        self.d_model = d_model
        # Input / output widths default to d_model when not given.
        self.d_input = d_input if d_input is not None else d_model
        self.d_output = d_output if d_output is not None else d_model
        self.d_state = d_state
        self.d_conv = d_conv
        self.expand = expand
        self.d_inner = int(self.expand * self.d_model)
        self.dt_rank = math.ceil(self.d_model / 16) if dt_rank == "auto" else dt_rank
        self.use_fast_path = use_fast_path
        self.layer_idx = layer_idx

        self.in_proj = nn.Linear(self.d_input, self.d_inner * 2, bias=bias, **factory_kwargs)

        # d_conv == 0 disables the short depthwise convolution entirely.
        self.conv1d = nn.Conv1d(
            in_channels=self.d_inner,
            out_channels=self.d_inner,
            bias=conv_bias,
            kernel_size=d_conv,
            groups=self.d_inner,
            padding=d_conv - 1,
            **factory_kwargs,
        ) if d_conv > 0 else nn.Identity()

        self.activation = "silu"
        self.act = nn.SiLU()

        # x_proj emits only the slices whose flag is on; a disabled slice has
        # width 0 so torch.split still yields three tensors.
        self.tv_dt, self.tv_B, self.tv_C = timevariant_dt, timevariant_B, timevariant_C
        self.tv_proj_dim = [
            self.dt_rank if timevariant_dt else 0,
            self.d_state if timevariant_B else 0,
            self.d_state if timevariant_C else 0,
        ]
        self.x_proj = nn.Linear(
            self.d_inner, sum(self.tv_proj_dim), bias=False, **factory_kwargs
        ) if sum(self.tv_proj_dim) > 0 else None

        # Constant (learned, input-independent) B / C when their flag is off.
        if not timevariant_B:
            self.B = nn.Parameter(torch.rand(self.d_inner, self.d_state, **factory_kwargs))
            self.B._no_weight_decay = True
        if not timevariant_C:
            self.C = nn.Parameter(torch.rand(self.d_inner, self.d_state, **factory_kwargs))
            self.C._no_weight_decay = True

        # dt_proj always exists: its bias is the constant dt when tv_dt is off.
        self.dt_proj = nn.Linear(self.dt_rank, self.d_inner, bias=True, **factory_kwargs)

        # Initialize special dt projection to preserve variance at initialization
        dt_init_std = self.dt_rank**-0.5 * dt_scale
        if dt_init == "constant":
            nn.init.constant_(self.dt_proj.weight, dt_init_std)
        elif dt_init == "random":
            nn.init.uniform_(self.dt_proj.weight, -dt_init_std, dt_init_std)
        else:
            raise NotImplementedError

        # Initialize dt bias so that F.softplus(dt_bias) is between dt_min and dt_max
        dt = torch.exp(
            torch.rand(self.d_inner, **factory_kwargs) * (math.log(dt_max) - math.log(dt_min))
            + math.log(dt_min)
        ).clamp(min=dt_init_floor)
        # Inverse of softplus: https://github.com/pytorch/pytorch/issues/72759
        inv_dt = dt + torch.log(-torch.expm1(-dt))
        with torch.no_grad():
            self.dt_proj.bias.copy_(inv_dt)
        # Our initialization would set all Linear.bias to zero, need to mark this one as _no_reinit
        self.dt_proj.bias._no_reinit = True

        # S4D real initialization: every row of A is [1, 2, ..., d_state].
        A = repeat(
            torch.arange(1, self.d_state + 1, dtype=torch.float32, device=device),
            "n -> d n",
            d=self.d_inner,
        ).contiguous()
        A_log = torch.log(A)  # Keep A_log in fp32
        self.A_log = nn.Parameter(A_log)
        self.A_log._no_weight_decay = True

        # D "skip" parameter, only present when use_D is set. It is created
        # directly in fp32: calling .float() on a Parameter returns a plain
        # tensor whenever a cast is actually needed, which would silently
        # drop it from the module's parameters.
        if use_D:
            self.D = nn.Parameter(torch.ones(self.d_inner, device=device, dtype=torch.float32))
            self.D._no_weight_decay = True
        else:
            self.D = None

        self.out_proj = nn.Linear(self.d_inner, self.d_output, bias=bias, **factory_kwargs)

    def forward(self, hidden_states, inference_params=None):
        """
        hidden_states: (B, L, d_input)
        Returns: (B, L, d_output)

        When `inference_params` is given and its `seqlen_offset` is positive,
        the call is delegated to `step` (single-token decoding) and the cached
        states are updated in place.
        """
        batch, seqlen, _ = hidden_states.shape

        conv_state, ssm_state = None, None
        if inference_params is not None:
            conv_state, ssm_state = self._get_states_from_cache(inference_params, batch)
            if inference_params.seqlen_offset > 0:
                # The states are updated inplace
                out, _, _ = self.step(hidden_states, conv_state, ssm_state)
                return out

        # We do matmul and transpose BLH -> HBL at the same time
        xz = rearrange(
            self.in_proj.weight @ rearrange(hidden_states, "b l d -> d (b l)"),
            "d (b l) -> b d l",
            l=seqlen,
        )  # (batch, d_inner * 2, seqlen)
        if self.in_proj.bias is not None:
            xz = xz + rearrange(self.in_proj.bias.to(dtype=xz.dtype), "d -> d 1")

        A = -torch.exp(self.A_log.float())  # (d_inner, d_state), always negative

        x, z = xz.chunk(2, dim=1)  # each (batch, d_inner, seqlen)

        # Compute short convolution
        if conv_state is not None:
            # If we just take x[:, :, -self.d_conv :], it will error if seqlen < self.d_conv
            # Instead F.pad will pad with zeros if seqlen < self.d_conv, and truncate otherwise.
            if self.d_conv > 0:
                conv_state.copy_(F.pad(x, (self.d_conv - x.shape[-1], 0)))  # Update state (B D W)
        if (causal_conv1d_fn is None) or (self.d_conv not in [2, 3, 4]):
            x = self.act(self.conv1d(x)[..., :seqlen])
        else:
            assert self.activation in ["silu", "swish"]
            x = causal_conv1d_fn(
                x=x,
                weight=rearrange(self.conv1d.weight, "d 1 w -> d w"),
                bias=self.conv1d.bias,
                activation=self.activation,
            )  # (batch, d_inner, seqlen)

        # We're careful here about the layout, to avoid extra transposes.
        # We want dt to have d as the slowest moving dimension
        # and L as the fastest moving dimension, since those are what the ssm_scan kernel expects.
        if self.x_proj is not None:
            x_dbl = self.x_proj(rearrange(x, "b d l -> (b l) d"))  # (batch * seqlen, sum(tv_proj_dim))
            dt, B, C = torch.split(x_dbl, self.tv_proj_dim, dim=-1)

        if not self.tv_dt:
            # Constant dt: the scan adds delta_bias itself, so pass zeros here.
            dt = torch.zeros(
                batch, self.d_inner, seqlen,
                device=self.dt_proj.bias.device, dtype=self.dt_proj.bias.dtype,
            )
        else:
            dt = self.dt_proj.weight @ dt.t()  # (d_inner, batch * seqlen)
            dt = rearrange(dt, "d (b l) -> b d l", l=seqlen)  # (batch, d_inner, seqlen)

        if not self.tv_B:
            B = self.B  # (d_inner, d_state)
        else:
            B = rearrange(B, "(b l) dstate -> b dstate l", l=seqlen).contiguous()

        if not self.tv_C:
            C = self.C  # (d_inner, d_state)
        else:
            C = rearrange(C, "(b l) dstate -> b dstate l", l=seqlen).contiguous()

        assert self.activation in ["silu", "swish"]
        y = selective_scan_fn(
            x,
            dt,
            A,
            B,
            C,
            self.D,
            z=z,
            delta_bias=self.dt_proj.bias.float(),
            delta_softplus=True,
            return_last_state=ssm_state is not None,
        )
        if ssm_state is not None:
            y, last_state = y
            ssm_state.copy_(last_state)
        y = rearrange(y, "b d l -> b l d")
        return self.out_proj(y)

    def step(self, hidden_states, conv_state, ssm_state):
        """Decode a single token, updating `conv_state` / `ssm_state` in place.

        hidden_states: (B, 1, d_input)
        Returns: `(out, conv_state, ssm_state)` with `out` of shape (B, 1, d_output).
        """
        dtype = hidden_states.dtype
        assert hidden_states.shape[1] == 1, "Only support decoding with 1 token at a time for now"
        xz = self.in_proj(hidden_states.squeeze(1))  # (B, d_inner * 2)
        x, z = xz.chunk(2, dim=-1)  # each (B, d_inner)

        # Conv step
        if self.d_conv == 0:
            # No convolution weights exist; only the activation is applied.
            x = self.act(x).to(dtype=dtype)
        elif (causal_conv1d_update is None) or (self.d_conv not in [2, 3, 4]):
            conv_state.copy_(torch.roll(conv_state, shifts=-1, dims=-1))  # Update state (B D W)
            conv_state[:, :, -1] = x
            x = torch.sum(conv_state * rearrange(self.conv1d.weight, "d 1 w -> d w"), dim=-1)  # (B D)
            if self.conv1d.bias is not None:
                x = x + self.conv1d.bias
            x = self.act(x).to(dtype=dtype)
        else:
            x = causal_conv1d_update(
                x,
                conv_state,
                rearrange(self.conv1d.weight, "d 1 w -> d w"),
                self.conv1d.bias,
                self.activation,
            )

        if self.x_proj is not None:
            x_db = self.x_proj(x)  # (B, sum(tv_proj_dim))
            dt, B, C = torch.split(x_db, self.tv_proj_dim, dim=-1)

        # SSM step
        if not self.tv_dt:
            dt = F.softplus(self.dt_proj.bias.to(dtype=x.dtype))
            dt = repeat(dt, "d -> b d", b=x.shape[0])  # (B, d_inner)
        else:
            dt = F.linear(dt, self.dt_proj.weight)  # (B, d_inner)
            dt = F.softplus(dt + self.dt_proj.bias.to(dtype=dt.dtype))

        if not self.tv_B:
            dB = torch.einsum("bd,dn->bdn", dt, self.B)  # (B, d_inner, d_state)
        else:
            dB = torch.einsum("bd,bn->bdn", dt, B)  # (B, d_inner, d_state)

        A = -torch.exp(self.A_log.float())  # (d_inner, d_state)
        dA = torch.exp(torch.einsum("bd,dn->bdn", dt, A))
        ssm_state.copy_(ssm_state * dA + rearrange(x, "b d -> b d 1") * dB)

        if not self.tv_C:
            y = torch.einsum("bdn,dn->bd", ssm_state.to(dtype), self.C)
        else:
            y = torch.einsum("bdn,bn->bd", ssm_state.to(dtype), C)

        # Skip connection only when D exists (use_D=True).
        if self.D is not None:
            y = y + self.D.to(dtype) * x
        y = y * self.act(z)  # (B, d_inner)

        out = self.out_proj(y)  # (B, d_output)
        return out.unsqueeze(1), conv_state, ssm_state

    def allocate_inference_cache(self, batch_size, max_seqlen, dtype=None, **kwargs):
        """Return freshly zeroed `(conv_state, ssm_state)` tensors.

        Shapes are `(batch_size, d_inner, d_conv)` and
        `(batch_size, d_inner, d_state)`. `max_seqlen` and extra keyword
        arguments are accepted but unused.
        """
        device = self.out_proj.weight.device
        if dtype is not None:
            conv_dtype = dtype
        elif hasattr(self.conv1d, "weight"):
            conv_dtype = self.conv1d.weight.dtype
        else:
            # conv1d is nn.Identity (d_conv == 0): fall back to in_proj.
            conv_dtype = self.in_proj.weight.dtype
        # d_inner (an int) instead of d_model * expand, which is a float for
        # fractional expand and is rejected by torch.zeros.
        conv_state = torch.zeros(
            batch_size, self.d_inner, self.d_conv, device=device, dtype=conv_dtype
        )
        ssm_dtype = self.dt_proj.weight.dtype if dtype is None else dtype
        ssm_state = torch.zeros(
            batch_size, self.d_inner, self.d_state, device=device, dtype=ssm_dtype
        )
        return conv_state, ssm_state

    def _get_states_from_cache(self, inference_params, batch_size, initialize_states=False):
        """Fetch this layer's `(conv_state, ssm_state)`, creating them on first use.

        States live in `inference_params.key_value_memory_dict[self.layer_idx]`.
        Existing states are zeroed in place when `initialize_states` is set.
        """
        assert self.layer_idx is not None
        if self.layer_idx not in inference_params.key_value_memory_dict:
            # conv1d has no weight when it is nn.Identity; use in_proj then.
            conv_ref = self.conv1d.weight if hasattr(self.conv1d, "weight") else self.in_proj.weight
            conv_state = torch.zeros(
                batch_size,
                self.d_inner,
                self.d_conv,
                device=conv_ref.device,
                dtype=conv_ref.dtype,
            )
            ssm_state = torch.zeros(
                batch_size,
                self.d_inner,
                self.d_state,
                device=self.dt_proj.weight.device,
                dtype=self.dt_proj.weight.dtype,
            )
            inference_params.key_value_memory_dict[self.layer_idx] = (conv_state, ssm_state)
        else:
            conv_state, ssm_state = inference_params.key_value_memory_dict[self.layer_idx]
            # TODO: What if batch size changes between generation, and we reuse the same states?
            if initialize_states:
                conv_state.zero_()
                ssm_state.zero_()
        return conv_state, ssm_state
np.arange(len(prod_)))).sum() psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :] psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :] a = psi1_coeff[ki, :] prod_ = np.convolve(a, a) prod_[np.abs(prod_) < 1e-8] = 0 norm1 = (prod_ * 1 / (np.arange(len(prod_)) + 1) * np.power(0.5, 1 + np.arange(len(prod_)))).sum() a = psi2_coeff[ki, :] prod_ = np.convolve(a, a) prod_[np.abs(prod_) < 1e-8] = 0 norm2 = (prod_ * 1 / (np.arange(len(prod_)) + 1) * (1 - np.power(0.5, 1 + np.arange(len(prod_))))).sum() norm_ = np.sqrt(norm1 + norm2) psi1_coeff[ki, :] /= norm_ psi2_coeff[ki, :] /= norm_ psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0 psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0 phi = [np.poly1d(np.flip(phi_coeff[i, :])) for i in range(k)] psi1 = [np.poly1d(np.flip(psi1_coeff[i, :])) for i in range(k)] psi2 = [np.poly1d(np.flip(psi2_coeff[i, :])) for i in range(k)] elif base == 'chebyshev': for ki in range(k): if ki == 0: phi_coeff[ki, :ki + 1] = np.sqrt(2 / np.pi) phi_2x_coeff[ki, :ki + 1] = np.sqrt(2 / np.pi) * np.sqrt(2) else: coeff_ = Poly(chebyshevt(ki, 2 * x - 1), x).all_coeffs() phi_coeff[ki, :ki + 1] = np.flip(2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64)) coeff_ = Poly(chebyshevt(ki, 4 * x - 1), x).all_coeffs() phi_2x_coeff[ki, :ki + 1] = np.flip( np.sqrt(2) * 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64)) phi = [partial(phi_, phi_coeff[i, :]) for i in range(k)] x = Symbol('x') kUse = 2 * k roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots() x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1) # not needed for our purpose here, we use even k always to avoid wm = np.pi / kUse / 2 psi1_coeff = np.zeros((k, k)) psi2_coeff = np.zeros((k, k)) psi1 = [[] for _ in range(k)] psi2 = [[] for _ in range(k)] for ki in range(k): psi1_coeff[ki, :] = phi_2x_coeff[ki, :] for i in range(k): proj_ = (wm * phi[i](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum() 
psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :] psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :] for j in range(ki): proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum() psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :] psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :] psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5) psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5, ub=1) norm1 = (wm * psi1[ki](x_m) * psi1[ki](x_m)).sum() norm2 = (wm * psi2[ki](x_m) * psi2[ki](x_m)).sum() norm_ = np.sqrt(norm1 + norm2) psi1_coeff[ki, :] /= norm_ psi2_coeff[ki, :] /= norm_ psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0 psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0 psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5 + 1e-16) psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5 + 1e-16, ub=1) return phi, psi1, psi2 def get_filter(base, k): def psi(psi1, psi2, i, inp): mask = (inp <= 0.5) * 1.0 return psi1[i](inp) * mask + psi2[i](inp) * (1 - mask) if base not in ['legendre', 'chebyshev']: raise Exception('Base not supported') x = Symbol('x') H0 = np.zeros((k, k)) H1 = np.zeros((k, k)) G0 = np.zeros((k, k)) G1 = np.zeros((k, k)) PHI0 = np.zeros((k, k)) PHI1 = np.zeros((k, k)) phi, psi1, psi2 = get_phi_psi(k, base) if base == 'legendre': roots = Poly(legendre(k, 2 * x - 1)).all_roots() x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) wm = 1 / k / legendreDer(k, 2 * x_m - 1) / eval_legendre(k - 1, 2 * x_m - 1) for ki in range(k): for kpi in range(k): H0[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum() G0[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum() H1[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum() G1[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum() PHI0 = np.eye(k) PHI1 = np.eye(k) elif base == 'chebyshev': x = Symbol('x') kUse = 2 * k roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots() x_m = np.array([rt.evalf(20) for rt in 
roots]).astype(np.float64) # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1) # not needed for our purpose here, we use even k always to avoid wm = np.pi / kUse / 2 for ki in range(k): for kpi in range(k): H0[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum() G0[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum() H1[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum() G1[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum() PHI0[ki, kpi] = (wm * phi[ki](2 * x_m) * phi[kpi](2 * x_m)).sum() * 2 PHI1[ki, kpi] = (wm * phi[ki](2 * x_m - 1) * phi[kpi](2 * x_m - 1)).sum() * 2 PHI0[np.abs(PHI0) < 1e-8] = 0 PHI1[np.abs(PHI1) < 1e-8] = 0 H0[np.abs(H0) < 1e-8] = 0 H1[np.abs(H1) < 1e-8] = 0 G0[np.abs(G0) < 1e-8] = 0 G1[np.abs(G1) < 1e-8] = 0 return H0, H1, G0, G1, PHI0, PHI1 class MultiWaveletTransform(nn.Module): """ 1D multiwavelet block. 
""" def __init__(self, ich=1, k=8, alpha=16, c=128, nCZ=1, L=0, base='legendre', attention_dropout=0.1): super(MultiWaveletTransform, self).__init__() print('base', base) self.k = k self.c = c self.L = L self.nCZ = nCZ self.Lk0 = nn.Linear(ich, c * k) self.Lk1 = nn.Linear(c * k, ich) self.ich = ich self.MWT_CZ = nn.ModuleList(MWT_CZ1d(k, alpha, L, c, base) for i in range(nCZ)) def forward(self, queries, keys, values, attn_mask): B, L, H, E = queries.shape _, S, _, D = values.shape if L > S: zeros = torch.zeros_like(queries[:, :(L - S), :]).float() values = torch.cat([values, zeros], dim=1) keys = torch.cat([keys, zeros], dim=1) else: values = values[:, :L, :, :] keys = keys[:, :L, :, :] values = values.view(B, L, -1) V = self.Lk0(values).view(B, L, self.c, -1) for i in range(self.nCZ): V = self.MWT_CZ[i](V) if i < self.nCZ - 1: V = F.relu(V) V = self.Lk1(V.view(B, L, -1)) V = V.view(B, L, -1, D) return (V.contiguous(), None) class MultiWaveletCross(nn.Module): """ 1D Multiwavelet Cross Attention layer. 
""" def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes, c=64, k=8, ich=512, L=0, base='legendre', mode_select_method='random', initializer=None, activation='tanh', **kwargs): super(MultiWaveletCross, self).__init__() print('base', base) self.c = c self.k = k self.L = L H0, H1, G0, G1, PHI0, PHI1 = get_filter(base, k) H0r = H0 @ PHI0 G0r = G0 @ PHI0 H1r = H1 @ PHI1 G1r = G1 @ PHI1 H0r[np.abs(H0r) < 1e-8] = 0 H1r[np.abs(H1r) < 1e-8] = 0 G0r[np.abs(G0r) < 1e-8] = 0 G1r[np.abs(G1r) < 1e-8] = 0 self.max_item = 3 self.attn1 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, seq_len_kv=seq_len_kv, modes=modes, activation=activation, mode_select_method=mode_select_method) self.attn2 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, seq_len_kv=seq_len_kv, modes=modes, activation=activation, mode_select_method=mode_select_method) self.attn3 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, seq_len_kv=seq_len_kv, modes=modes, activation=activation, mode_select_method=mode_select_method) self.attn4 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, seq_len_kv=seq_len_kv, modes=modes, activation=activation, mode_select_method=mode_select_method) self.T0 = nn.Linear(k, k) self.register_buffer('ec_s', torch.Tensor( np.concatenate((H0.T, H1.T), axis=0))) self.register_buffer('ec_d', torch.Tensor( np.concatenate((G0.T, G1.T), axis=0))) self.register_buffer('rc_e', torch.Tensor( np.concatenate((H0r, G0r), axis=0))) self.register_buffer('rc_o', torch.Tensor( np.concatenate((H1r, G1r), axis=0))) self.Lk = nn.Linear(ich, c * k) self.Lq = nn.Linear(ich, c * k) self.Lv = nn.Linear(ich, c * k) self.out = nn.Linear(c * k, ich) self.modes1 = modes def forward(self, q, k, v, mask=None): B, N, H, E = q.shape # (B, N, H, E) torch.Size([3, 768, 8, 2]) _, S, _, _ = k.shape # (B, S, H, 
def wavelet_transform(self, x):
    """Halve the sequence axis and project through the ``ec_d`` / ``ec_s`` buffers.

    Even-indexed and odd-indexed steps along dim 1 are concatenated on the
    last axis, then multiplied by ``self.ec_d`` and ``self.ec_s``.

    Args:
        x: 4-D tensor; dim 1 is split into even/odd steps.

    Returns:
        Tuple ``(d, s)``: the ``ec_d`` product and the ``ec_s`` product.
    """
    even_steps = x[:, 0::2]
    odd_steps = x[:, 1::2]
    paired = torch.cat((even_steps, odd_steps), dim=-1)
    d_part = paired @ self.ec_d
    s_part = paired @ self.ec_s
    return d_part, s_part
class sparseKernelFT1d(nn.Module):
    """Learned per-frequency mixing of the flattened ``c * k`` channels.

    The input is transformed with a real FFT along the sequence axis, the
    first ``min(alpha, N // 2 + 1)`` frequency bins are multiplied by a
    complex weight built from ``weights1`` (real part) and ``weights2``
    (imaginary part), all other bins stay zero, and the result is
    transformed back.
    """

    def __init__(self,
                 k, alpha, c=1,
                 nl=1,
                 initializer=None,
                 **kwargs):
        """``nl``, ``initializer`` and ``**kwargs`` are accepted but unused."""
        super(sparseKernelFT1d, self).__init__()

        self.modes1 = alpha
        width = c * k
        self.scale = (1 / (c * k * c * k))
        # weights1 is drawn before weights2, so the RNG stream is consumed in the same order.
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(width, width, self.modes1, dtype=torch.float),
            requires_grad=True)
        self.weights2 = nn.Parameter(
            self.scale * torch.rand(width, width, self.modes1, dtype=torch.float),
            requires_grad=True)
        self.k = k

    def compl_mul1d(self, order, x, weights):
        """Einsum ``x`` with ``weights`` using explicit real/imaginary arithmetic.

        Real operands are promoted to complex with a zero imaginary part.
        The result is complex if either operand was complex, otherwise real.
        """
        x_was_complex = torch.is_complex(x)
        w_was_complex = torch.is_complex(weights)
        if not x_was_complex:
            x = torch.complex(x, torch.zeros_like(x))
        if not w_was_complex:
            weights = torch.complex(weights, torch.zeros_like(weights))

        if not (x_was_complex or w_was_complex):
            return torch.einsum(order, x.real, weights.real)

        real_part = torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag)
        imag_part = torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)
        return torch.complex(real_part, imag_part)

    def forward(self, x):
        """Map ``x`` of shape (B, N, c, k) to a tensor of the same shape."""
        B, N, c, k = x.shape  # (B, N, c, k)
        n_bins = N // 2 + 1

        channels_first = x.view(B, N, -1).permute(0, 2, 1)
        spectrum = torch.fft.rfft(channels_first)

        # Only the lowest `kept` frequency bins are mixed; the rest remain zero.
        kept = min(self.modes1, n_bins)
        mixed = torch.zeros(B, c * k, n_bins, device=channels_first.device, dtype=torch.cfloat)
        kernel = torch.complex(self.weights1, self.weights2)[:, :, :kept]
        mixed[:, :, :kept] = self.compl_mul1d("bix,iox->box", spectrum[:, :, :kept], kernel)

        restored = torch.fft.irfft(mixed, n=N)
        return restored.permute(0, 2, 1).view(B, N, c, k)
def get_mask(input_size, window_size, inner_size):
    """Get the attention mask of PAM-Naive.

    Args:
        input_size: length of the finest scale.
        window_size: sequence of window sizes; scale ``j + 1`` has
            ``floor(size_j / window_size[j])`` nodes.
        inner_size: neighbourhood width; each node may attend to
            ``inner_size // 2`` nodes on either side within its own scale.

    Returns:
        ``(mask, all_size)``: a square boolean tensor over the concatenation
        of all scales where ``True`` marks a pair that is NOT connected, and
        the list of per-scale sizes.
    """
    # Size of every scale, finest first.
    all_size = [input_size]
    for window in window_size:
        all_size.append(math.floor(all_size[-1] / window))

    # Index of the first node of each scale inside the concatenated sequence.
    offsets = [sum(all_size[:level]) for level in range(len(all_size))]

    seq_length = sum(all_size)
    mask = torch.zeros(seq_length, seq_length)

    # Intra-scale links: a clipped window around every node.
    half = inner_size // 2
    for begin, size in zip(offsets, all_size):
        end = begin + size
        for row in range(begin, end):
            mask[row, max(row - half, begin):min(row + half + 1, end)] = 1

    # Inter-scale links: each coarse node is linked, in both directions, to
    # the finer nodes it covers.
    for level in range(1, len(all_size)):
        begin = offsets[level]
        finer_begin = begin - all_size[level - 1]
        stride = window_size[level - 1]
        last_row = begin + all_size[level] - 1
        for row in range(begin, last_row + 1):
            lo = finer_begin + (row - begin) * stride
            if row == last_row:
                # The last coarse node takes every remaining finer node.
                hi = begin
            else:
                hi = finer_begin + (row - begin + 1) * stride
            mask[row, lo:hi] = 1
            mask[lo:hi, row] = 1

    mask = (1 - mask).bool()
    return mask, all_size
class RegularMask():
    """Read-only holder exposing a mask through a ``.mask`` attribute.

    The mask handed to the constructor gets an extra axis of size one
    inserted at position 1 before it is stored.
    """

    def __init__(self, mask):
        expanded = mask.unsqueeze(dim=1)
        self._mask = expanded

    @property
    def mask(self):
        """The stored mask, including the axis added in ``__init__``."""
        return self._mask
""" def __init__(self, configs, window_size, inner_size): super().__init__() d_bottleneck = configs.d_model//4 self.mask, self.all_size = get_mask( configs.seq_len, window_size, inner_size) self.indexes = refer_points(self.all_size, window_size) self.layers = nn.ModuleList([ EncoderLayer(configs.d_model, configs.d_ff, configs.n_heads, dropout=configs.dropout, normalize_before=False) for _ in range(configs.e_layers) ]) # naive pyramid attention self.enc_embedding = DataEmbedding( configs.enc_in, configs.d_model, configs.dropout) self.conv_layers = Bottleneck_Construct( configs.d_model, window_size, d_bottleneck) def forward(self, x_enc, x_mark_enc): seq_enc = self.enc_embedding(x_enc, x_mark_enc) mask = self.mask.repeat(len(seq_enc), 1, 1).to(x_enc.device) seq_enc = self.conv_layers(seq_enc) for i in range(len(self.layers)): seq_enc = self.layers[i](seq_enc, mask) indexes = self.indexes.repeat(seq_enc.size( 0), 1, 1, seq_enc.size(2)).to(seq_enc.device) indexes = indexes.view(seq_enc.size(0), -1, seq_enc.size(2)) all_enc = torch.gather(seq_enc, 1, indexes) seq_enc = all_enc.view(seq_enc.size(0), self.all_size[0], -1) return seq_enc class ConvLayer(nn.Module): def __init__(self, c_in, window_size): super(ConvLayer, self).__init__() self.downConv = nn.Conv1d(in_channels=c_in, out_channels=c_in, kernel_size=window_size, stride=window_size) self.norm = nn.BatchNorm1d(c_in) self.activation = nn.ELU() def forward(self, x): x = self.downConv(x) x = self.norm(x) x = self.activation(x) return x class Bottleneck_Construct(nn.Module): """Bottleneck convolution CSCM""" def __init__(self, d_model, window_size, d_inner): super(Bottleneck_Construct, self).__init__() if not isinstance(window_size, list): self.conv_layers = nn.ModuleList([ ConvLayer(d_inner, window_size), ConvLayer(d_inner, window_size), ConvLayer(d_inner, window_size) ]) else: self.conv_layers = [] for i in range(len(window_size)): self.conv_layers.append(ConvLayer(d_inner, window_size[i])) self.conv_layers = 
class PositionwiseFeedForward(nn.Module):
    """Two-layer position-wise feed-forward network with a residual connection.

    With ``normalize_before=True`` the layer norm is applied to the input of
    the two linear layers (the residual uses the un-normalised input);
    otherwise it is applied after the residual sum.
    """

    def __init__(self, d_in, d_hid, dropout=0.1, normalize_before=True):
        super().__init__()

        self.normalize_before = normalize_before

        self.w_1 = nn.Linear(d_in, d_hid)
        self.w_2 = nn.Linear(d_hid, d_in)

        self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return a tensor with the same shape as ``x``."""
        skip = x
        hidden = self.layer_norm(x) if self.normalize_before else x

        hidden = self.dropout(F.gelu(self.w_1(hidden)))
        hidden = self.dropout(self.w_2(hidden))
        hidden = hidden + skip

        if self.normalize_before:
            return hidden
        return self.layer_norm(hidden)
self.scale or 1. / sqrt(E) tau = 1.0 if tau is None else tau.unsqueeze( 1).unsqueeze(1) # B x 1 x 1 x 1 delta = 0.0 if delta is None else delta.unsqueeze( 1).unsqueeze(1) # B x 1 x 1 x S # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta if self.mask_flag: if attn_mask is None: attn_mask = TriangularCausalMask(B, L, device=queries.device) scores.masked_fill_(attn_mask.mask, -np.inf) A = self.dropout(torch.softmax(scale * scores, dim=-1)) V = torch.einsum("bhls,bshd->blhd", A, values) if self.output_attention: return V.contiguous(), A else: return V.contiguous(), None class FullAttention(nn.Module): def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): super(FullAttention, self).__init__() self.scale = scale self.mask_flag = mask_flag self.output_attention = output_attention self.dropout = nn.Dropout(attention_dropout) def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): B, L, H, E = queries.shape _, S, _, D = values.shape scale = self.scale or 1. 
class ProbAttention(nn.Module):
    """ProbSparse attention: only the top-u queries get a real attention row.

    Queries are ranked with a sparsity measurement computed on a random
    sample of keys. The ``u`` highest-ranked queries attend over all keys;
    every other query keeps a default context (the mean of the values, or
    their cumulative sum when ``mask_flag`` is set).

    NOTE(review): ``self.dropout`` is created but never applied in this class.
    NOTE(review): ``forward`` returns the context laid out as
    [B, H, L_Q, D], whereas ``FullAttention`` in this file returns
    [B, L, H, D]. Left unchanged here so existing results are preserved;
    confirm this is intended.
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        """
        Args:
            mask_flag: use the cumulative-sum default context and a ``ProbMask``.
            factor: multiplier ``c`` in ``c * ceil(ln(L))`` for both the
                number of sampled keys and the number of selected queries.
            scale: score multiplier; ``1 / sqrt(D)`` when falsy.
            attention_dropout: rate of the (currently unused) dropout module.
            output_attention: also return a dense attention tensor.
        """
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        """Pick the ``n_top`` most informative queries and score them against all keys.

        Args:
            Q: queries, [B, H, L_Q, D].
            K: keys, [B, H, L_K, D].
            sample_k: number of keys sampled (with replacement) per query.
            n_top: number of queries to keep.

        Returns:
            ``(Q_K, M_top)``: scores [B, H, n_top, L_K] of the kept queries
            and their indices [B, H, n_top].
        """
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        # real U = U_part(factor*ln(L_k))*L_q
        index_sample = torch.randint(L_K, (L_Q, sample_k))
        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
        # Drop ONLY the singleton axis added by Q.unsqueeze(-2). A bare
        # .squeeze() would also remove B, H, L_Q or sample_k whenever one of
        # them is 1, which corrupts the top-k selection below.
        Q_K_sample = torch.matmul(
            Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # find the Top_k query with sparsity measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        """Default context for every query before the selected ones are overwritten."""
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            V_mean = V.mean(dim=-2)
            contex = V_mean.unsqueeze(-2).expand(B, H, L_Q, V_mean.shape[-1]).clone()
        else:  # use mask
            # requires that L_Q == L_V, i.e. for self-attention only
            assert (L_Q == L_V)
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        """Write softmax(scores) @ V into the rows of ``context_in`` listed in ``index``.

        The ``attn_mask`` argument is ignored; when ``mask_flag`` is set a
        ``ProbMask`` is built from ``index`` and ``scores`` instead.
        """
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        batch_idx = torch.arange(B)[:, None, None]
        head_idx = torch.arange(H)[None, :, None]
        context_in[batch_idx, head_idx, index, :] = torch.matmul(attn, V).type_as(context_in)

        if self.output_attention:
            # Rows of non-selected queries are reported as uniform 1 / L_V.
            attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
            attns[batch_idx, head_idx, index, :] = attn
            return context_in, attns
        return context_in, None

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """
        Args:
            queries: [B, L_Q, H, D].
            keys, values: [B, L_K, H, D].
            attn_mask, tau, delta: accepted for interface parity; ``tau`` and
                ``delta`` are unused and ``attn_mask`` is ignored downstream.

        Returns:
            ``(context, attn)`` with context of shape [B, H, L_Q, D] and
            ``attn`` either ``None`` or [B, H, L_V, L_V].
        """
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        U_part = U_part if U_part < L_K else L_K
        # ln(1) == 0 would ask for zero sampled keys and make the max() in
        # _prob_QK reduce over an empty axis; always sample at least one.
        U_part = max(U_part, 1)
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1. / sqrt(D)
        scores_top = scores_top * scale

        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

        return context.contiguous(), attn
attn_mask, tau, delta): # in Reformer: defalut queries=keys B, N, C = queries.shape queries = self.attn(self.fit_length(queries))[:, :N, :] return queries, None class TwoStageAttentionLayer(nn.Module): ''' The Two Stage Attention (TSA) Layer input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model] ''' def __init__(self, configs, seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1): super(TwoStageAttentionLayer, self).__init__() d_ff = d_ff or 4 * d_model self.time_attention = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), d_model, n_heads) self.dim_sender = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), d_model, n_heads) self.dim_receiver = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), d_model, n_heads) self.router = nn.Parameter(torch.randn(seg_num, factor, d_model)) self.dropout = nn.Dropout(dropout) self.norm1 = nn.LayerNorm(d_model) self.norm2 = nn.LayerNorm(d_model) self.norm3 = nn.LayerNorm(d_model) self.norm4 = nn.LayerNorm(d_model) self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff), nn.GELU(), nn.Linear(d_ff, d_model)) self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff), nn.GELU(), nn.Linear(d_ff, d_model)) def forward(self, x, attn_mask=None, tau=None, delta=None): # Cross Time Stage: Directly apply MSA to each dimension batch = x.shape[0] time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model') time_enc, attn = self.time_attention( time_in, time_in, time_in, attn_mask=None, tau=None, delta=None ) dim_in = time_in + self.dropout(time_enc) dim_in = self.norm1(dim_in) dim_in = dim_in + self.dropout(self.MLP1(dim_in)) dim_in = self.norm2(dim_in) # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b 
class Normalize(nn.Module):
    """Reversible instance normalisation with an optional affine transform.

    ``forward(x, 'norm')`` records statistics of ``x`` on the module and
    returns the normalised tensor; ``forward(x, 'denorm')`` undoes it using
    the statistics recorded by the most recent ``'norm'`` call.
    """

    def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        :param subtract_last: if True, centre on the last time step instead of the mean
        :param non_norm: if True, both directions return the input unchanged
        """
        super(Normalize, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.subtract_last = subtract_last
        self.non_norm = non_norm
        if affine:
            self._init_params()

    def forward(self, x, mode: str):
        """Dispatch on ``mode``; anything but 'norm' / 'denorm' raises NotImplementedError."""
        if mode == 'norm':
            # Statistics are refreshed even when non_norm is set.
            self._get_statistics(x)
            return self._normalize(x)
        if mode == 'denorm':
            return self._denormalize(x)
        raise NotImplementedError

    def _init_params(self):
        # initialize RevIN params: (C,)
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        """Store the centring term and the standard deviation of ``x``."""
        # Reduce over every axis except the first and the last.
        reduce_dims = tuple(range(1, x.ndim - 1))
        if self.subtract_last:
            self.last = x[:, -1, :].unsqueeze(1)
        else:
            self.mean = torch.mean(x, dim=reduce_dims, keepdim=True).detach()
        # The standard deviation is needed in both centring modes.
        variance = torch.var(x, dim=reduce_dims, keepdim=True, unbiased=False)
        self.stdev = torch.sqrt(variance + self.eps).detach()

    def _normalize(self, x):
        if self.non_norm:
            return x
        centre = self.last if self.subtract_last else self.mean
        x = (x - centre) / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        if self.non_norm:
            return x
        if self.affine:
            x = x - self.affine_bias
            # eps**2 keeps the division finite if a weight is exactly zero.
            x = x / (self.affine_weight + self.eps * self.eps)
        x = x * self.stdev
        centre = self.last if self.subtract_last else self.mean
        return x + centre
def mask_topk_area(adj, n_vars, masks, alpha=0.5):
    """Keep only the strongest entries of ``adj`` inside each of three regions.

    ``masks`` is a [L, 3, L] tensor holding, for every row ``k``, three
    0/1 selectors over the columns. When ``masks`` is None they are rebuilt
    here with ``N = L // n_vars``:

    * region 0: columns with the same index modulo ``N`` as ``k``, except ``k``;
    * region 1: columns inside the length-``N`` block that contains ``k``;
    * region 2: ``1 - region0 - region1``.

    Within each region, per row, the threshold is the value at rank
    ``int(n * alpha)`` of the masked row sorted in descending order, where
    ``n`` is ``n_vars - 1``, ``N - 1`` and ``L - (n_vars - 1) - (N - 1) - 1``
    respectively. Entries below the threshold are zeroed.

    Args:
        adj: [B, H, L, L] tensor.
        n_vars: number of variables; ``L`` is split into blocks of ``L // n_vars``.
        masks: optional precomputed [L, 3, L] selectors.
        alpha: fraction that positions the threshold rank.

    Returns:
        The sum of the three thresholded regions, same shape as ``adj``.
    """
    _, _, L, _ = adj.shape
    N = L // n_vars

    if masks is None:
        device = adj.device
        dtype = torch.float32
        print("Masks is None!")
        positions = torch.arange(L)
        per_row = []
        for k in range(L):
            block_start = k // N * N
            same_offset = ((positions % N == k % N) & (positions != k)).to(dtype).to(device)
            same_block = ((positions >= block_start) & (positions < block_start + N)).to(dtype).to(device)
            remainder = torch.ones(L).to(dtype).to(device) - same_offset - same_block
            per_row.append(torch.stack([same_offset, same_block, remainder], dim=0))  # [3, L]
        masks = torch.stack(per_row, dim=0)  # [L, 3, L]

    n0 = n_vars - 1
    n1 = N - 1
    n2 = L - n0 - n1 - 1

    kept = []
    for region, region_size in enumerate((n0, n1, n2)):
        region_adj = adj * masks[:, region, :]
        rank = int(region_size * alpha)
        ranked = torch.sort(region_adj, dim=-1, descending=True)[0]
        cutoff = ranked[:, :, :, rank].unsqueeze(-1)
        kept.append(region_adj * (region_adj >= cutoff))

    return kept[0] + kept[1] + kept[2]
is_training, noise_epsilon=1e-2): clean_logits = self.gate(x) if self.noisy_gating and is_training: raw_noise = self.noise(x) noise_stddev = ((self.softplus(raw_noise) + noise_epsilon)) noisy_logits = clean_logits + torch.randn_like(clean_logits) * noise_stddev logits = noisy_logits else: logits = clean_logits # Convert logits to probabilities logits = self.softmax(logits) loss_dynamic = self.cross_entropy(logits) sorted_probs, sorted_indices = torch.sort(logits, descending=True) cumulative_probs = torch.cumsum(sorted_probs, dim=-1) mask = cumulative_probs > self.top_p threshold_indices = mask.long().argmax(dim=-1) threshold_mask = torch.nn.functional.one_hot(threshold_indices, num_classes=sorted_indices.size(-1)).bool() mask = mask & ~threshold_mask top_p_mask = torch.zeros_like(mask) zero_indices = (mask == 0).nonzero(as_tuple=True) top_p_mask[ zero_indices[0], zero_indices[1], sorted_indices[zero_indices[0], zero_indices[1], zero_indices[2]]] = 1 sorted_probs = torch.where(mask, 0.0, sorted_probs) loss_importance = self.cv_squared(sorted_probs.sum(0)) lambda_2 = 0.1 loss = loss_importance + lambda_2 * loss_dynamic return top_p_mask, loss def forward(self, x, masks=None): # x [B, H, L, L] B, H, L, _ = x.shape device = x.device dtype = torch.float32 mask_base = torch.eye(L, device=device, dtype=dtype).unsqueeze(0).unsqueeze(0) if self.top_p == 0.0: return mask_base, 0.0 x = x.reshape(B * H, L, L) gates, loss = self.noisy_top_k_gating(x, self.training) gates = gates.reshape(B, H, L, -1).float() # [B, H, L, 3] if masks is None: print("Masks is None!") masks = [] N = L // self.n_vars for k in range(L): S = ((torch.arange(L) % N == k % N) & (torch.arange(L) != k)).to(dtype).to(device) T = ((torch.arange(L) >= k // N * N) & (torch.arange(L) < k // N * N + N)).to(dtype).to(device) ST = torch.ones(L).to(dtype).to(device) - S - T masks.append(torch.stack([S, T, ST], dim=0)) # [L, 3, L] masks = torch.stack(masks, dim=0) mask = torch.einsum('bhli,lid->bhld', gates, masks) + 
def mask_topk(x, alpha=0.5, largest=False):
    """Build a float mask that zeroes ``int(alpha * L)`` entries per row.

    Along the last axis, the ``k = int(alpha * x.shape[-1])`` entries chosen
    by ``torch.topk`` (the smallest ones by default, the largest ones when
    ``largest=True``) are set to 0 in the mask; every other position is 1.

    Args:
        x: input scores, e.g. [B, H, L, L].
        alpha: fraction of the last axis to zero out.
        largest: forwarded to ``torch.topk``.

    Returns:
        float32 tensor with the same shape as ``x``.
    """
    n_drop = int(alpha * x.shape[-1])
    dropped = torch.topk(x, n_drop, dim=-1, largest=largest)[1]
    keep_all = torch.ones_like(x, dtype=torch.float32)
    return keep_all.scatter(-1, dropped, 0.0)  # [B, H, L, L]
class ConvLayer(nn.Module):
    """Downsampling block: circular conv -> batch norm -> ELU -> max pool.

    Operates on inputs laid out as [B, L, C]; the tensor is moved to
    channels-first for the 1-D operators and moved back afterwards. The max
    pool has stride 2, so the sequence axis is roughly halved.
    """

    def __init__(self, c_in):
        super(ConvLayer, self).__init__()
        self.downConv = nn.Conv1d(c_in, c_in,
                                  kernel_size=3,
                                  padding=2,
                                  padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Return the pooled features in the same [B, L', C] layout as the input."""
        channels_first = x.permute(0, 2, 1)
        features = self.activation(self.norm(self.downConv(channels_first)))
        return self.maxPool(features).transpose(1, 2)
nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) self.activation = F.relu if activation == "relu" else F.gelu def forward(self, x, attn_mask=None, tau=None, delta=None): new_x, attn = self.attention( x, x, x, attn_mask=attn_mask, tau=tau, delta=delta ) x = x + self.dropout(new_x) y = x = self.norm1(x) y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) return self.norm2(x + y), attn class Encoder(nn.Module): def __init__(self, attn_layers, conv_layers=None, norm_layer=None): super(Encoder, self).__init__() self.attn_layers = nn.ModuleList(attn_layers) self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None self.norm = norm_layer def forward(self, x, attn_mask=None, tau=None, delta=None): # x [B, L, D] attns = [] if self.conv_layers is not None: for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): delta = delta if i == 0 else None x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) x = conv_layer(x) attns.append(attn) x, attn = self.attn_layers[-1](x, tau=tau, delta=None) attns.append(attn) else: for attn_layer in self.attn_layers: x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) attns.append(attn) if self.norm is not None: x = self.norm(x) return x, attns class DecoderLayer(nn.Module): def __init__(self, self_attention, cross_attention, d_model, d_ff=None, dropout=0.1, activation="relu"): super(DecoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.self_attention = self_attention self.cross_attention = cross_attention self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) self.norm1 = nn.LayerNorm(d_model) self.norm2 = nn.LayerNorm(d_model) self.norm3 = nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) self.activation = F.relu if activation == "relu" else F.gelu def forward(self, x, cross, 
x_mask=None, cross_mask=None, tau=None, delta=None): x = x + self.dropout(self.self_attention( x, x, x, attn_mask=x_mask, tau=tau, delta=None )[0]) x = self.norm1(x) x = x + self.dropout(self.cross_attention( x, cross, cross, attn_mask=cross_mask, tau=tau, delta=delta )[0]) y = x = self.norm2(x) y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) return self.norm3(x + y) class Decoder(nn.Module): def __init__(self, layers, norm_layer=None, projection=None): super(Decoder, self).__init__() self.layers = nn.ModuleList(layers) self.norm = norm_layer self.projection = projection def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): for layer in self.layers: x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) if self.norm is not None: x = self.norm(x) if self.projection is not None: x = self.projection(x) return x ================================================ FILE: layers/__init__.py ================================================ ================================================ FILE: models/Autoformer.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from layers.Embed import DataEmbedding, DataEmbedding_wo_pos from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp import math import numpy as np class Model(nn.Module): """ Autoformer is the first method to achieve the series-wise connection, with inherent O(LlogL) complexity Paper link: https://openreview.net/pdf?id=I55UqU-M11y """ def __init__(self, configs): super(Model, self).__init__() self.task_name = configs.task_name self.seq_len = configs.seq_len self.label_len = configs.label_len self.pred_len = configs.pred_len # Decomp kernel_size = configs.moving_avg self.decomp = series_decomp(kernel_size) # 
Embedding self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) # Encoder self.encoder = Encoder( [ EncoderLayer( AutoCorrelationLayer( AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), configs.d_model, configs.d_ff, moving_avg=configs.moving_avg, dropout=configs.dropout, activation=configs.activation ) for l in range(configs.e_layers) ], norm_layer=my_Layernorm(configs.d_model) ) # Decoder if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.decoder = Decoder( [ DecoderLayer( AutoCorrelationLayer( AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), AutoCorrelationLayer( AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), configs.d_model, configs.c_out, configs.d_ff, moving_avg=configs.moving_avg, dropout=configs.dropout, activation=configs.activation, ) for l in range(configs.d_layers) ], norm_layer=my_Layernorm(configs.d_model), projection=nn.Linear(configs.d_model, configs.c_out, bias=True) ) if self.task_name == 'imputation': self.projection = nn.Linear( configs.d_model, configs.c_out, bias=True) if self.task_name == 'anomaly_detection': self.projection = nn.Linear( configs.d_model, configs.c_out, bias=True) if self.task_name == 'classification': self.act = F.gelu self.dropout = nn.Dropout(configs.dropout) self.projection = nn.Linear( configs.d_model * configs.seq_len, configs.num_class) def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): # decomp init mean = torch.mean(x_enc, dim=1).unsqueeze( 1).repeat(1, self.pred_len, 1) zeros = torch.zeros([x_dec.shape[0], self.pred_len, 
x_dec.shape[2]], device=x_enc.device) seasonal_init, trend_init = self.decomp(x_enc) # decoder input trend_init = torch.cat( [trend_init[:, -self.label_len:, :], mean], dim=1) seasonal_init = torch.cat( [seasonal_init[:, -self.label_len:, :], zeros], dim=1) # enc enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=None) # dec dec_out = self.dec_embedding(seasonal_init, x_mark_dec) seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, trend=trend_init) # final dec_out = trend_part + seasonal_part return dec_out def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): # enc enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=None) # final dec_out = self.projection(enc_out) return dec_out def anomaly_detection(self, x_enc): # enc enc_out = self.enc_embedding(x_enc, None) enc_out, attns = self.encoder(enc_out, attn_mask=None) # final dec_out = self.projection(enc_out) return dec_out def classification(self, x_enc, x_mark_enc): # enc enc_out = self.enc_embedding(x_enc, None) enc_out, attns = self.encoder(enc_out, attn_mask=None) # Output # the output transformer encoder/decoder embeddings don't include non-linearity output = self.act(enc_out) output = self.dropout(output) # zero-out padding embeddings output = output * x_mark_enc.unsqueeze(-1) # (batch_size, seq_length * d_model) output = output.reshape(output.shape[0], -1) output = self.projection(output) # (batch_size, num_classes) return output def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) return dec_out[:, -self.pred_len:, :] # [B, L, D] if self.task_name == 'imputation': dec_out = self.imputation( x_enc, x_mark_enc, x_dec, x_mark_dec, mask) return dec_out # [B, L, D] if self.task_name == 'anomaly_detection': dec_out 
= self.anomaly_detection(x_enc) return dec_out # [B, L, D] if self.task_name == 'classification': dec_out = self.classification(x_enc, x_mark_enc) return dec_out # [B, N] return None ================================================ FILE: models/Chronos.py ================================================ import torch from torch import nn from layers.Transformer_EncDec import Encoder, EncoderLayer from layers.SelfAttention_Family import FullAttention, AttentionLayer from layers.Embed import PatchEmbedding from chronos import BaseChronosPipeline class Model(nn.Module): def __init__(self, configs): """ patch_len: int, patch len for patch_embedding stride: int, stride for patch_embedding """ super().__init__() self.model = BaseChronosPipeline.from_pretrained( "amazon/chronos-bolt-base", device_map="cuda", # use "cpu" for CPU inference and "mps" for Apple Silicon torch_dtype=torch.bfloat16, ) self.task_name = configs.task_name self.seq_len = configs.seq_len self.pred_len = configs.pred_len def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): outputs = [] for i in range(x_enc.shape[-1]): output = self.model.predict(x_enc[...,i], prediction_length=self.pred_len) output = output.mean(dim=1) outputs.append(output) dec_out = torch.stack(outputs, dim=-1) return dec_out def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): if self.task_name == 'zero_shot_forecast': dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) return dec_out return None ================================================ FILE: models/Chronos2.py ================================================ import torch from torch import nn from layers.Transformer_EncDec import Encoder, EncoderLayer from layers.SelfAttention_Family import FullAttention, AttentionLayer from layers.Embed import PatchEmbedding from chronos import BaseChronosPipeline class Model(nn.Module): def __init__(self, configs): """ patch_len: int, patch len for patch_embedding stride: int, stride for patch_embedding """ 
super().__init__() self.model = BaseChronosPipeline.from_pretrained("amazon/chronos-2", device_map="cuda") self.task_name = configs.task_name self.seq_len = configs.seq_len self.pred_len = configs.pred_len def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): means = x_enc.mean(1, keepdim=True).detach() x_enc = x_enc.sub(means) stdev = torch.sqrt( torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) x_enc = x_enc.div(stdev) B, L, C = x_enc.shape x_enc = x_enc.permute(0, 2, 1) quantiles, dec_out = self.model.predict_quantiles(x_enc.cpu().numpy(), prediction_length=self.pred_len, quantile_levels=[0.1, 0.5, 0.9]) dec_out = torch.stack(dec_out, dim=0).to(x_enc.device) dec_out= dec_out.permute(0, 2, 1) dec_out = dec_out * \ (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) dec_out = dec_out + \ (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) return dec_out def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): if self.task_name == 'zero_shot_forecast': dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) return dec_out return None ================================================ FILE: models/Crossformer.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from einops import rearrange, repeat from layers.Crossformer_EncDec import scale_block, Encoder, Decoder, DecoderLayer from layers.Embed import PatchEmbedding from layers.SelfAttention_Family import AttentionLayer, FullAttention, TwoStageAttentionLayer from models.PatchTST import FlattenHead from math import ceil class Model(nn.Module): """ Paper link: https://openreview.net/pdf?id=vSVLM2j9eie """ def __init__(self, configs): super(Model, self).__init__() self.enc_in = configs.enc_in self.seq_len = configs.seq_len self.pred_len = configs.pred_len self.seg_len = 12 self.win_size = 2 self.task_name = configs.task_name # The padding operation to handle invisible sgemnet length self.pad_in_len = ceil(1.0 * 
configs.seq_len / self.seg_len) * self.seg_len self.pad_out_len = ceil(1.0 * configs.pred_len / self.seg_len) * self.seg_len self.in_seg_num = self.pad_in_len // self.seg_len self.out_seg_num = ceil(self.in_seg_num / (self.win_size ** (configs.e_layers - 1))) self.head_nf = configs.d_model * self.out_seg_num # Embedding self.enc_value_embedding = PatchEmbedding(configs.d_model, self.seg_len, self.seg_len, self.pad_in_len - configs.seq_len, 0) self.enc_pos_embedding = nn.Parameter( torch.randn(1, configs.enc_in, self.in_seg_num, configs.d_model)) self.pre_norm = nn.LayerNorm(configs.d_model) # Encoder self.encoder = Encoder( [ scale_block(configs, 1 if l == 0 else self.win_size, configs.d_model, configs.n_heads, configs.d_ff, 1, configs.dropout, self.in_seg_num if l == 0 else ceil(self.in_seg_num / self.win_size ** l), configs.factor ) for l in range(configs.e_layers) ] ) # Decoder self.dec_pos_embedding = nn.Parameter( torch.randn(1, configs.enc_in, (self.pad_out_len // self.seg_len), configs.d_model)) self.decoder = Decoder( [ DecoderLayer( TwoStageAttentionLayer(configs, (self.pad_out_len // self.seg_len), configs.factor, configs.d_model, configs.n_heads, configs.d_ff, configs.dropout), AttentionLayer( FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), self.seg_len, configs.d_model, configs.d_ff, dropout=configs.dropout, # activation=configs.activation, ) for l in range(configs.e_layers + 1) ], ) if self.task_name == 'imputation' or self.task_name == 'anomaly_detection': self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len, head_dropout=configs.dropout) elif self.task_name == 'classification': self.flatten = nn.Flatten(start_dim=-2) self.dropout = nn.Dropout(configs.dropout) self.projection = nn.Linear( self.head_nf * configs.enc_in, configs.num_class) def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): # embedding x_enc, n_vars = 
self.enc_value_embedding(x_enc.permute(0, 2, 1)) x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d = n_vars) x_enc += self.enc_pos_embedding x_enc = self.pre_norm(x_enc) enc_out, attns = self.encoder(x_enc) dec_in = repeat(self.dec_pos_embedding, 'b ts_d l d -> (repeat b) ts_d l d', repeat=x_enc.shape[0]) dec_out = self.decoder(dec_in, enc_out) return dec_out def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): # embedding x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars) x_enc += self.enc_pos_embedding x_enc = self.pre_norm(x_enc) enc_out, attns = self.encoder(x_enc) dec_out = self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1) return dec_out def anomaly_detection(self, x_enc): # embedding x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars) x_enc += self.enc_pos_embedding x_enc = self.pre_norm(x_enc) enc_out, attns = self.encoder(x_enc) dec_out = self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1) return dec_out def classification(self, x_enc, x_mark_enc): # embedding x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars) x_enc += self.enc_pos_embedding x_enc = self.pre_norm(x_enc) enc_out, attns = self.encoder(x_enc) # Output from Non-stationary Transformer output = self.flatten(enc_out[-1].permute(0, 1, 3, 2)) output = self.dropout(output) output = output.reshape(output.shape[0], -1) output = self.projection(output) return output def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) return dec_out[:, -self.pred_len:, :] # [B, L, D] if self.task_name == 'imputation': 
dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) return dec_out # [B, L, D] if self.task_name == 'anomaly_detection': dec_out = self.anomaly_detection(x_enc) return dec_out # [B, L, D] if self.task_name == 'classification': dec_out = self.classification(x_enc, x_mark_enc) return dec_out # [B, N] return None ================================================ FILE: models/DLinear.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from layers.Autoformer_EncDec import series_decomp class Model(nn.Module): """ Paper link: https://arxiv.org/pdf/2205.13504.pdf """ def __init__(self, configs, individual=False): """ individual: Bool, whether shared model among different variates. """ super(Model, self).__init__() self.task_name = configs.task_name self.seq_len = configs.seq_len if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation': self.pred_len = configs.seq_len else: self.pred_len = configs.pred_len # Series decomposition block from Autoformer self.decompsition = series_decomp(configs.moving_avg) self.individual = individual self.channels = configs.enc_in if self.individual: self.Linear_Seasonal = nn.ModuleList() self.Linear_Trend = nn.ModuleList() for i in range(self.channels): self.Linear_Seasonal.append( nn.Linear(self.seq_len, self.pred_len)) self.Linear_Trend.append( nn.Linear(self.seq_len, self.pred_len)) self.Linear_Seasonal[i].weight = nn.Parameter( (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) self.Linear_Trend[i].weight = nn.Parameter( (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) else: self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len) self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len) self.Linear_Seasonal.weight = nn.Parameter( (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) self.Linear_Trend.weight = nn.Parameter( (1 / self.seq_len) * 
torch.ones([self.pred_len, self.seq_len])) if self.task_name == 'classification': self.projection = nn.Linear( configs.enc_in * configs.seq_len, configs.num_class) def encoder(self, x): seasonal_init, trend_init = self.decompsition(x) seasonal_init, trend_init = seasonal_init.permute( 0, 2, 1), trend_init.permute(0, 2, 1) if self.individual: seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len], dtype=seasonal_init.dtype).to(seasonal_init.device) trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len], dtype=trend_init.dtype).to(trend_init.device) for i in range(self.channels): seasonal_output[:, i, :] = self.Linear_Seasonal[i]( seasonal_init[:, i, :]) trend_output[:, i, :] = self.Linear_Trend[i]( trend_init[:, i, :]) else: seasonal_output = self.Linear_Seasonal(seasonal_init) trend_output = self.Linear_Trend(trend_init) x = seasonal_output + trend_output return x.permute(0, 2, 1) def forecast(self, x_enc): # Encoder return self.encoder(x_enc) def imputation(self, x_enc): # Encoder return self.encoder(x_enc) def anomaly_detection(self, x_enc): # Encoder return self.encoder(x_enc) def classification(self, x_enc): # Encoder enc_out = self.encoder(x_enc) # Output # (batch_size, seq_length * d_model) output = enc_out.reshape(enc_out.shape[0], -1) # (batch_size, num_classes) output = self.projection(output) return output def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': dec_out = self.forecast(x_enc) return dec_out[:, -self.pred_len:, :] # [B, L, D] if self.task_name == 'imputation': dec_out = self.imputation(x_enc) return dec_out # [B, L, D] if self.task_name == 'anomaly_detection': dec_out = self.anomaly_detection(x_enc) return dec_out # [B, L, D] if self.task_name == 'classification': dec_out = self.classification(x_enc) return dec_out # [B, N] return None 
================================================
FILE: models/ETSformer.py
================================================
import torch
import torch.nn as nn
from layers.Embed import DataEmbedding
from layers.ETSformer_EncDec import EncoderLayer, Encoder, DecoderLayer, Decoder, Transform


class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2202.01381
    """

    def __init__(self, configs):
        """Build the embedding, encoder, decoder and optional classification head.

        Requires ``configs.e_layers == configs.d_layers`` (asserted below).
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        # Non-forecasting tasks produce an output as long as the input window.
        if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len

        assert configs.e_layers == configs.d_layers, "Encoder and decoder layers must be equal"

        # Embedding
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    configs.d_model, configs.n_heads, configs.enc_in, configs.seq_len, self.pred_len, configs.top_k,
                    dim_feedforward=configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                ) for _ in range(configs.e_layers)
            ]
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    configs.d_model, configs.n_heads, configs.c_out, self.pred_len,
                    dropout=configs.dropout,
                ) for _ in range(configs.d_layers)
            ],
        )
        self.transform = Transform(sigma=0.2)

        if self.task_name == 'classification':
            self.act = torch.nn.functional.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return last level + decoded growth + decoded season."""
        # The input transform is applied only in training mode and without
        # gradient tracking.
        with torch.no_grad():
            if self.training:
                x_enc = self.transform.transform(x_enc)
        res = self.enc_embedding(x_enc, x_mark_enc)
        level, growths, seasons = self.encoder(res, x_enc, attn_mask=None)

        growth, season = self.decoder(growths, seasons)
        preds = level[:, -1:] + growth + season
        return preds

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Same pipeline as ``forecast`` without the training-time transform."""
        res = self.enc_embedding(x_enc, x_mark_enc)
        level, growths, seasons = self.encoder(res, x_enc, attn_mask=None)
        growth, season = self.decoder(growths, seasons)
        preds = level[:, -1:] + growth + season
        return preds

    def anomaly_detection(self, x_enc):
        """Same pipeline as ``imputation`` with no time-mark features."""
        res = self.enc_embedding(x_enc, None)
        level, growths, seasons = self.encoder(res, x_enc, attn_mask=None)
        growth, season = self.decoder(growths, seasons)
        preds = level[:, -1:] + growth + season
        return preds

    def classification(self, x_enc, x_mark_enc):
        """Sum growth and season over layers, mask padding, flatten and classify."""
        res = self.enc_embedding(x_enc, None)
        _, growths, seasons = self.encoder(res, x_enc, attn_mask=None)

        # Sum the per-layer components and keep the first seq_len steps.
        growths = torch.sum(torch.stack(growths, 0), 0)[:, :self.seq_len, :]
        seasons = torch.sum(torch.stack(seasons, 0), 0)[:, :self.seq_len, :]

        enc_out = growths + seasons
        output = self.act(enc_out)  # the output transformer encoder/decoder embeddings don't include non-linearity
        output = self.dropout(output)

        # Output
        output = output * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        output = output.reshape(output.shape[0], -1)  # (batch_size, seq_length * d_model)
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unrecognised task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None

================================================
FILE: models/FEDformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Embed import DataEmbedding
from layers.AutoCorrelation import AutoCorrelationLayer
from layers.FourierCorrelation import FourierBlock, FourierCrossAttention
from layers.MultiWaveletCorrelation import MultiWaveletCross, MultiWaveletTransform
from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp


class Model(nn.Module):
    """
    FEDformer performs the attention mechanism on frequency domain and achieved O(N) complexity
    Paper link: https://proceedings.mlr.press/v162/zhou22g.html
    """

    def __init__(self, configs, version='fourier', mode_select='random', modes=32):
        """
        version: str, for FEDformer, there are two versions to choose, options: [Fourier, Wavelets].
            Only the exact string 'Wavelets' selects the wavelet blocks; any
            other value (including the default) selects the Fourier blocks.
        mode_select: str, for FEDformer, there are two mode selection method, options: [random, low].
        modes: int, modes to be selected.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        self.version = version
        self.mode_select = mode_select
        self.modes = modes

        # Decomp
        self.decomp = series_decomp(configs.moving_avg)
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)

        # NOTE(review): decoder-side blocks are sized with seq_len // 2 + pred_len,
        # while forecast() feeds the decoder label_len + pred_len steps -- this
        # appears to assume label_len == seq_len // 2; confirm against callers.
        if self.version == 'Wavelets':
            encoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=1, base='legendre')
            decoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=1, base='legendre')
            decoder_cross_att = MultiWaveletCross(in_channels=configs.d_model,
                                                  out_channels=configs.d_model,
                                                  seq_len_q=self.seq_len // 2 + self.pred_len,
                                                  seq_len_kv=self.seq_len,
                                                  modes=self.modes,
                                                  ich=configs.d_model,
                                                  base='legendre',
                                                  activation='tanh')
        else:
            encoder_self_att = FourierBlock(in_channels=configs.d_model,
                                            out_channels=configs.d_model,
                                            n_heads=configs.n_heads,
                                            seq_len=self.seq_len,
                                            modes=self.modes,
                                            mode_select_method=self.mode_select)
            decoder_self_att = FourierBlock(in_channels=configs.d_model,
                                            out_channels=configs.d_model,
                                            n_heads=configs.n_heads,
                                            seq_len=self.seq_len // 2 + self.pred_len,
                                            modes=self.modes,
                                            mode_select_method=self.mode_select)
            decoder_cross_att = FourierCrossAttention(in_channels=configs.d_model,
                                                      out_channels=configs.d_model,
                                                      seq_len_q=self.seq_len // 2 + self.pred_len,
                                                      seq_len_kv=self.seq_len,
                                                      modes=self.modes,
                                                      mode_select_method=self.mode_select,
                                                      num_heads=configs.n_heads)
        # Encoder
        # The same encoder_self_att instance is passed to every encoder layer.
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AutoCorrelationLayer(
                        encoder_self_att,  # instead of multi-head attention in transformer
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation
                ) for l in range(configs.e_layers)
            ],
            norm_layer=my_Layernorm(configs.d_model)
        )
        # Decoder
        # Likewise, the decoder attention instances are shared across layers.
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AutoCorrelationLayer(
                        decoder_self_att,
                        configs.d_model, configs.n_heads),
                    AutoCorrelationLayer(
                        decoder_cross_att,
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.c_out,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for l in range(configs.d_layers)
            ],
            norm_layer=my_Layernorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

        if self.task_name == 'imputation':
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'anomaly_detection':
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Decompose the input, decode seasonal and trend parts, return their sum."""
        # decomp init
        mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        seasonal_init, trend_init = self.decomp(x_enc)  # x - moving_avg, moving_avg
        # decoder input
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
        # Zero-pad the seasonal part by pred_len steps along the time axis.
        seasonal_init = F.pad(seasonal_init[:, -self.label_len:, :], (0, 0, 0, self.pred_len))
        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        # dec
        seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, trend=trend_init)
        # final
        dec_out = trend_part + seasonal_part
        return dec_out

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Encode the input and project it to ``c_out`` channels; ``mask`` is unused here."""
        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        # final
        dec_out = self.projection(enc_out)
        return dec_out

    def anomaly_detection(self, x_enc):
        """Encode the input (no time marks) and project it to ``c_out`` channels."""
        # enc
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        # final
        dec_out = self.projection(enc_out)
        return dec_out

    def classification(self, x_enc, x_mark_enc):
        """Encode, mask padded steps with ``x_mark_enc``, flatten and classify."""
        # enc
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, attns = self.encoder(enc_out, attn_mask=None)

        # Output
        output = self.act(enc_out)
        output = self.dropout(output)
        output = output * x_mark_enc.unsqueeze(-1)
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unrecognised task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None

================================================
FILE: models/FiLM.py
def transition(N):
    """Build the continuous-time HiPPO-LegT state matrices.

    Args:
        N: order of the projection (number of coefficients).

    Returns:
        Tuple ``(A, B)`` of float64 arrays with shapes ``(N, N)`` and ``(N, 1)``.
    """
    Q = np.arange(N, dtype=np.float64)
    R = (2 * Q + 1)[:, None]  # / theta
    j, i = np.meshgrid(Q, Q)
    A = np.where(i < j, -1, (-1.) ** (i - j + 1)) * R
    B = (-1.) ** Q[:, None] * R
    return A, B


class HiPPO_LegT(nn.Module):
    def __init__(self, N, dt=1.0, discretization='bilinear'):
        """
        N: the order of the HiPPO projection
        dt: discretization step size - should be roughly inverse to the length of the sequence
        discretization: method name forwarded to ``scipy.signal.cont2discrete``
        """
        super(HiPPO_LegT, self).__init__()
        self.N = N
        A, B = transition(N)
        C = np.ones((1, N))
        D = np.zeros((1,))
        A, B, _, _, _ = signal.cont2discrete((A, B, C, D), dt=dt, method=discretization)

        B = B.squeeze(-1)

        # Buffers are NOT pinned to a hard-coded device: registered buffers
        # follow the module through .to()/.cuda()/.double(), so the layer works
        # on whichever device/dtype the owning model is moved to.
        self.register_buffer('A', torch.Tensor(A))
        self.register_buffer('B', torch.Tensor(B))
        vals = np.arange(0.0, 1.0, dt)
        self.register_buffer('eval_matrix', torch.Tensor(
            ss.eval_legendre(np.arange(N)[:, None], 1 - 2 * vals).T))

    def forward(self, inputs):
        """Run the discretized recurrence along the last axis of ``inputs``.

        inputs : 3-D tensor; the last axis is iterated step by step
        output : (length, dim0, dim1, N) - one coefficient state per step
        """
        # The state is allocated next to the input (same device and dtype)
        # instead of on a module-level global device.
        c = inputs.new_zeros(inputs.shape[:-1] + (self.N,))
        cs = []
        for f in inputs.permute([-1, 0, 1]):
            f = f.unsqueeze(-1)
            new = f @ self.B.unsqueeze(0)
            c = F.linear(c, self.A) + new
            cs.append(c)
        return torch.stack(cs, dim=0)

    def reconstruct(self, c):
        """Project coefficients ``c`` (..., N) through ``eval_matrix``."""
        return (self.eval_matrix @ c.unsqueeze(-1)).squeeze(-1)


class SpectralConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, seq_len, ratio=0.5):
        """
        1D Fourier layer. It does FFT, linear transform, and Inverse FFT.

        Only the lowest ``min(32, seq_len // 2)`` frequency modes are kept.
        ``ratio`` is stored but not used by this layer.
        """
        super(SpectralConv1d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.ratio = ratio
        self.modes = min(32, seq_len // 2)
        self.index = list(range(0, self.modes))

        self.scale = (1 / (in_channels * out_channels))
        self.weights_real = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, len(self.index), dtype=torch.float))
        self.weights_imag = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, len(self.index), dtype=torch.float))

    def compl_mul1d(self, order, x, weights_real, weights_imag):
        """Complex multiply ``x`` with (weights_real + i * weights_imag) via einsum ``order``."""
        return torch.complex(
            torch.einsum(order, x.real, weights_real) - torch.einsum(order, x.imag, weights_imag),
            torch.einsum(order, x.real, weights_imag) + torch.einsum(order, x.imag, weights_real))

    def forward(self, x):
        """Apply the spectral convolution to a 4-D tensor along its last axis."""
        B, H, E, N = x.shape
        x_ft = torch.fft.rfft(x)
        out_ft = torch.zeros(B, H, self.out_channels, x.size(-1) // 2 + 1, device=x.device, dtype=torch.cfloat)
        a = x_ft[:, :, :, :self.modes]
        out_ft[:, :, :, :self.modes] = self.compl_mul1d("bjix,iox->bjox", a, self.weights_real, self.weights_imag)
        x = torch.fft.irfft(out_ft, n=x.size(-1))
        return x


class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2205.08897
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.configs = configs
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        # pred_len == 0 means "reconstruct the whole input window".
        self.pred_len = configs.seq_len if configs.pred_len == 0 else configs.pred_len

        self.seq_len_all = self.seq_len + self.label_len

        self.layers = configs.e_layers
        self.enc_in = configs.enc_in
        self.e_layers = configs.e_layers
        # b, s, f means b, f
        self.affine_weight = nn.Parameter(torch.ones(1, 1, configs.enc_in))
        self.affine_bias = nn.Parameter(torch.zeros(1, 1, configs.enc_in))

        self.multiscale = [1, 2, 4]
        self.window_size = [256]
        configs.ratio = 0.5
        self.legts = nn.ModuleList(
            [HiPPO_LegT(N=n, dt=1. / self.pred_len / i) for n in self.window_size for i in self.multiscale])
        self.spec_conv_1 = nn.ModuleList([SpectralConv1d(in_channels=n, out_channels=n,
                                                         seq_len=min(self.pred_len, self.seq_len),
                                                         ratio=configs.ratio) for n in
                                          self.window_size for _ in range(len(self.multiscale))])
        self.mlp = nn.Linear(len(self.multiscale) * len(self.window_size), 1)

        # NOTE: this projection is not used by imputation()/anomaly_detection()
        # below; it is kept so existing checkpoints keep loading.
        if self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
            self.projection = nn.Linear(
                configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.enc_in * configs.seq_len, configs.num_class)

    def _normalize(self, x_enc):
        """Standardize ``x_enc`` along dim 1; return (normalized, means, stdev)."""
        # Normalization from Non-stationary Transformer
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        x_enc = x_enc / stdev
        return x_enc, means, stdev

    def _denormalize(self, x_dec, means, stdev):
        """Undo the learnable affine transform and the standardization."""
        # De-Normalization from Non-stationary Transformer
        x_dec = x_dec - self.affine_bias
        x_dec = x_dec / (self.affine_weight + 1e-10)
        x_dec = x_dec * stdev
        x_dec = x_dec + means
        return x_dec

    def _multiscale_decode(self, x_enc):
        """Shared core of every task: affine -> HiPPO -> spectral conv -> mix scales.

        Returns a tensor of shape (batch, pred_len, enc_in).
        """
        x_enc = x_enc * self.affine_weight + self.affine_bias
        x_decs = []
        jump_dist = 0
        for i, legt in enumerate(self.legts):
            # Each scale looks at a window that is a multiple of pred_len.
            x_in_len = self.multiscale[i % len(self.multiscale)] * self.pred_len
            x_in = x_enc[:, -x_in_len:]
            x_in_c = legt(x_in.transpose(1, 2)).permute([1, 2, 3, 0])[:, :, :, jump_dist:]
            out1 = self.spec_conv_1[i](x_in_c)
            if self.seq_len >= self.pred_len:
                x_dec_c = out1.transpose(2, 3)[:, :, self.pred_len - 1 - jump_dist, :]
            else:
                x_dec_c = out1.transpose(2, 3)[:, :, -1, :]
            x_dec = x_dec_c @ legt.eval_matrix[-self.pred_len:, :].T
            x_decs.append(x_dec)
        x_dec = torch.stack(x_decs, dim=-1)
        return self.mlp(x_dec).squeeze(-1).permute(0, 2, 1)

    def _normalized_decode(self, x_enc):
        """Normalize, decode and de-normalize; used by all regression-style tasks."""
        x_enc, means, stdev = self._normalize(x_enc)
        x_dec = self._multiscale_decode(x_enc)
        return self._denormalize(x_dec, means, stdev)

    def forecast(self, x_enc, x_mark_enc, x_dec_true, x_mark_dec):
        """Forecast from ``x_enc``; the remaining arguments are unused."""
        return self._normalized_decode(x_enc)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct ``x_enc``; the remaining arguments (incl. ``mask``) are unused."""
        return self._normalized_decode(x_enc)

    def anomaly_detection(self, x_enc):
        """Reconstruct ``x_enc`` for anomaly scoring."""
        return self._normalized_decode(x_enc)

    def classification(self, x_enc, x_mark_enc):
        """Return class logits; ``x_mark_enc`` multiplies the features as a mask."""
        # No input standardization on this path (matches the other tasks only
        # in the affine + multiscale decode part).
        x_dec = self._multiscale_decode(x_enc)

        # Output from Non-stationary Transformer
        output = self.act(x_dec)
        output = self.dropout(output)
        output = output * x_mark_enc.unsqueeze(-1)
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unknown task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/pdf/2311.06184.pdf
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        # The task-dependent choice is final: it is no longer overwritten by an
        # unconditional ``configs.pred_len`` assignment further down.
        if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len
        self.embed_size = 128  # embed_size
        self.hidden_size = 256  # hidden_size
        self.feature_size = configs.enc_in  # channels
        self.seq_len = configs.seq_len
        self.channel_independence = configs.channel_independence
        self.sparsity_threshold = 0.01
        self.scale = 0.02
        self.embeddings = nn.Parameter(torch.randn(1, self.embed_size))
        self.r1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.i1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.rb1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.ib1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.r2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.i2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.rb2 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.ib2 = nn.Parameter(self.scale * torch.randn(self.embed_size))

        self.fc = nn.Sequential(
            nn.Linear(self.seq_len * self.embed_size, self.hidden_size),
            nn.LeakyReLU(),
            nn.Linear(self.hidden_size, self.pred_len)
        )

    # dimension extension
    def tokenEmb(self, x):
        """Lift each scalar to ``embed_size`` dims: [B, T, N] -> [B, N, T, D]."""
        # x: [Batch, Input length, Channel]
        x = x.permute(0, 2, 1)
        x = x.unsqueeze(3)
        # N*T*1 x 1*D = N*T*D
        y = self.embeddings
        return x * y

    # frequency temporal learner
    def MLP_temporal(self, x, B, N, L):
        """Apply the frequency-domain MLP along the time axis of [B, N, T, D]."""
        # [B, N, T, D]
        x = torch.fft.rfft(x, dim=2, norm='ortho')  # FFT on L dimension
        y = self.FreMLP(B, N, L, x, self.r2, self.i2, self.rb2, self.ib2)
        x = torch.fft.irfft(y, n=self.seq_len, dim=2, norm="ortho")
        return x

    # frequency channel learner
    def MLP_channel(self, x, B, N, L):
        """Apply the frequency-domain MLP along the channel axis of [B, N, T, D]."""
        # [B, N, T, D]
        x = x.permute(0, 2, 1, 3)
        # [B, T, N, D]
        x = torch.fft.rfft(x, dim=2, norm='ortho')  # FFT on N dimension
        y = self.FreMLP(B, L, N, x, self.r1, self.i1, self.rb1, self.ib1)
        x = torch.fft.irfft(y, n=self.feature_size, dim=2, norm="ortho")
        x = x.permute(0, 2, 1, 3)
        # [B, N, T, D]
        return x

    # frequency-domain MLPs
    # dimension: FFT along the dimension, r: the real part of weights, i: the imaginary part of weights
    # rb: the real part of bias, ib: the imaginary part of bias
    def FreMLP(self, B, nd, dimension, x, r, i, rb, ib):
        """One complex-valued layer with ReLU and soft-shrinkage on a complex tensor.

        ``B``, ``nd`` and ``dimension`` are kept for interface compatibility;
        the output shape is derived from ``x`` itself.
        """
        # NOTE(review): the repeated subscript in 'dd' makes einsum read only
        # the diagonal of r / i, i.e. this is a per-feature scaling rather than
        # a full matrix product. Left unchanged to keep trained weights valid.
        o1_real = F.relu(
            torch.einsum('bijd,dd->bijd', x.real, r)
            - torch.einsum('bijd,dd->bijd', x.imag, i)
            + rb
        )
        o1_imag = F.relu(
            torch.einsum('bijd,dd->bijd', x.imag, r)
            + torch.einsum('bijd,dd->bijd', x.real, i)
            + ib
        )

        y = torch.stack([o1_real, o1_imag], dim=-1)
        y = F.softshrink(y, lambd=self.sparsity_threshold)
        y = torch.view_as_complex(y)
        return y

    def forecast(self, x_enc):
        """Map [B, T, N] inputs to [B, pred_len, N] predictions."""
        # x: [Batch, Input length, Channel]
        B, T, N = x_enc.shape
        # embedding x: [B, N, T, D]
        x = self.tokenEmb(x_enc)
        bias = x
        # [B, N, T, D]
        # Accept the flag both as the string '0' and as the integer 0, so the
        # channel learner is not silently skipped when the option is an int.
        if str(self.channel_independence) == '0':
            x = self.MLP_channel(x, B, N, T)
        # [B, N, T, D]
        x = self.MLP_temporal(x, B, N, T)
        x = x + bias
        x = self.fc(x.reshape(B, N, -1)).permute(0, 2, 1)
        return x

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Run a forecast; raises ``ValueError`` for any non-forecast task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        else:
            raise ValueError('Only forecast tasks implemented yet')
layers.SelfAttention_Family import ProbAttention, AttentionLayer from layers.Embed import DataEmbedding class Model(nn.Module): """ Informer with Propspare attention in O(LlogL) complexity Paper link: https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132 """ def __init__(self, configs): super(Model, self).__init__() self.task_name = configs.task_name self.pred_len = configs.pred_len self.label_len = configs.label_len # Embedding self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout) # Encoder self.encoder = Encoder( [ EncoderLayer( AttentionLayer( ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), configs.d_model, configs.d_ff, dropout=configs.dropout, activation=configs.activation ) for l in range(configs.e_layers) ], [ ConvLayer( configs.d_model ) for l in range(configs.e_layers - 1) ] if configs.distil and ('forecast' in configs.task_name) else None, norm_layer=torch.nn.LayerNorm(configs.d_model) ) # Decoder self.decoder = Decoder( [ DecoderLayer( AttentionLayer( ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), AttentionLayer( ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), configs.d_model, configs.d_ff, dropout=configs.dropout, activation=configs.activation, ) for l in range(configs.d_layers) ], norm_layer=torch.nn.LayerNorm(configs.d_model), projection=nn.Linear(configs.d_model, configs.c_out, bias=True) ) if self.task_name == 'imputation': self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) if self.task_name == 'anomaly_detection': self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) if self.task_name == 
'classification': self.act = F.gelu self.dropout = nn.Dropout(configs.dropout) self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): enc_out = self.enc_embedding(x_enc, x_mark_enc) dec_out = self.dec_embedding(x_dec, x_mark_dec) enc_out, attns = self.encoder(enc_out, attn_mask=None) dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) return dec_out # [B, L, D] def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): # Normalization mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E x_enc = x_enc - mean_enc std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E x_enc = x_enc / std_enc enc_out = self.enc_embedding(x_enc, x_mark_enc) dec_out = self.dec_embedding(x_dec, x_mark_dec) enc_out, attns = self.encoder(enc_out, attn_mask=None) dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) dec_out = dec_out * std_enc + mean_enc return dec_out # [B, L, D] def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): # enc enc_out = self.enc_embedding(x_enc, x_mark_enc) enc_out, attns = self.encoder(enc_out, attn_mask=None) # final dec_out = self.projection(enc_out) return dec_out def anomaly_detection(self, x_enc): # enc enc_out = self.enc_embedding(x_enc, None) enc_out, attns = self.encoder(enc_out, attn_mask=None) # final dec_out = self.projection(enc_out) return dec_out def classification(self, x_enc, x_mark_enc): # enc enc_out = self.enc_embedding(x_enc, None) enc_out, attns = self.encoder(enc_out, attn_mask=None) # Output output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity output = self.dropout(output) output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) output = self.projection(output) # (batch_size, num_classes) 
class KANADModel(nn.Module):
    """Convolutional reconstruction network over cosine features of a window.

    The input channels are ``order`` fixed cosine bases, ``order`` cosines of
    the scaled input and the raw input itself (``2 * order + 1`` in total).
    """

    def __init__(self, window: int, order: int, *args, **kwargs) -> None:
        super().__init__()
        self.order = order
        self.window = window
        self.channels = 2 * self.order + 1
        self.register_buffer(
            "orders",
            self._create_custom_periodic_cosine(self.window, self.order).unsqueeze(
                0
            ),  # (1, order, window)
        )
        self.out_conv = nn.Conv1d(self.channels, 1, 1, bias=False)
        self.act = nn.GELU()
        self.bn1 = nn.BatchNorm1d(self.channels)
        self.bn3 = nn.BatchNorm1d(1)
        self.bn2 = nn.BatchNorm1d(self.channels)
        self.init_conv = nn.Conv1d(self.channels, self.channels, 3, 1, 1, bias=False)
        self.inner_conv = nn.Conv1d(self.channels, self.channels, 3, 1, 1, bias=False)
        self.final_conv = nn.Linear(window, window)

    def forward(self, x: torch.Tensor, return_last: bool = False, *args, **kwargs):
        """Reconstruct ``x`` of shape (batch, window).

        Returns the (batch, window) output, or the pair
        ``(output, output_with_channel_dim)`` when ``return_last`` is true.
        """
        raw = x.unsqueeze(1)  # (batch, 1, window), reused as the last skip
        features = torch.concat(
            [self.orders.expand(x.size(0), -1, -1)]
            + [torch.cos(order * raw) for order in range(1, self.order + 1)]
            + [raw],
            dim=1,
        )  # batch,self.channel,window

        ff = self.init_conv(features)
        ff = self.bn1(ff)
        ff = self.act(ff)
        # First skip connection: the full feature stack.
        ff = self.inner_conv(ff) + features
        ff = self.bn2(ff)
        ff = self.act(ff)
        # Second skip connection: the raw input.
        ff = self.out_conv(ff) + raw
        ff = self.bn3(ff)
        ff = self.act(ff)
        ff = self.final_conv(ff)
        if return_last:
            return ff.squeeze(1), ff
        return ff.squeeze(1)

    def _create_custom_periodic_cosine(self, window: int, period) -> torch.Tensor:
        """Return a (num_periods, window) table of ``cos(2 * pi * t / p)``.

        ``period`` is either an explicit list of periods or an integer ``k``
        meaning periods ``1..k``; ``t`` runs over ``window`` points in [0, 1).
        """
        pl = period if isinstance(period, list) else [i for i in range(1, period + 1)]
        # Build the grid from integer steps: ``torch.arange(0, 1, 1 / window)``
        # can produce ``window + 1`` samples because of floating-point rounding
        # (e.g. window=49), which would not fit the (d, window) table.
        grid = torch.arange(window, dtype=torch.float32) / window
        periods = torch.as_tensor(pl, dtype=torch.float32)
        return torch.cos(grid.unsqueeze(0) / periods.unsqueeze(1) * 2 * np.pi)
class FourierFilter(nn.Module):
    """
    Fourier Filter: to time-variant and time-invariant term
    """

    def __init__(self, mask_spectrum):
        """
        mask_spectrum: indices of the rFFT bins (along dim 1) to zero out
        """
        super(FourierFilter, self).__init__()
        self.mask_spectrum = mask_spectrum

    def forward(self, x):
        """Split ``x`` into two same-shaped parts that sum back to ``x``.

        Returns ``(x_var, x_inv)``: ``x_var`` is ``x`` with the masked frequency
        bins removed, ``x_inv`` is the remainder ``x - x_var``.
        """
        xf = torch.fft.rfft(x, dim=1)
        mask = torch.ones_like(xf)
        mask[:, self.mask_spectrum, :] = 0
        # Pass the original length explicitly: without ``n`` the inverse
        # transform of an odd-length signal comes back one sample short and the
        # subtraction below fails.
        x_var = torch.fft.irfft(xf * mask, n=x.shape[1], dim=1)
        x_inv = x - x_var

        return x_var, x_inv
class KPLayerApprox(nn.Module):
    """
    Find koopman transition of linear system by DMD with multistep K approximation
    """

    def __init__(self):
        super(KPLayerApprox, self).__init__()

        self.K = None  # B E E
        self.K_step = None  # B E E

    @staticmethod
    def _identity_if_nan(mat, batch, message):
        """Return ``mat``, or a batch of identity matrices if ``mat`` has NaNs.

        The identity is created with the dtype and device of ``mat`` so it can
        be used in ``bmm`` regardless of the precision the model runs in.
        """
        if torch.isnan(mat).any():
            print(message)
            eye = torch.eye(mat.shape[1], dtype=mat.dtype, device=mat.device)
            return eye.unsqueeze(0).repeat(batch, 1, 1)
        return mat

    def forward(self, z, pred_len=1):
        """Fit the one-step operator on ``z`` and roll it forward.

        z: B L E, koopman invariance space representation
        returns ``(z_rec, z_pred)``:
            z_rec: B L E, reconstructed representation
            z_pred: B S E, forecasting representation (S = pred_len)
        """
        B, input_len, E = z.shape
        assert input_len > 1, 'snapshots number should be larger than 1'
        x, y = z[:, :-1], z[:, 1:]

        # solve linear system
        self.K = self._identity_if_nan(
            torch.linalg.lstsq(x, y).solution,  # B E E
            B,
            'Encounter K with nan, replace K by identity matrix',
        )

        z_rec = torch.cat((z[:, :1], torch.bmm(x, self.K)), dim=1)  # B L E

        # A single multi-step operator advances a whole block of snapshots at
        # once; the block cannot be longer than the available history.
        self.K_step = self._identity_if_nan(
            torch.linalg.matrix_power(self.K, min(pred_len, input_len)),
            B,
            'Encounter multistep K with nan, replace it by identity matrix',
        )

        if pred_len <= input_len:
            z_pred = torch.bmm(z[:, -pred_len:, :], self.K_step)
        else:
            # Repeatedly push the full window forward and keep the first
            # pred_len predicted snapshots.
            temp_z_pred, all_pred = z, []
            for _ in range(math.ceil(pred_len / input_len)):
                temp_z_pred = torch.bmm(temp_z_pred, self.K_step)
                all_pred.append(temp_z_pred)
            z_pred = torch.cat(all_pred, dim=1)[:, :pred_len, :]

        return z_rec, z_pred
the future of time-invariant term """ def __init__(self, input_len=96, pred_len=96, dynamic_dim=128, encoder=None, decoder=None): super(TimeInvKP, self).__init__() self.dynamic_dim = dynamic_dim self.input_len = input_len self.pred_len = pred_len self.encoder = encoder self.decoder = decoder K_init = torch.randn(self.dynamic_dim, self.dynamic_dim) U, _, V = torch.svd(K_init) # stable initialization self.K = nn.Linear(self.dynamic_dim, self.dynamic_dim, bias=False) self.K.weight.data = torch.mm(U, V.t()) def forward(self, x): # x: B L C res = x.transpose(1, 2) # B C L res = self.encoder(res) # B C H res = self.K(res) # B C H res = self.decoder(res) # B C S res = res.transpose(1, 2) # B S C return res class Model(nn.Module): ''' Paper link: https://arxiv.org/pdf/2305.18803.pdf ''' def __init__(self, configs, dynamic_dim=128, hidden_dim=64, hidden_layers=2, num_blocks=3, multistep=False): """ mask_spectrum: list, shared frequency spectrums seg_len: int, segment length of time series dynamic_dim: int, latent dimension of koopman embedding hidden_dim: int, hidden dimension of en/decoder hidden_layers: int, number of hidden layers of en/decoder num_blocks: int, number of Koopa blocks multistep: bool, whether to use approximation for multistep K alpha: float, spectrum filter ratio """ super(Model, self).__init__() self.task_name = configs.task_name self.enc_in = configs.enc_in self.input_len = configs.seq_len self.pred_len = configs.pred_len self.seg_len = self.pred_len self.num_blocks = num_blocks self.dynamic_dim = dynamic_dim self.hidden_dim = hidden_dim self.hidden_layers = hidden_layers self.multistep = multistep self.alpha = 0.2 self.mask_spectrum = self._get_mask_spectrum(configs) self.disentanglement = FourierFilter(self.mask_spectrum) # shared encoder/decoder to make koopman embedding consistent self.time_inv_encoder = MLP(f_in=self.input_len, f_out=self.dynamic_dim, activation='relu', hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) 
self.time_inv_decoder = MLP(f_in=self.dynamic_dim, f_out=self.pred_len, activation='relu', hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) self.time_inv_kps = self.time_var_kps = nn.ModuleList([ TimeInvKP(input_len=self.input_len, pred_len=self.pred_len, dynamic_dim=self.dynamic_dim, encoder=self.time_inv_encoder, decoder=self.time_inv_decoder) for _ in range(self.num_blocks)]) # shared encoder/decoder to make koopman embedding consistent self.time_var_encoder = MLP(f_in=self.seg_len*self.enc_in, f_out=self.dynamic_dim, activation='tanh', hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) self.time_var_decoder = MLP(f_in=self.dynamic_dim, f_out=self.seg_len*self.enc_in, activation='tanh', hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) self.time_var_kps = nn.ModuleList([ TimeVarKP(enc_in=configs.enc_in, input_len=self.input_len, pred_len=self.pred_len, seg_len=self.seg_len, dynamic_dim=self.dynamic_dim, encoder=self.time_var_encoder, decoder=self.time_var_decoder, multistep=self.multistep) for _ in range(self.num_blocks)]) def _get_mask_spectrum(self, configs): """ get shared frequency spectrums """ train_data, train_loader = data_provider(configs, 'train') amps = 0.0 for data in train_loader: lookback_window = data[0] amps += abs(torch.fft.rfft(lookback_window, dim=1)).mean(dim=0).mean(dim=1) mask_spectrum = amps.topk(int(amps.shape[0]*self.alpha)).indices return mask_spectrum # as the spectrums of time-invariant component def forecast(self, x_enc): # Series Stationarization adopted from NSformer mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E x_enc = x_enc - mean_enc std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() x_enc = x_enc / std_enc # Koopman Forecasting residual, forecast = x_enc, None for i in range(self.num_blocks): time_var_input, time_inv_input = self.disentanglement(residual) time_inv_output = self.time_inv_kps[i](time_inv_input) time_var_backcast, 
class IEBlock(nn.Module):
    """Block of three linear maps applied on alternating axes of a 3-D tensor."""

    def __init__(self, input_dim, hid_dim, output_dim, num_node):
        super(IEBlock, self).__init__()

        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.output_dim = output_dim
        self.num_node = num_node

        self._build()

    def _build(self):
        self.spatial_proj = nn.Sequential(
            nn.Linear(self.input_dim, self.hid_dim),
            nn.LeakyReLU(),
            nn.Linear(self.hid_dim, self.hid_dim // 4)
        )

        self.channel_proj = nn.Linear(self.num_node, self.num_node)
        # Start the node-mixing layer as an identity map.
        torch.nn.init.eye_(self.channel_proj.weight)

        self.output_proj = nn.Linear(self.hid_dim // 4, self.output_dim)

    def forward(self, x):
        """Map (batch, input_dim, num_node) to (batch, output_dim, num_node)."""
        x = self.spatial_proj(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1) + self.channel_proj(x.permute(0, 2, 1))
        x = self.output_proj(x.permute(0, 2, 1))

        x = x.permute(0, 2, 1)

        return x


class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2207.01186
    """

    def __init__(self, configs, chunk_size=24):
        """
        chunk_size: int, reshape T into [num_chunks, chunk_size]
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len

        if configs.task_name == 'long_term_forecast' or configs.task_name == 'short_term_forecast':
            self.chunk_size = min(configs.pred_len, configs.seq_len, chunk_size)
        else:
            self.chunk_size = min(configs.seq_len, chunk_size)
        # assert (self.seq_len % self.chunk_size == 0)
        if self.seq_len % self.chunk_size != 0:
            self.seq_len += (self.chunk_size - self.seq_len % self.chunk_size)  # padding in order to ensure complete division
        self.num_chunks = self.seq_len // self.chunk_size

        self.d_model = configs.d_model
        self.enc_in = configs.enc_in
        self.dropout = configs.dropout
        if self.task_name == 'classification':
            # NOTE: ``act`` and ``dropout`` are created here but not applied in
            # classification() below.
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.enc_in * configs.seq_len, configs.num_class)
        self._build()

    def _build(self):
        self.layer_1 = IEBlock(
            input_dim=self.chunk_size,
            hid_dim=self.d_model // 4,
            output_dim=self.d_model // 4,
            num_node=self.num_chunks
        )

        self.chunk_proj_1 = nn.Linear(self.num_chunks, 1)

        self.layer_2 = IEBlock(
            input_dim=self.chunk_size,
            hid_dim=self.d_model // 4,
            output_dim=self.d_model // 4,
            num_node=self.num_chunks
        )

        self.chunk_proj_2 = nn.Linear(self.num_chunks, 1)

        self.layer_3 = IEBlock(
            input_dim=self.d_model // 2,
            hid_dim=self.d_model // 2,
            output_dim=self.pred_len,
            num_node=self.enc_in
        )

        self.ar = nn.Linear(self.seq_len, self.pred_len)

    def encoder(self, x):
        """Encode (batch, T, N) into (batch, pred_len, N).

        ``T`` may be shorter than the padded ``self.seq_len``; the tail is
        zero-padded.
        """
        B, T, N = x.size()

        # padding: allocate directly with the input's dtype and device instead
        # of building a default-dtype CPU tensor and copying it every call.
        x = torch.cat([x, x.new_zeros((B, self.seq_len - T, N))], dim=1)

        # linear shortcut from the padded input to the output
        highway = self.ar(x.permute(0, 2, 1))
        highway = highway.permute(0, 2, 1)

        # continuous sampling
        x1 = x.reshape(B, self.num_chunks, self.chunk_size, N)
        x1 = x1.permute(0, 3, 2, 1)
        x1 = x1.reshape(-1, self.chunk_size, self.num_chunks)
        x1 = self.layer_1(x1)
        x1 = self.chunk_proj_1(x1).squeeze(dim=-1)

        # interval sampling
        x2 = x.reshape(B, self.chunk_size, self.num_chunks, N)
        x2 = x2.permute(0, 3, 1, 2)
        x2 = x2.reshape(-1, self.chunk_size, self.num_chunks)
        x2 = self.layer_2(x2)
        x2 = self.chunk_proj_2(x2).squeeze(dim=-1)

        x3 = torch.cat([x1, x2], dim=-1)

        x3 = x3.reshape(B, N, -1)
        x3 = x3.permute(0, 2, 1)

        out = self.layer_3(x3)

        out = out + highway
        return out

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast from ``x_enc``; the remaining arguments are unused."""
        return self.encoder(x_enc)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct ``x_enc``; the remaining arguments are unused."""
        return self.encoder(x_enc)

    def anomaly_detection(self, x_enc):
        """Reconstruct ``x_enc`` for anomaly scoring."""
        return self.encoder(x_enc)

    def classification(self, x_enc, x_mark_enc):
        """Return class logits; ``x_mark_enc`` is unused."""
        enc_out = self.encoder(x_enc)

        # Output
        output = enc_out.reshape(enc_out.shape[0], -1)  # (batch_size, seq_length * d_model)
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unknown task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None
self.conv = nn.ModuleList([nn.Conv1d(in_channels=feature_size, out_channels=feature_size, kernel_size=i, padding=i // 2, stride=i) for i in conv_kernel]) # upsampling convolution self.conv_trans = nn.ModuleList([nn.ConvTranspose1d(in_channels=feature_size, out_channels=feature_size, kernel_size=i, padding=0, stride=i) for i in conv_kernel]) self.decomp = nn.ModuleList([series_decomp(k) for k in decomp_kernel]) self.merge = torch.nn.Conv2d(in_channels=feature_size, out_channels=feature_size, kernel_size=(len(self.conv_kernel), 1)) # feedforward network self.conv1 = nn.Conv1d(in_channels=feature_size, out_channels=feature_size * 4, kernel_size=1) self.conv2 = nn.Conv1d(in_channels=feature_size * 4, out_channels=feature_size, kernel_size=1) self.norm1 = nn.LayerNorm(feature_size) self.norm2 = nn.LayerNorm(feature_size) self.norm = torch.nn.LayerNorm(feature_size) self.act = torch.nn.Tanh() self.drop = torch.nn.Dropout(0.05) def conv_trans_conv(self, input, conv1d, conv1d_trans, isometric): batch, seq_len, channel = input.shape x = input.permute(0, 2, 1) # downsampling convolution x1 = self.drop(self.act(conv1d(x))) x = x1 # isometric convolution zeros = torch.zeros((x.shape[0], x.shape[1], x.shape[2] - 1), device=self.device) x = torch.cat((zeros, x), dim=-1) x = self.drop(self.act(isometric(x))) x = self.norm((x + x1).permute(0, 2, 1)).permute(0, 2, 1) # upsampling convolution x = self.drop(self.act(conv1d_trans(x))) x = x[:, :, :seq_len] # truncate x = self.norm(x.permute(0, 2, 1) + input) return x def forward(self, src): self.device = src.device # multi-scale multi = [] for i in range(len(self.conv_kernel)): src_out, trend1 = self.decomp[i](src) src_out = self.conv_trans_conv(src_out, self.conv[i], self.conv_trans[i], self.isometric_conv[i]) multi.append(src_out) # merge mg = torch.tensor([], device=self.device) for i in range(len(self.conv_kernel)): mg = torch.cat((mg, multi[i].unsqueeze(1).to(self.device)), dim=1) mg = self.merge(mg.permute(0, 3, 1, 
2)).squeeze(-2).permute(0, 2, 1) y = self.norm1(mg) y = self.conv2(self.conv1(y.transpose(-1, 1))).transpose(-1, 1) return self.norm2(mg + y) class SeasonalPrediction(nn.Module): def __init__(self, embedding_size=512, n_heads=8, dropout=0.05, d_layers=1, decomp_kernel=[32], c_out=1, conv_kernel=[2, 4], isometric_kernel=[18, 6], device='cuda'): super(SeasonalPrediction, self).__init__() self.mic = nn.ModuleList([MIC(feature_size=embedding_size, n_heads=n_heads, decomp_kernel=decomp_kernel, conv_kernel=conv_kernel, isometric_kernel=isometric_kernel, device=device) for i in range(d_layers)]) self.projection = nn.Linear(embedding_size, c_out) def forward(self, dec): for mic_layer in self.mic: dec = mic_layer(dec) return self.projection(dec) class Model(nn.Module): """ Paper link: https://openreview.net/pdf?id=zt53IDUR1U """ def __init__(self, configs, conv_kernel=[12, 16]): """ conv_kernel: downsampling and upsampling convolution kernel_size """ super(Model, self).__init__() decomp_kernel = [] # kernel of decomposition operation isometric_kernel = [] # kernel of isometric convolution for ii in conv_kernel: if ii % 2 == 0: # the kernel of decomposition operation must be odd decomp_kernel.append(ii + 1) isometric_kernel.append((configs.seq_len + configs.pred_len + ii) // ii) else: decomp_kernel.append(ii) isometric_kernel.append((configs.seq_len + configs.pred_len + ii - 1) // ii) self.task_name = configs.task_name self.pred_len = configs.pred_len self.seq_len = configs.seq_len # Multiple Series decomposition block from FEDformer self.decomp_multi = series_decomp_multi(decomp_kernel) # embedding self.dec_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.conv_trans = SeasonalPrediction(embedding_size=configs.d_model, n_heads=configs.n_heads, dropout=configs.dropout, d_layers=configs.d_layers, decomp_kernel=decomp_kernel, c_out=configs.c_out, conv_kernel=conv_kernel, isometric_kernel=isometric_kernel, 
def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
    """Forecast ``pred_len`` steps as seasonal prediction plus regressed trend.

    Args:
        x_enc: encoder input, indexed as (batch, time, channel).
        x_mark_enc: unused here; kept for the shared model interface.
        x_dec: unused here; kept for the shared model interface.
        x_mark_dec: time features forwarded to ``dec_embedding``.

    Returns:
        Tensor holding the last ``pred_len`` steps of the prediction.
    """
    # Multi-scale Hybrid Decomposition
    seasonal_init_enc, trend = self.decomp_multi(x_enc)
    trend = self.regression(trend.permute(0, 2, 1)).permute(0, 2, 1)

    # embedding: the future part of the seasonal input is a zero placeholder.
    # It is sized from the tensor it is concatenated with (not from x_dec), so
    # batch/channel/dtype/device always agree and x_dec may be omitted.
    seasonal_hist = seasonal_init_enc[:, -self.seq_len:, :]
    zeros = seasonal_hist.new_zeros(
        (seasonal_hist.shape[0], self.pred_len, seasonal_hist.shape[2]))
    seasonal_init_dec = torch.cat([seasonal_hist, zeros], dim=1)

    dec_out = self.dec_embedding(seasonal_init_dec, x_mark_dec)
    dec_out = self.conv_trans(dec_out)
    dec_out = dec_out[:, -self.pred_len:, :] + trend[:, -self.pred_len:, :]
    return dec_out
def FFT_for_Period(x, k=2):
    """Pick the ``k`` dominant frequencies of ``x`` along dim 1.

    Args:
        x: tensor laid out as [B, T, C].
        k: number of frequencies to keep.

    Returns:
        A pair ``(period, weight)``: ``period`` is a NumPy integer array with
        ``T // frequency_index`` for each selected frequency, and ``weight``
        is the channel-averaged amplitude of those frequencies per sample,
        shaped [B, k].
    """
    spectrum = torch.fft.rfft(x, dim=1)
    amplitude = abs(spectrum)

    # average over batch, then over channels -> one score per frequency bin
    score = amplitude.mean(0).mean(-1)
    score[0] = 0  # ignore the DC component

    top_idx = torch.topk(score, k).indices
    top_idx = top_idx.detach().cpu().numpy()

    periods = x.shape[1] // top_idx
    per_sample_amp = amplitude.mean(-1)
    return periods, per_sample_amp[:, top_idx]
def forward(self, x):
    """Aggregate attention outputs computed at the ``k`` dominant periods.

    ``x`` is unpacked as (B, T, N). For each detected period the running
    tensor is passed through that period's graph block, zero-padded to a
    multiple of the period, folded into period-length segments for
    attention, unfolded and cut back to ``seq_len``. The per-period results
    are combined with softmax-normalised spectral weights and added to the
    running tensor.
    """
    batch, steps, nodes = x.size()
    periods, period_amp = FFT_for_Period(x, self.k)

    branch_outs = []
    for idx in range(self.k):
        period = periods[idx]

        # graph convolution; note that x is rebound, so later periods (and
        # the residual below) see the output of the earlier graph blocks
        x = self.gconv[idx](x)

        # padding up to the next multiple of the period
        if self.seq_len % period != 0:
            padded_len = (self.seq_len // period + 1) * period
            pad = torch.zeros([x.shape[0], padded_len - self.seq_len, x.shape[2]]).to(x.device)
            seg = torch.cat([x, pad], dim=1)
        else:
            padded_len = self.seq_len
            seg = x
        seg = seg.reshape(batch, padded_len // period, period, nodes)

        # multi-head attention over each period-length segment
        seg = seg.reshape(-1, period, nodes)
        seg = self.gelu(self.norm(self.att0(seg)))
        seg = seg.reshape(batch, -1, period, nodes).reshape(batch, -1, nodes)

        branch_outs.append(seg[:, :self.seq_len, :])

    stacked = torch.stack(branch_outs, dim=-1)

    # adaptive aggregation
    weights = F.softmax(period_amp, dim=1)
    weights = weights.unsqueeze(1).unsqueeze(1).repeat(1, steps, nodes, 1)
    fused = torch.sum(stacked * weights, -1)

    # residual connection
    return fused + x
def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """Predict ``pred_len`` future steps from ``x_enc``.

    The input is standardised along dim 1, embedded, passed through
    ``self.layer`` blocks (each followed by ``layer_norm``), projected,
    mapped to the prediction horizon by ``seq2pred`` and finally
    de-standardised. ``x_dec``, ``x_mark_dec`` and ``mask`` are not read.
    """
    # Normalization from Non-stationary Transformer
    means = x_enc.mean(1, keepdim=True).detach()
    x_enc = x_enc - means
    stdev = torch.sqrt(
        torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
    x_enc.div_(stdev)

    # embedding
    hidden = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
    for depth in range(self.layer):
        block = self.model[depth]
        hidden = self.layer_norm(block(hidden))

    # project back to the series space, then to the forecast horizon
    pred = self.projection(hidden)
    pred = self.seq2pred(pred.transpose(1, 2)).transpose(1, 2)

    # De-Normalization from Non-stationary Transformer
    scale = stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
    shift = means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
    pred = pred * scale
    pred = pred + shift
    return pred[:, -self.pred_len:, :]
def anomaly_detection(self, x_enc):
    """Reconstruct ``x_enc`` for anomaly scoring.

    The input is standardised along dim 1, embedded without time marks,
    passed through ``self.layer`` blocks (each followed by ``layer_norm``),
    projected back and de-standardised over the full input length.
    """
    # Normalization from Non-stationary Transformer
    means = x_enc.mean(1, keepdim=True).detach()
    x_enc = x_enc - means
    stdev = torch.sqrt(
        torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
    x_enc.div_(stdev)
    _, num_steps, _ = x_enc.shape

    # embedding
    hidden = self.enc_embedding(x_enc, None)  # [B,T,C]
    for depth in range(self.layer):
        block = self.model[depth]
        hidden = self.layer_norm(block(hidden))

    # project back
    recon = self.projection(hidden)

    # De-Normalization from Non-stationary Transformer
    scale = stdev[:, 0, :].unsqueeze(1).repeat(1, num_steps, 1)
    shift = means[:, 0, :].unsqueeze(1).repeat(1, num_steps, 1)
    recon = recon * scale
    recon = recon + shift
    return recon
def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """Dispatch to the task-specific method selected by ``self.task_name``.

    Returns the forecast (cut to the last ``pred_len`` steps), the
    imputation, the anomaly-detection reconstruction or the classification
    output; returns ``None`` for any other task name.
    """
    task = self.task_name

    if task in ('long_term_forecast', 'short_term_forecast'):
        prediction = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return prediction[:, -self.pred_len:, :]  # [B, L, D]

    if task == 'imputation':
        return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]

    if task == 'anomaly_detection':
        return self.anomaly_detection(x_enc)  # [B, L, D]

    if task == 'classification':
        return self.classification(x_enc, x_mark_enc)  # [B, N]

    return None
class Model(nn.Module):
    """
    Mamba, linear-time sequence modeling with selective state spaces O(L)
    Paper link: https://arxiv.org/abs/2312.00752
    Implementation reference: https://github.com/johnma2006/mamba-minimal/

    Only the forecasting tasks are handled; ``forward`` returns ``None`` for
    every other ``task_name``.
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len

        self.d_inner = configs.d_model * configs.expand
        self.dt_rank = math.ceil(configs.d_model / 16)

        self.embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed,
                                       configs.freq, configs.dropout)

        blocks = []
        for _ in range(configs.e_layers):
            blocks.append(ResidualBlock(configs, self.d_inner, self.dt_rank))
        self.layers = nn.ModuleList(blocks)

        self.norm = RMSNorm(configs.d_model)
        self.out_layer = nn.Linear(configs.d_model, configs.c_out, bias=False)

    def forecast(self, x_enc, x_mark_enc):
        """Standardise along dim 1, run the residual stack, de-standardise."""
        shift = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - shift
        scale = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        normalised = centered / scale

        hidden = self.embedding(normalised, x_mark_enc)
        for block in self.layers:
            hidden = block(hidden)

        projected = self.out_layer(self.norm(hidden))
        return projected * scale + shift

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the last ``pred_len`` forecast steps, or ``None`` for other tasks."""
        if self.task_name not in ['short_term_forecast', 'long_term_forecast']:
            return None
        prediction = self.forecast(x_enc, x_mark_enc)
        return prediction[:, -self.pred_len:, :]
def ssm(self, x):
    """
    Algorithm 2 in Section 3.2 in the paper.

    Derives the state-space parameters (A and D from the learned tensors;
    delta, B and C from a projection of ``x``) and hands them to
    ``selective_scan``.
    """
    _, n_state = self.A_log.shape

    A = -torch.exp(self.A_log.float())  # [d_in, n]
    D = self.D.float()  # [d_in]

    projected = self.x_proj(x)  # [B, L, d_rank + 2 * d_ff]
    # delta: [B, L, d_rank]; B, C: [B, L, n]
    delta, B, C = torch.split(projected, [self.dt_rank, n_state, n_state], dim=-1)
    delta = F.softplus(self.dt_proj(delta))  # [B, L, d_in]

    return self.selective_scan(x, delta, A, B, C, D)
class RMSNorm(nn.Module):
    """Root-mean-square normalisation over the last dimension with a learned gain."""

    def __init__(self, d_model, eps=1e-5):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(d_model))

    def forward(self, x):
        """Scale ``x`` by the reciprocal RMS of its last dim, then by ``weight``."""
        mean_square = torch.pow(x, 2).mean(-1, keepdim=True)
        inv_rms = torch.rsqrt(mean_square + self.eps)
        return x * inv_rms * self.weight
class Model(nn.Module):
    """MambaSL: Exploring Single-Layer Mamba for Time Series Classification
    - Paper Link: https://openreview.net/pdf?id=YDl4vqQqGP
    - Original Repo: https://github.com/yoom618/MambaSL.
      removed all extra codes for ablation study and further analysis.

    Only the ``classification`` task is supported; any other ``task_name``
    raises ``ValueError`` both at construction time and in ``forward``.
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.c_out = configs.c_out
        self.dropout = configs.dropout
        self.num_kernels = configs.num_kernels

        mamba_core = Mamba_TimeVariant(
            d_model=configs.d_model,
            d_state=configs.d_ff,
            d_conv=configs.d_conv,
            expand=configs.expand,
            timevariant_dt=bool(configs.tv_dt),  # only available in Mamba_TimeVariant
            timevariant_B=bool(configs.tv_B),    # only available in Mamba_TimeVariant
            timevariant_C=bool(configs.tv_C),    # only available in Mamba_TimeVariant
            use_D=bool(configs.use_D),           # use D (skip connection) or not
            device=configs.device,
        )
        self.mamba = nn.Sequential(
            mamba_core,
            nn.LayerNorm(configs.d_model),
            nn.SiLU(),  # same activation as used inside the Mamba block
        )

        if self.task_name not in ['classification']:
            raise ValueError(f"task_name: {configs.task_name} is not valid.")

        # one class per sequence sample
        self.embedding = DataEmbedding_cls(configs.enc_in, configs.d_model, configs.embed,
                                           configs.freq, configs.dropout,
                                           configs.num_kernels, configs.seq_len)

        classifier = nn.Linear(configs.d_model, configs.num_class, bias=False)
        self.out_layer = nn.Sequential(nn.Dropout(configs.dropout), classifier)
        nn.init.xavier_uniform_(self.out_layer[1].weight)

        self.attn_weight = nn.Sequential(
            nn.Linear(configs.d_model, configs.n_heads, bias=True),
            nn.AdaptiveMaxPool1d(1),
            nn.Softmax(dim=1),
        )
        # start from zero weights and unit bias in every linear gate layer
        linear_gates = [mod for mod in self.attn_weight.modules() if isinstance(mod, nn.Linear)]
        for gate in linear_gates:
            nn.init.zeros_(gate.weight)
            if gate.bias is not None:
                gate.bias.data.fill_(1.0)

    def forward(self, x_enc, x_mark_enc, x_dec=None, x_mark_dec=None, mask=None):
        """Classify each sequence as a weighted sum of per-step logits.

        ``x_mark_enc`` is multiplied into the per-step logits to mask out
        padded steps of variable-length data (e.g. JapaneseVowels).
        ``x_dec``, ``x_mark_dec`` and ``mask`` are not read.
        """
        if self.task_name not in ['classification']:
            raise ValueError(f"task_name: {self.task_name} is not valid.")

        embedded = self.embedding(x_enc)   # (B, L_in, D)
        hidden = self.mamba(embedded)      # (B, L_in, D)

        # [proposed] per-step logits, with padded steps zeroed in place
        step_logits = self.out_layer(hidden)          # (B, L_in, C_out)
        step_logits.mul_(x_mark_enc.unsqueeze(2))

        # attention weights for the weighted sum of the per-step logits
        step_weights = self.attn_weight(hidden)       # (B, L_in, 1)

        weighted = step_logits * step_weights         # (B, L_in, C_out)
        return weighted.sum(1)                        # (B, C_out)
class FeedForward(nn.Module):
    """Two-layer position-wise MLP: Linear -> GELU -> Linear."""

    def __init__(self, d_model: int, d_hidden: int = 512):
        super().__init__()
        self.linear_1 = torch.nn.Linear(d_model, d_hidden)
        self.linear_2 = torch.nn.Linear(d_hidden, d_model)
        self.activation = torch.nn.GELU()

    def forward(self, x):
        """Expand to ``d_hidden``, apply GELU, and project back to ``d_model``."""
        hidden = self.activation(self.linear_1(x))
        return self.linear_2(hidden)
configs.dropout self.stride1 = 8 self.patch_len1 = 8 self.stride2 = 8 self.patch_len2 = 16 self.stride3 = 7 self.patch_len3 = 24 self.stride4 = 6 self.patch_len4 = 32 self.patch_num1 = int((self.seq_len - self.patch_len2) // self.stride2) + 2 self.padding_patch_layer1 = nn.ReplicationPad1d((0, self.stride1)) self.padding_patch_layer2 = nn.ReplicationPad1d((0, self.stride2)) self.padding_patch_layer3 = nn.ReplicationPad1d((0, self.stride3)) self.padding_patch_layer4 = nn.ReplicationPad1d((0, self.stride4)) self.shared_MHA = nn.ModuleList( [ AttentionLayer( FullAttention(mask_flag=self.mask), d_model=self.d_model, n_heads=self.n_heads, ) for _ in range(self.N) ] ) self.shared_MHA_ch = nn.ModuleList( [ AttentionLayer( FullAttention(mask_flag=self.mask), d_model=self.d_model, n_heads=self.n_heads, ) for _ in range(self.N) ] ) self.encoder_list = nn.ModuleList( [ Encoder( d_model=self.d_model, mha=self.shared_MHA[ll], d_hidden=self.d_hidden, dropout=self.dropout, channel_wise=False, ) for ll in range(self.N) ] ) self.encoder_list_ch = nn.ModuleList( [ Encoder( d_model=self.d_model, mha=self.shared_MHA_ch[0], d_hidden=self.d_hidden, dropout=self.dropout, channel_wise=True, ) for ll in range(self.N) ] ) pe = torch.zeros(self.patch_num1, self.d_model) for pos in range(self.patch_num1): for i in range(0, self.d_model, 2): wavelength = 10000 ** ((2 * i) / self.d_model) pe[pos, i] = math.sin(pos / wavelength) pe[pos, i + 1] = math.cos(pos / wavelength) pe = pe.unsqueeze(0) # add a batch dimention to your pe matrix self.register_buffer("pe", pe) self.embedding_channel = nn.Conv1d( in_channels=self.d_model * self.patch_num1, out_channels=self.d_model, kernel_size=1, ) self.embedding_patch_1 = torch.nn.Conv1d( in_channels=1, out_channels=self.d_model // 4, kernel_size=self.patch_len1, stride=self.stride1, ) self.embedding_patch_2 = torch.nn.Conv1d( in_channels=1, out_channels=self.d_model // 4, kernel_size=self.patch_len2, stride=self.stride2, ) self.embedding_patch_3 = 
torch.nn.Conv1d( in_channels=1, out_channels=self.d_model // 4, kernel_size=self.patch_len3, stride=self.stride3, ) self.embedding_patch_4 = torch.nn.Conv1d( in_channels=1, out_channels=self.d_model // 4, kernel_size=self.patch_len4, stride=self.stride4, ) self.out_linear_1 = torch.nn.Linear(self.d_model, self.pred_len // 8) self.out_linear_2 = torch.nn.Linear( self.d_model + self.pred_len // 8, self.pred_len // 8 ) self.out_linear_3 = torch.nn.Linear( self.d_model + 2 * self.pred_len // 8, self.pred_len // 8 ) self.out_linear_4 = torch.nn.Linear( self.d_model + 3 * self.pred_len // 8, self.pred_len // 8 ) self.out_linear_5 = torch.nn.Linear( self.d_model + self.pred_len // 2, self.pred_len // 8 ) self.out_linear_6 = torch.nn.Linear( self.d_model + 5 * self.pred_len // 8, self.pred_len // 8 ) self.out_linear_7 = torch.nn.Linear( self.d_model + 6 * self.pred_len // 8, self.pred_len // 8 ) self.out_linear_8 = torch.nn.Linear( self.d_model + 7 * self.pred_len // 8, self.pred_len - 7 * (self.pred_len // 8), ) self.remap = torch.nn.Linear(self.d_model, self.seq_len) def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): # Normalization means = x_enc.mean(1, keepdim=True).detach() x_enc = x_enc - means stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) x_enc /= stdev # Multi-scale embedding x_i = x_enc.permute(0, 2, 1) x_i_p1 = x_i x_i_p2 = self.padding_patch_layer2(x_i) x_i_p3 = self.padding_patch_layer3(x_i) x_i_p4 = self.padding_patch_layer4(x_i) encoding_patch1 = self.embedding_patch_1( rearrange(x_i_p1, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 1) ).permute(0, 2, 1) encoding_patch2 = self.embedding_patch_2( rearrange(x_i_p2, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 1) ).permute(0, 2, 1) encoding_patch3 = self.embedding_patch_3( rearrange(x_i_p3, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 1) ).permute(0, 2, 1) encoding_patch4 = self.embedding_patch_4( rearrange(x_i_p4, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 
def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """Dispatch on ``self.task_name``.

    For ``long_term_forecast`` / ``short_term_forecast`` the result of
    ``self.forecast`` is returned, cut to its last ``self.pred_len`` steps.

    Raises:
        NotImplementedError: for ``imputation``, ``anomaly_detection`` and
            ``classification``, which this model does not implement.

    Returns ``None`` for any other task name. ``mask`` is accepted for
    signature parity and is not used.
    """
    if self.task_name in ("long_term_forecast", "short_term_forecast"):
        dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return dec_out[:, -self.pred_len:, :]  # [B, L, D]

    if self.task_name in ("imputation", "anomaly_detection", "classification"):
        # The message previously named "WPMixer", a different model in this
        # repository; report the model that is actually being run.
        raise NotImplementedError(
            f"Task {self.task_name} for MultiPatchFormer is temporarily not supported"
        )
    return None
"short_term_forecast" ): dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) return dec_out[:, -self.pred_len :, :] # [B, L, D] if self.task_name == "imputation": raise NotImplementedError( "Task imputation for WPMixer is temporarily not supported" ) if self.task_name == "anomaly_detection": raise NotImplementedError( "Task anomaly_detection for WPMixer is temporarily not supported" ) if self.task_name == "classification": raise NotImplementedError( "Task classification for WPMixer is temporarily not supported" ) return None ================================================ FILE: models/Nonstationary_Transformer.py ================================================ import torch import torch.nn as nn from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer from layers.SelfAttention_Family import DSAttention, AttentionLayer from layers.Embed import DataEmbedding import torch.nn.functional as F class Projector(nn.Module): ''' MLP to learn the De-stationary factors Paper link: https://openreview.net/pdf?id=ucNDIDRNjjv ''' def __init__(self, enc_in, seq_len, hidden_dims, hidden_layers, output_dim, kernel_size=3): super(Projector, self).__init__() padding = 1 if torch.__version__ >= '1.5.0' else 2 self.series_conv = nn.Conv1d(in_channels=seq_len, out_channels=1, kernel_size=kernel_size, padding=padding, padding_mode='circular', bias=False) layers = [nn.Linear(2 * enc_in, hidden_dims[0]), nn.ReLU()] for i in range(hidden_layers - 1): layers += [nn.Linear(hidden_dims[i], hidden_dims[i + 1]), nn.ReLU()] layers += [nn.Linear(hidden_dims[-1], output_dim, bias=False)] self.backbone = nn.Sequential(*layers) def forward(self, x, stats): # x: B x S x E # stats: B x 1 x E # y: B x O batch_size = x.shape[0] x = self.series_conv(x) # B x 1 x E x = torch.cat([x, stats], dim=1) # B x 2 x E x = x.view(batch_size, -1) # B x 2E y = self.backbone(x) # B x O return y class Model(nn.Module): """ Paper link: https://openreview.net/pdf?id=ucNDIDRNjjv """ def 
class Model(nn.Module):
    """
    Non-stationary Transformer built from DSAttention layers, with two
    Projector networks that learn the de-stationary factors tau and delta.
    Paper link: https://openreview.net/pdf?id=ucNDIDRNjjv
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len

        def ds_attention(flag):
            # One attention block; `flag` is DSAttention's first positional argument.
            return AttentionLayer(
                DSAttention(flag, configs.factor, attention_dropout=configs.dropout,
                            output_attention=False),
                configs.d_model, configs.n_heads)

        is_forecast = self.task_name in ('long_term_forecast', 'short_term_forecast')

        # Embedding
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed,
                                           configs.freq, configs.dropout)
        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    ds_attention(False),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation
                )
                for _ in range(configs.e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )
        # Decoder
        if is_forecast:
            self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed,
                                               configs.freq, configs.dropout)
            self.decoder = Decoder(
                [
                    DecoderLayer(
                        ds_attention(True),
                        ds_attention(False),
                        configs.d_model,
                        configs.d_ff,
                        dropout=configs.dropout,
                        activation=configs.activation,
                    )
                    for _ in range(configs.d_layers)
                ],
                norm_layer=torch.nn.LayerNorm(configs.d_model),
                projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
            )
        if self.task_name == 'imputation':
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'anomaly_detection':
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

        projector_kwargs = dict(enc_in=configs.enc_in, seq_len=configs.seq_len,
                                hidden_dims=configs.p_hidden_dims,
                                hidden_layers=configs.p_hidden_layers)
        self.tau_learner = Projector(output_dim=1, **projector_kwargs)
        self.delta_learner = Projector(output_dim=configs.seq_len, **projector_kwargs)

    @staticmethod
    def _moments(x_enc):
        """Return the detached mean and std of ``x_enc`` over dim 1 (both B x 1 x E)."""
        mean_enc = x_enc.mean(1, keepdim=True).detach()
        std_enc = torch.sqrt(
            torch.var(x_enc - mean_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        return mean_enc, std_enc

    def _de_stationary_factors(self, x_raw, mean_enc, std_enc):
        """Learn tau (B x 1, positive) and delta (B x S) from the raw series."""
        log_tau = self.tau_learner(x_raw, std_enc)
        tau = torch.clamp(log_tau, max=80.0).exp()  # clamp avoids numerical overflow
        delta = self.delta_learner(x_raw, mean_enc)
        return tau, delta

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        x_raw = x_enc.clone().detach()

        # Normalization
        mean_enc, std_enc = self._moments(x_enc)
        x_enc = (x_enc - mean_enc) / std_enc
        tau, delta = self._de_stationary_factors(x_raw, mean_enc, std_enc)

        # Decoder input: last label_len normalized steps followed by zeros.
        known = x_enc[:, -self.label_len:, :]
        blank = torch.zeros_like(x_dec[:, -self.pred_len:, :])
        x_dec_new = torch.cat([known, blank], dim=1).to(x_enc.device).clone()

        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)
        dec_out = self.dec_embedding(x_dec_new, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None,
                               tau=tau, delta=delta)
        return dec_out * std_enc + mean_enc

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        x_raw = x_enc.clone().detach()

        # Normalization using only the observed (mask == 1) positions.
        observed = torch.sum(mask == 1, dim=1)
        mean_enc = (torch.sum(x_enc, dim=1) / observed).unsqueeze(1).detach()
        x_enc = (x_enc - mean_enc).masked_fill(mask == 0, 0)
        std_enc = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / observed + 1e-5)
        std_enc = std_enc.unsqueeze(1).detach()
        x_enc = x_enc / std_enc

        tau, delta = self._de_stationary_factors(x_raw, mean_enc, std_enc)

        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)
        dec_out = self.projection(enc_out)
        return dec_out * std_enc + mean_enc

    def anomaly_detection(self, x_enc):
        x_raw = x_enc.clone().detach()

        # Normalization
        mean_enc, std_enc = self._moments(x_enc)
        x_enc = (x_enc - mean_enc) / std_enc
        tau, delta = self._de_stationary_factors(x_raw, mean_enc, std_enc)

        # embedding
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)
        dec_out = self.projection(enc_out)
        return dec_out * std_enc + mean_enc

    def classification(self, x_enc, x_mark_enc):
        x_raw = x_enc.clone().detach()

        # The statistics only feed the projectors; x_enc itself is embedded as given.
        mean_enc, std_enc = self._moments(x_enc)
        tau, delta = self._de_stationary_factors(x_raw, mean_enc, std_enc)

        # embedding
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)

        # Output: the encoder embeddings don't include a non-linearity.
        output = self.dropout(self.act(enc_out))
        output = output * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        output = output.reshape(output.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(output)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
class Model(nn.Module):
    """
    Patch-based forecaster with a single attention encoder layer.
    Paper link: https://arxiv.org/abs/2406.16964
    """

    def __init__(self, configs, patch_len=16, stride=8):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.patch_size = patch_len
        self.stride = stride
        self.d_model = configs.d_model

        # Patches produced after the series is right-padded by one stride.
        self.patch_num = (configs.seq_len - self.patch_size) // self.stride + 2
        self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride))
        self.in_layer = nn.Linear(self.patch_size, self.d_model)

        attention = AttentionLayer(
            FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                          output_attention=False),
            configs.d_model, configs.n_heads)
        single_layer = EncoderLayer(
            attention,
            configs.d_model,
            configs.d_ff,
            dropout=configs.dropout,
            activation=configs.activation
        )
        self.encoder = Encoder([single_layer], norm_layer=nn.LayerNorm(configs.d_model))
        self.out_layer = nn.Linear(self.d_model * self.patch_num, configs.pred_len)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast ``pred_len`` steps from ``x_enc``; the other arguments are unused."""
        # Per-series standardization over dim 1.
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
        normed = centered / stdev

        B, _, C = normed.shape

        # Pad the time axis, then cut it into overlapping patches.
        patches = self.padding_patch_layer(normed.permute(0, 2, 1)).unfold(
            dimension=-1, size=self.patch_size, step=self.stride)
        tokens = rearrange(self.in_layer(patches), 'b c m l -> (b c) m l')

        encoded, _ = self.encoder(tokens)
        encoded = rearrange(encoded, '(b c) m l -> b c (m l)', b=B, c=C)
        dec_out = self.out_layer(encoded).permute(0, 2, 1)

        # Undo the standardization.
        scale = stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
        shift = means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
        dec_out = dec_out * scale
        dec_out = dec_out + shift
        return dec_out
class Transpose(nn.Module):
    """Module wrapper around ``Tensor.transpose`` so it can sit in ``nn.Sequential``.

    ``dims`` are passed straight to ``transpose``; with ``contiguous=True``
    the result is additionally made contiguous.
    """

    def __init__(self, *dims, contiguous=False):
        super().__init__()
        self.dims = dims
        self.contiguous = contiguous

    def forward(self, x):
        swapped = x.transpose(*self.dims)
        return swapped.contiguous() if self.contiguous else swapped


class FlattenHead(nn.Module):
    """Flatten the last two axes, then apply a linear projection and dropout."""

    def __init__(self, n_vars, nf, target_window, head_dropout=0):
        super().__init__()
        self.n_vars = n_vars
        self.flatten = nn.Flatten(start_dim=-2)
        self.linear = nn.Linear(nf, target_window)
        self.dropout = nn.Dropout(head_dropout)

    def forward(self, x):
        # x: [bs x nvars x d_model x patch_num] -> [bs x nvars x target_window]
        return self.dropout(self.linear(self.flatten(x)))
class Model(nn.Module):
    """
    PatchTST: each variable is patched, embedded and encoded independently.
    Paper link: https://arxiv.org/pdf/2211.14730.pdf
    """

    def __init__(self, configs, patch_len=16, stride=8):
        """
        patch_len: int, patch len for patch_embedding
        stride: int, stride for patch_embedding
        """
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        padding = stride

        # patching and embedding
        self.patch_embedding = PatchEmbedding(
            configs.d_model, patch_len, stride, padding, configs.dropout)

        # Encoder (normalised with BatchNorm1d over the d_model axis)
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=False),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation
                )
                for _ in range(configs.e_layers)
            ],
            norm_layer=nn.Sequential(
                Transpose(1, 2), nn.BatchNorm1d(configs.d_model), Transpose(1, 2))
        )

        # Prediction Head
        patch_count = int((configs.seq_len - patch_len) / stride + 2)
        self.head_nf = configs.d_model * patch_count
        if self.task_name in ('long_term_forecast', 'short_term_forecast'):
            self.head = FlattenHead(configs.enc_in, self.head_nf, configs.pred_len,
                                    head_dropout=configs.dropout)
        elif self.task_name in ('imputation', 'anomaly_detection'):
            self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len,
                                    head_dropout=configs.dropout)
        elif self.task_name == 'classification':
            self.flatten = nn.Flatten(start_dim=-2)
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                self.head_nf * configs.enc_in, configs.num_class)

    @staticmethod
    def _standardize(x_enc):
        """Normalization from Non-stationary Transformer; returns (x, means, stdev)."""
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
        return centered / stdev, means, stdev

    @staticmethod
    def _restore(dec_out, means, stdev, length):
        """De-Normalization from Non-stationary Transformer over ``length`` steps."""
        dec_out = dec_out * stdev[:, 0, :].unsqueeze(1).repeat(1, length, 1)
        dec_out = dec_out + means[:, 0, :].unsqueeze(1).repeat(1, length, 1)
        return dec_out

    def _encode(self, x_enc):
        """Patch, embed and encode; returns [bs x nvars x d_model x patch_num]."""
        # u: [bs * nvars x patch_num x d_model]
        tokens, n_vars = self.patch_embedding(x_enc.permute(0, 2, 1))
        # z: [bs * nvars x patch_num x d_model]
        encoded, attns = self.encoder(tokens)
        # z: [bs x nvars x patch_num x d_model]
        encoded = torch.reshape(
            encoded, (-1, n_vars, encoded.shape[-2], encoded.shape[-1]))
        return encoded.permute(0, 1, 3, 2)

    def _decode(self, enc_out):
        """Apply the flatten head and move time back to dim 1."""
        return self.head(enc_out).permute(0, 2, 1)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        x_enc, means, stdev = self._standardize(x_enc)
        dec_out = self._decode(self._encode(x_enc))
        return self._restore(dec_out, means, stdev, self.pred_len)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        # Statistics are taken over observed (mask == 1) positions only.
        observed = torch.sum(mask == 1, dim=1)
        means = (torch.sum(x_enc, dim=1) / observed).unsqueeze(1).detach()
        x_enc = (x_enc - means).masked_fill(mask == 0, 0)
        stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / observed + 1e-5)
        stdev = stdev.unsqueeze(1).detach()
        x_enc = x_enc / stdev

        dec_out = self._decode(self._encode(x_enc))
        return self._restore(dec_out, means, stdev, self.seq_len)

    def anomaly_detection(self, x_enc):
        x_enc, means, stdev = self._standardize(x_enc)
        dec_out = self._decode(self._encode(x_enc))
        return self._restore(dec_out, means, stdev, self.seq_len)

    def classification(self, x_enc, x_mark_enc):
        x_enc, _, _ = self._standardize(x_enc)
        enc_out = self._encode(x_enc)

        # Decoder
        output = self.dropout(self.flatten(enc_out))
        output = output.reshape(output.shape[0], -1)
        return self.projection(output)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
class Model(nn.Module):
    """
    Pyraformer: Pyramidal attention to reduce complexity
    Paper link: https://openreview.net/pdf?id=0EXmFzUn5I
    """

    def __init__(self, configs, window_size=[4,4], inner_size=5):
        """
        window_size: list, the downsample window size in pyramidal attention.
        inner_size: int, the size of neighbour attention
        """
        super().__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.d_model = configs.d_model

        # Short-term forecasting overrides the windows (rebinds, never mutates the default).
        if self.task_name == 'short_term_forecast':
            window_size = [2,2]
        self.encoder = Encoder(configs, window_size, inner_size)

        # Input width of every projection: one d_model slice per window plus one.
        feature_dim = (len(window_size)+1)*self.d_model
        if self.task_name in ('long_term_forecast', 'short_term_forecast'):
            self.projection = nn.Linear(feature_dim, self.pred_len * configs.enc_in)
        elif self.task_name in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(feature_dim, configs.enc_in, bias=True)
        elif self.task_name == 'classification':
            self.act = torch.nn.functional.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                feature_dim * configs.seq_len, configs.num_class)

    def _project_last_step(self, x_enc, x_mark_enc):
        """Project the encoder's last time step to a [B, pred_len, -1] forecast."""
        last_state = self.encoder(x_enc, x_mark_enc)[:, -1, :]
        return self.projection(last_state).view(last_state.size(0), self.pred_len, -1)

    def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        return self._project_last_step(x_enc, x_mark_enc)

    def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        # Normalization
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        centered = x_enc - mean_enc
        std_enc = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E

        dec_out = self._project_last_step(centered / std_enc, x_mark_enc)
        return dec_out * std_enc + mean_enc

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        return self.projection(self.encoder(x_enc, x_mark_enc))

    def anomaly_detection(self, x_enc, x_mark_enc):
        return self.projection(self.encoder(x_enc, x_mark_enc))

    def classification(self, x_enc, x_mark_enc):
        # enc
        enc_out = self.encoder(x_enc, x_mark_enc=None)

        # Output: the encoder embeddings don't include a non-linearity.
        output = self.dropout(self.act(enc_out))
        # zero-out padding embeddings
        output = output * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        return self.projection(output)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task == 'long_term_forecast':
            dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'short_term_forecast':
            dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc, x_mark_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
class Model(nn.Module):
    """
    Reformer with O(LlogL) complexity
    Paper link: https://openreview.net/forum?id=rkgNKkHtvB
    """

    def __init__(self, configs, bucket_size=4, n_hashes=4):
        """
        bucket_size: int, forwarded to every ReformerLayer
        n_hashes: int, forwarded to every ReformerLayer
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len

        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed,
                                           configs.freq, configs.dropout)
        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    ReformerLayer(None, configs.d_model, configs.n_heads,
                                  bucket_size=bucket_size, n_hashes=n_hashes),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation
                )
                for _ in range(configs.e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )

        if self.task_name != 'classification':
            self.projection = nn.Linear(
                configs.d_model, configs.c_out, bias=True)
        else:
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def _with_placeholder(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Append the last ``pred_len`` decoder steps (and marks) to the encoder input."""
        horizon = slice(-self.pred_len, None)
        x_enc = torch.cat([x_enc, x_dec[:, horizon, :]], dim=1)
        if x_mark_enc is not None:
            x_mark_enc = torch.cat(
                [x_mark_enc, x_mark_dec[:, horizon, :]], dim=1)
        return x_enc, x_mark_enc

    def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        # add placeholder
        x_enc, x_mark_enc = self._with_placeholder(x_enc, x_mark_enc, x_dec, x_mark_dec)

        enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        return self.projection(enc_out)  # [B, L, D]

    def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        # Normalization
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        centered = x_enc - mean_enc
        std_enc = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
        x_enc = centered / std_enc

        # add placeholder
        x_enc, x_mark_enc = self._with_placeholder(x_enc, x_mark_enc, x_dec, x_mark_dec)

        enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        dec_out = self.projection(enc_out)
        return dec_out * std_enc + mean_enc  # [B, L, D]

    def imputation(self, x_enc, x_mark_enc):
        hidden = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        hidden, attns = self.encoder(hidden)
        return self.projection(hidden)  # [B, L, D]

    def anomaly_detection(self, x_enc):
        hidden = self.enc_embedding(x_enc, None)  # [B,T,C]
        hidden, attns = self.encoder(hidden)
        return self.projection(hidden)  # [B, L, D]

    def classification(self, x_enc, x_mark_enc):
        # enc
        hidden = self.enc_embedding(x_enc, None)
        hidden, attns = self.encoder(hidden)

        # Output: the encoder embeddings don't include a non-linearity.
        output = self.dropout(self.act(hidden))
        # zero-out padding embeddings
        output = output * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        return self.projection(output)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task == 'long_term_forecast':
            dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'short_term_forecast':
            dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
class Splitting(nn.Module):
    """Split a sequence along dim 1 into its even- and odd-indexed steps."""

    def __init__(self):
        super(Splitting, self).__init__()

    def even(self, x):
        return x[:, ::2, :]

    def odd(self, x):
        return x[:, 1::2, :]

    def forward(self, x):
        # return the even and odd part, in that order
        return self.even(x), self.odd(x)


class CausalConvBlock(nn.Module):
    """Pad -> Conv1d -> LeakyReLU -> Dropout -> Conv1d -> Tanh.

    The replication padding adds ``kernel_size - 1`` steps on each side and
    each convolution removes ``kernel_size - 1`` steps, so the output has the
    same length as the input.
    """

    def __init__(self, d_model, kernel_size=5, dropout=0.0):
        super(CausalConvBlock, self).__init__()
        module_list = [
            nn.ReplicationPad1d((kernel_size - 1, kernel_size - 1)),

            nn.Conv1d(d_model, d_model,
                      kernel_size=kernel_size),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),

            nn.Dropout(dropout),
            nn.Conv1d(d_model, d_model,
                      kernel_size=kernel_size),
            nn.Tanh()
        ]
        self.causal_conv = nn.Sequential(*module_list)

    def forward(self, x):
        return self.causal_conv(x)  # return value is the same as input dimension


class SCIBlock(nn.Module):
    """One split-and-interact step on the even and odd halves of a sequence.

    Args:
        d_model: number of channels handled by every convolution.
        kernel_size: kernel width used by all four convolution blocks.
        dropout: dropout probability used by all four convolution blocks.
    """

    def __init__(self, d_model, kernel_size=5, dropout=0.0):
        super(SCIBlock, self).__init__()
        self.splitting = Splitting()
        # Forward kernel_size and dropout: they were previously accepted but
        # dropped, so every block silently used kernel 5 and no dropout.
        self.modules_even, self.modules_odd, self.interactor_even, self.interactor_odd = [
            CausalConvBlock(d_model, kernel_size=kernel_size, dropout=dropout)
            for _ in range(4)
        ]

    def forward(self, x):
        x_even, x_odd = self.splitting(x)
        x_even = x_even.permute(0, 2, 1)
        x_odd = x_odd.permute(0, 2, 1)

        # Each half is scaled by a function of the other half...
        x_even_temp = x_even.mul(torch.exp(self.modules_even(x_odd)))
        x_odd_temp = x_odd.mul(torch.exp(self.modules_odd(x_even)))

        # ...and then updated additively / subtractively.
        x_even_update = x_even_temp + self.interactor_even(x_odd_temp)
        x_odd_update = x_odd_temp - self.interactor_odd(x_even_temp)

        return x_even_update.permute(0, 2, 1), x_odd_update.permute(0, 2, 1)


class SCINet(nn.Module):
    """Binary tree of SCIBlocks of depth ``current_level + 1``.

    Each node splits its input into even/odd halves, processes them, recurses
    into two sub-trees (unless ``current_level`` is 0) and re-interleaves the
    results so the output has the same length as the input.
    """

    def __init__(self, d_model, current_level=3, kernel_size=5, dropout=0.0):
        super(SCINet, self).__init__()
        self.current_level = current_level
        self.working_block = SCIBlock(d_model, kernel_size, dropout)

        if current_level != 0:
            self.SCINet_Tree_odd = SCINet(d_model, current_level-1, kernel_size, dropout)
            self.SCINet_Tree_even = SCINet(d_model, current_level-1, kernel_size, dropout)

    def forward(self, x):
        odd_flag = False
        if x.shape[1] % 2 == 1:
            # Repeat the last step so the sequence splits evenly; the extra
            # odd element is dropped again below.
            odd_flag = True
            x = torch.cat((x, x[:, -1:, :]), dim=1)
        x_even_update, x_odd_update = self.working_block(x)
        if odd_flag:
            x_odd_update = x_odd_update[:, :-1]

        if self.current_level == 0:
            return self.zip_up_the_pants(x_even_update, x_odd_update)
        else:
            return self.zip_up_the_pants(self.SCINet_Tree_even(x_even_update),
                                         self.SCINet_Tree_odd(x_odd_update))

    def zip_up_the_pants(self, even, odd):
        """Interleave ``even`` and ``odd`` along dim 1 (even first).

        The first ``min(len)`` steps are interleaved; if ``even`` is longer
        its last step is appended. Vectorized instead of a per-step loop, and
        returns an empty sequence when both inputs are empty.
        """
        even_len, odd_len = even.shape[1], odd.shape[1]
        paired = min(even_len, odd_len)
        # [B, n, 2, C] -> [B, 2n, C] yields e0, o0, e1, o1, ...
        zipped = torch.stack((even[:, :paired], odd[:, :paired]), dim=2).flatten(1, 2)
        if even_len > odd_len:
            zipped = torch.cat((zipped, even[:, -1:]), dim=1)
        return zipped
class Model(nn.Module):
    """SCINet forecaster with one or two stacked SCINet trees.

    ``configs.d_layers`` selects the number of stacks: 1 builds a single
    tree, any other value builds two.
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        total_len = self.seq_len + self.pred_len

        # You can set the number of SCINet stacks by argument "d_layers", but should choose 1 or 2.
        self.num_stacks = configs.d_layers
        if self.num_stacks == 1:
            self.sci_net_1 = SCINet(configs.enc_in, dropout=configs.dropout)
            self.projection_1 = nn.Conv1d(self.seq_len, total_len,
                                          kernel_size=1, stride=1, bias=False)
        else:
            self.sci_net_1, self.sci_net_2 = [
                SCINet(configs.enc_in, dropout=configs.dropout) for _ in range(2)]
            self.projection_1 = nn.Conv1d(self.seq_len, self.pred_len,
                                          kernel_size=1, stride=1, bias=False)
            self.projection_2 = nn.Conv1d(total_len, total_len,
                                          kernel_size = 1, bias = False)

        # For positional encoding: round the channel count up to an even number.
        self.pe_hidden_size = configs.enc_in
        if self.pe_hidden_size % 2 == 1:
            self.pe_hidden_size += 1

        num_timescales = self.pe_hidden_size // 2
        max_timescale = 10000.0
        min_timescale = 1.0
        log_timescale_increment = (
            math.log(float(max_timescale) / float(min_timescale))
            / max(num_timescales - 1, 1))
        steps = torch.arange(num_timescales, dtype=torch.float32)
        inv_timescales = min_timescale * torch.exp(steps * -log_timescale_increment)
        self.register_buffer('inv_timescales', inv_timescales)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        if self.task_name not in ('long_term_forecast', 'short_term_forecast'):
            return None
        dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        # Prepend zeros shaped like the input in front of the forecast output.
        return torch.cat([torch.zeros_like(x_enc), dec_out], dim=1)  # [B, T, D]

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        # Normalization from Non-stationary Transformer
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = centered / stdev

        # position-encoding; drop the padding channel when enc_in is odd
        pe = self.get_position_encoding(x_enc)
        if pe.shape[2] > x_enc.shape[2]:
            pe = pe[:, :, :-1]
        x_enc = x_enc + pe

        # SCINet (first stack, with residual connection)
        dec_out = self.sci_net_1(x_enc)
        dec_out += x_enc
        dec_out = self.projection_1(dec_out)
        if self.num_stacks != 1:
            # Second stack runs on the input followed by the first projection.
            stacked_in = torch.cat((x_enc, dec_out), dim=1)
            dec_out = self.sci_net_2(stacked_in)
            dec_out += stacked_in
            dec_out = self.projection_2(dec_out)

        # De-Normalization from Non-stationary Transformer
        out_len = self.pred_len + self.seq_len
        dec_out = dec_out * stdev[:, 0, :].unsqueeze(1).repeat(1, out_len, 1)
        dec_out = dec_out + means[:, 0, :].unsqueeze(1).repeat(1, out_len, 1)
        return dec_out

    def get_position_encoding(self, x):
        """Return a sinusoidal encoding shaped [1, x.size(1), pe_hidden_size]."""
        max_length = x.size()[1]
        position = torch.arange(max_length, dtype=torch.float32, device=x.device)
        scaled_time = position.unsqueeze(1) * self.inv_timescales.unsqueeze(0)
        signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)  # [T, C]
        signal = F.pad(signal, (0, 0, 0, self.pe_hidden_size % 2))
        return signal.view(1, max_length, self.pe_hidden_size)
def get_position_encoding(self, x):
    """Build a sinusoidal position signal for the time axis of ``x``.

    The number of positions is ``x.size(1)``.  Each position is multiplied
    by ``self.inv_timescales``; the sines and cosines of those products are
    concatenated along the feature axis and returned with shape
    ``(1, x.size(1), self.pe_hidden_size)`` on ``x``'s device.
    """
    num_steps = x.size(1)
    steps = torch.arange(num_steps, dtype=torch.float32, device=x.device)
    # Outer product: one row per position, one column per timescale.
    angles = steps[:, None] * self.inv_timescales[None, :]
    encoding = torch.cat((torch.sin(angles), torch.cos(angles)), dim=1)
    # Padding amount is pe_hidden_size % 2, i.e. zero for an even width.
    encoding = F.pad(encoding, (0, 0, 0, self.pe_hidden_size % 2))
    return encoding.view(1, num_steps, self.pe_hidden_size)
def encoder(self, x):
    """Encode the input segments with the GRU and decode all output segments.

    Shape legend used in the comments (as in the original code):
    b: batch, c: channels, s: sequence length, d: d_model, w: seg_len,
    n: seg_num_x, m: seg_num_y.

    The last time step of ``x`` is subtracted before encoding and added
    back to the prediction at the end.
    """
    n_batch = x.size(0)

    # Normalise by the last observation, then b,s,c -> b,c,s.
    anchor = x[:, -1:, :].detach()
    centered = (x - anchor).permute(0, 2, 1)

    # Segment and embed: b,c,s -> bc,n,w -> bc,n,d.
    segments = centered.reshape(-1, self.seg_num_x, self.seg_len)
    embedded = self.valueEmbedding(segments)

    # Only the final hidden state is kept: 1,bc,d.
    _, h_enc = self.rnn(embedded)

    # One query per (channel, output segment): the segment-position half and
    # the channel half are concatenated to width d, then tiled per batch.
    position_part = self.pos_emb.unsqueeze(0).repeat(self.enc_in, 1, 1)
    channel_part = self.channel_emb.unsqueeze(1).repeat(1, self.seg_num_y, 1)
    queries = torch.cat([position_part, channel_part], dim=-1)
    queries = queries.view(-1, 1, self.d_model).repeat(n_batch, 1, 1)

    # Every output segment starts from its series' encoder state: 1,bcm,d.
    h_init = h_enc.repeat(1, 1, self.seg_num_y).view(1, -1, self.d_model)
    _, h_dec = self.rnn(queries, h_init)

    # 1,bcm,d -> 1,bcm,w -> b,c,s
    y = self.predict(h_dec).view(-1, self.enc_in, self.pred_len)

    # Back to b,s,c and undo the last-value normalisation.
    return y.permute(0, 2, 1) + anchor
class Model(nn.Module):
    """Zero-shot forecasting wrapper around a pretrained Sundial checkpoint."""

    def __init__(self, configs):
        """Load ``'thuml/sundial-base-128m'`` and record the task settings.

        ``configs`` must provide ``task_name``, ``seq_len`` and ``pred_len``.
        """
        super().__init__()
        self.model = AutoModelForCausalLM.from_pretrained(
            'thuml/sundial-base-128m', trust_remote_code=True)
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast each channel of ``x_enc`` independently.

        For every index along the last axis of ``x_enc`` the wrapped model
        generates 20 samples of ``pred_len`` new tokens; the samples are
        averaged over dim 1 and the per-channel results are stacked on a new
        last axis.  The mark / decoder inputs are not used.
        """
        per_channel = [
            self.model.generate(
                x_enc[..., channel],
                max_new_tokens=self.pred_len,
                num_samples=20,
            ).mean(dim=1)
            for channel in range(x_enc.shape[-1])
        ]
        return torch.stack(per_channel, dim=-1)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the forecast for ``'zero_shot_forecast'``; otherwise ``None``."""
        if self.task_name != 'zero_shot_forecast':
            return None
        return self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
class Model(nn.Module):
    """TSMixer forecaster: a stack of ``ResBlock`` mixers plus a time projection."""

    def __init__(self, configs):
        """Build ``configs.e_layers`` residual blocks and the output projection.

        The projection is a linear map from ``configs.seq_len`` to
        ``configs.pred_len`` applied along the time axis.
        """
        super().__init__()
        self.task_name = configs.task_name
        self.layer = configs.e_layers
        self.model = nn.ModuleList(
            [ResBlock(configs) for _ in range(configs.e_layers)]
        )
        self.pred_len = configs.pred_len
        self.projection = nn.Linear(configs.seq_len, configs.pred_len)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Apply the residual blocks in order, then project the time axis.

        ``x_enc`` is [B, L, D] according to the original comment; only
        ``x_enc`` is used, the remaining arguments are ignored.
        """
        hidden = x_enc
        for idx in range(self.layer):
            hidden = self.model[idx](hidden)
        # nn.Linear acts on the last axis, so move time there and back.
        time_last = hidden.transpose(1, 2)
        return self.projection(time_last).transpose(1, 2)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the last ``pred_len`` steps of the forecast.

        Raises:
            ValueError: for any task other than long/short term forecasting.
        """
        if self.task_name not in ('long_term_forecast', 'short_term_forecast'):
            raise ValueError('Only forecast tasks implemented yet')
        dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return dec_out[:, -self.pred_len:, :]  # [B, L, D]
def get_known_len(embed_type, freq):
    """Return how many 'known' time-feature columns an embedding setup uses.

    With ``embed_type == 'timeF'`` the count is looked up from ``freq``
    (an unknown ``freq`` raises ``KeyError``).  For every other
    ``embed_type`` the count is 5 when ``freq == 't'`` and 4 otherwise.
    """
    if embed_type == 'timeF':
        return {'h': 4, 't': 5, 's': 6,
                'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}[freq]
    return 5 if freq == 't' else 4
class GLU(nn.Module):
    """Gated linear unit built from two parallel linear projections."""

    def __init__(self, input_size, output_size):
        """Create the two ``input_size -> output_size`` projections and the gate."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, output_size)
        self.fc2 = nn.Linear(input_size, output_size)
        self.glu = nn.GLU()

    def forward(self, x):
        """Concatenate ``fc1(x)`` and ``fc2(x)`` on the last axis and apply ``nn.GLU``.

        ``nn.GLU`` halves the last axis again, so the result has
        ``output_size`` features.
        """
        value_and_gate = torch.cat((self.fc1(x), self.fc2(x)), dim=-1)
        return self.glu(value_and_gate)
class VariableSelectionNetwork(nn.Module):
    """Weight per-variable embeddings with softmax selection weights."""

    def __init__(self, d_model, variable_num, dropout=0.0):
        """Create one joint GRN for the weights and one GRN per variable."""
        super().__init__()
        self.joint_grn = GRN(
            d_model * variable_num,
            variable_num,
            hidden_size=d_model,
            context_size=d_model,
            dropout=dropout,
        )
        self.variable_grns = nn.ModuleList(
            [GRN(d_model, d_model, dropout=dropout) for _ in range(variable_num)]
        )

    def forward(self, x: Tensor, context: Optional[Tensor] = None):
        """Return the selection-weighted combination of the variables.

        Shapes, per the original comments:
            x:                 [B,T,C,d] or [B,C,d]
            selection weights: [B,T,C]   or [B,C]
            processed x:       [B,T,d,C] or [B,d,C]
            result:            [B,T,d]   or [B,d]
        """
        # Weights come from all variables flattened together (plus context).
        flat = torch.flatten(x, start_dim=-2)
        weights = F.softmax(self.joint_grn(flat, context), dim=-1)

        # Each variable goes through its own GRN; stack with C last.
        per_variable = [
            grn(x[..., idx, :]) for idx, grn in enumerate(self.variable_grns)
        ]
        stacked = torch.stack(per_variable, dim=-1)

        # [..., d, C] @ [..., C, 1] -> [..., d]
        return torch.matmul(stacked, weights.unsqueeze(-1)).squeeze(-1)
def forward(self, x):
    """Causal self-attention with per-head queries/keys and one shared value.

    Q, K and V are all projected from ``x`` ([B, T, d_model]).  Queries and
    keys are split into ``n_heads`` heads of width ``d_head``; a single
    ``d_head``-wide value is shared by every head.  Head outputs are
    averaged, projected by ``out_projection`` and passed through
    ``out_dropout``.

    The registered causal ``mask`` is cropped to the actual sequence length,
    so inputs shorter than the length the mask was built for are accepted.
    """
    B, T, _ = x.shape
    qkv = self.qkv_linears(x)
    head_width = self.n_heads * self.d_head
    q, k, v = qkv.split((head_width, head_width, self.d_head), dim=-1)
    q = q.view(B, T, self.n_heads, self.d_head).permute(0, 2, 1, 3)  # [B,n,T,d_head]
    k = k.view(B, T, self.n_heads, self.d_head).permute(0, 2, 3, 1)  # [B,n,d_head,T]

    attention_score = torch.matmul(q, k) * self.scale  # [B,n,T,T]
    # The buffer is upper-triangular -inf; its top-left T x T corner is the
    # causal mask for a length-T sequence.
    attention_score = attention_score + self.mask[:T, :T]
    attention_prob = F.softmax(attention_score, dim=3)  # [B,n,T,T]

    # v is [B,T,d_head]; unsqueeze(1) broadcasts it across the heads.
    attention_out = torch.matmul(attention_prob, v.unsqueeze(1))  # [B,n,T,d_head]
    attention_out = attention_out.mean(dim=1)  # [B,T,d_head]

    out = self.out_projection(attention_out)
    return self.out_dropout(out)  # [B,T,d_model]
self.gate_final = GateAddNorm(configs.d_model, configs.d_model) self.out_projection = nn.Linear(configs.d_model, configs.c_out) def forward(self, history_input, future_input, c_c, c_h, c_e): # history_input, future_input: [B,T,d] # c_c, c_h, c_e: [B,d] # LSTM c = (c_c.unsqueeze(0), c_h.unsqueeze(0)) if c_c is not None and c_h is not None else None historical_features, state = self.history_encoder(history_input, c) future_features, _ = self.future_encoder(future_input, state) # Skip connection temporal_input = torch.cat([history_input, future_input], dim=1) temporal_features = torch.cat([historical_features, future_features], dim=1) temporal_features = self.gate_after_lstm(temporal_features, temporal_input) # [B,T,d] # Static enrichment enriched_features = self.enrichment_grn(temporal_features, c_e) # [B,T,d] # Temporal self-attention attention_out = self.attention(enriched_features) # [B,T,d] # Don't compute historical loss attention_out = self.gate_after_attention(attention_out[:,-self.pred_len:], enriched_features[:,-self.pred_len:]) # Position-wise feed-forward out = self.position_wise_grn(attention_out) # [B,T,d] # Final skip connection out = self.gate_final(out, temporal_features[:,-self.pred_len:]) return self.out_projection(out) class Model(nn.Module): def __init__(self, configs): super(Model, self).__init__() self.configs = configs self.task_name = configs.task_name self.seq_len = configs.seq_len self.label_len = configs.label_len self.pred_len = configs.pred_len # Number of variables self.static_len = len(datatype_dict[configs.data].static) self.observed_len = len(datatype_dict[configs.data].observed) self.known_len = get_known_len(configs.embed, configs.freq) self.embedding = TFTEmbedding(configs) self.static_encoder = StaticCovariateEncoder(configs.d_model, self.static_len) self.history_vsn = VariableSelectionNetwork(configs.d_model, self.observed_len + self.known_len) self.future_vsn = VariableSelectionNetwork(configs.d_model, self.known_len) 
def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
    """Normalise, embed, select variables, decode and de-normalise.

    The input is standardised per series over the time axis (statistics as
    in Non-stationary Transformer), and the decoder output is rescaled with
    the same statistics repeated over ``pred_len`` steps.
    """
    # Normalization from Non-stationary Transformer
    means = x_enc.mean(1, keepdim=True).detach()
    centered = x_enc - means
    stdev = torch.sqrt(
        torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
    normalized = centered / stdev

    # Data embedding
    # static_input: [B,C,d], observed_input: [B,T,C,d], known_input: [B,T,C,d]
    static_input, observed_input, known_input = self.embedding(
        normalized, x_mark_enc, x_dec, x_mark_dec)

    # Static context, c_s,...,c_e: [B,d]
    c_s, c_c, c_h, c_e = self.static_encoder(static_input)

    # Temporal input selection: history sees observed + known variables,
    # the future part only the known ones.
    known_past = known_input[:, :self.seq_len]
    known_future = known_input[:, self.seq_len:]
    history_input = self.history_vsn(
        torch.cat([observed_input, known_past], dim=-2), c_s)
    future_input = self.future_vsn(known_future, c_s)

    # TFT main procedure after variable selection
    # history_input: [B,T,d], future_input: [B,T,d]
    dec_out = self.temporal_fusion_decoder(
        history_input, future_input, c_c, c_h, c_e)

    # De-Normalization from Non-stationary Transformer
    scale = stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
    shift = means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
    return dec_out * scale + shift
def __init__(self, configs, bias=True, feature_encode_dim=2):
    """Build the TiDE feature encoder, dense encoder/decoder stacks and heads.

    Args:
        configs: experiment configuration; reads ``task_name``, ``seq_len``,
            ``label_len``, ``pred_len``, ``d_model``, ``e_layers``,
            ``d_layers``, ``freq``, ``c_out``, ``d_ff`` and ``dropout``.
        bias: whether the linear layers and layer norms carry a bias.
        feature_encode_dim: width of the per-step encoded time features.

    Raises:
        KeyError: if ``configs.freq`` is not one of the supported codes.

    Decoder, temporal decoder and residual projection are only created for
    the forecast, imputation and anomaly-detection tasks.
    """
    super(Model, self).__init__()
    self.configs = configs
    self.task_name = configs.task_name
    self.seq_len = configs.seq_len  # L
    self.label_len = configs.label_len
    self.pred_len = configs.pred_len  # H
    self.hidden_dim = configs.d_model
    self.res_hidden = configs.d_model
    self.encoder_num = configs.e_layers
    self.decoder_num = configs.d_layers
    self.freq = configs.freq
    self.feature_encode_dim = feature_encode_dim
    self.decode_dim = configs.c_out
    self.temporalDecoderHidden = configs.d_ff
    dropout = configs.dropout

    # Number of time-feature columns per frequency code.
    freq_map = {'h': 4, 't': 5, 's': 6,
                'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
    self.feature_dim = freq_map[self.freq]

    # Look-back values plus encoded features for look-back and horizon.
    flatten_dim = self.seq_len + (self.seq_len + self.pred_len) * self.feature_encode_dim

    def hidden_blocks(count):
        # A fresh ResBlock per layer.  The previous ``[ResBlock(...)] * count``
        # repeated one module object, so every stacked layer shared the same
        # weights.
        return [
            ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)
            for _ in range(count)
        ]

    self.feature_encoder = ResBlock(
        self.feature_dim, self.res_hidden, self.feature_encode_dim, dropout, bias)
    self.encoders = nn.Sequential(
        ResBlock(flatten_dim, self.res_hidden, self.hidden_dim, dropout, bias),
        *hidden_blocks(self.encoder_num - 1))

    # Forecasting decodes pred_len steps; reconstruction tasks decode seq_len.
    if self.task_name in ('long_term_forecast', 'short_term_forecast'):
        out_len = self.pred_len
    elif self.task_name in ('imputation', 'anomaly_detection'):
        out_len = self.seq_len
    else:
        out_len = None

    if out_len is not None:
        self.decoders = nn.Sequential(
            *hidden_blocks(self.decoder_num - 1),
            ResBlock(self.hidden_dim, self.res_hidden,
                     self.decode_dim * out_len, dropout, bias))
        self.temporalDecoder = ResBlock(
            self.decode_dim + self.feature_encode_dim,
            self.temporalDecoderHidden, 1, dropout, bias)
        self.residual_proj = nn.Linear(self.seq_len, out_len, bias=bias)
def imputation(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask):
    """Reconstruct one series over the look-back window.

    ``x_enc`` is standardised along dim 1, encoded together with the
    flattened features of ``x_mark_enc``, decoded to ``seq_len`` steps and
    rescaled with the same statistics.  ``x_dec``, ``batch_y_mark`` and
    ``mask`` are not used.
    # assumes x_enc is a single channel shaped [B, seq_len] -- TODO confirm
    """
    # Normalization
    means = x_enc.mean(1, keepdim=True).detach()
    centered = x_enc - means
    stdev = torch.sqrt(
        torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
    series = centered / stdev

    feature = self.feature_encoder(x_mark_enc)
    n_batch = feature.shape[0]
    encoder_in = torch.cat([series, feature.reshape(n_batch, -1)], dim=-1)
    hidden = self.encoders(encoder_in)
    decoded = self.decoders(hidden).reshape(
        hidden.shape[0], self.seq_len, self.decode_dim)

    # Per-step decoding on [encoded feature, decoded vector], plus a linear
    # residual taken directly from the normalised input.
    step_in = torch.cat([feature[:, :self.seq_len], decoded], dim=-1)
    dec_out = self.temporalDecoder(step_in).squeeze(-1) + self.residual_proj(series)

    # De-Normalization
    scale = stdev[:, 0].unsqueeze(1).repeat(1, self.seq_len)
    shift = means[:, 0].unsqueeze(1).repeat(1, self.seq_len)
    return dec_out * scale + shift
class Model(nn.Module):
    """Zero-shot forecasting wrapper around the pretrained TiRex model."""

    def __init__(self, configs):
        """Load ``"NX-AI/TiRex"`` and record the task settings.

        ``configs`` must provide ``task_name``, ``seq_len`` and ``pred_len``.
        """
        super().__init__()
        self.model = load_model("NX-AI/TiRex")
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast every channel of ``x_enc`` ([B, L, C]) as its own series.

        The input is standardised per series over time, each (batch, channel)
        pair is handed to the wrapped model as one univariate context of
        length L, and the returned forecast is rescaled back.  The mark and
        decoder inputs are not used.

        Returns:
            Tensor of shape [B, pred_len, C].
        """
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        B, L, C = x_enc.shape
        # Channels must be moved in front of time before flattening; a plain
        # reshape of [B, L, C] to (B*C, L) would interleave time steps and
        # channels whenever C > 1.
        series = x_enc.permute(0, 2, 1).reshape(B * C, L)
        # Only the second returned value is used (the first is discarded).
        _, output = self.model.forecast(series, prediction_length=self.pred_len)

        # (B*C, H) -> [B, C, H] -> [B, H, C], mirroring the flattening above.
        dec_out = output.to(x_enc.device).reshape(B, C, -1).permute(0, 2, 1)

        dec_out = dec_out * \
            (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        dec_out = dec_out + \
            (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the forecast for ``'zero_shot_forecast'``; otherwise ``None``."""
        if self.task_name == 'zero_shot_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out
        return None
def forward(self, x):
    """Cut the last axis into patches, project them, optionally add positions.

    Windows of ``patch_len`` values taken every ``stride`` steps are passed
    through ``patch_proj``; when ``self.pos`` is set, ``self.pe`` of the
    projected patches is added in place.
    # original comments give x as [B, N, T] and the result as [B, N*L, D]
    """
    patches = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
    embedded = self.patch_proj(patches)
    if not self.pos:
        return embedded
    embedded.add_(self.pe(embedded))
    return embedded
def _get_mask(self, device):
    """Build the three 0/1 relation masks between all patch tokens.

    With ``L = seq_len * c_out // patch_len`` tokens laid out as consecutive
    groups of ``N = seq_len // patch_len`` tokens, row ``k`` of the result
    holds, for every other token ``j``:

    * channel 0 (S):  ``j`` has the same offset inside its group as ``k``
    * channel 1 (T):  ``j`` lies in the same group as ``k``
    * channel 2 (ST): neither of the above

    A token never relates to itself, so the diagonal is 0 in all channels.

    Returns:
        float32 tensor of shape ``[L, 3, L]`` on ``device``.

    Raises:
        ZeroDivisionError: if ``seq_len < patch_len`` (N would be 0).
    """
    dtype = torch.float32
    L = self.args.seq_len * self.args.c_out // self.args.patch_len
    N = self.args.seq_len // self.args.patch_len
    if N == 0 and L > 0:
        raise ZeroDivisionError('integer division or modulo by zero')

    # Built directly on the target device and vectorised over all rows,
    # instead of one Python iteration (with several arange calls and device
    # copies) per token.
    idx = torch.arange(L, device=device)
    row = idx.unsqueeze(1)  # the token k owning the mask row
    col = idx.unsqueeze(0)  # the token j being related to k
    not_self = row != col

    S = ((col % N) == (row % N)) & not_self
    T = (torch.div(col, N, rounding_mode='floor')
         == torch.div(row, N, rounding_mode='floor')) & not_self
    # S and T never overlap off the diagonal, so "1 - S - T" is simply
    # "neither S nor T".
    ST = not_self & ~S & ~T

    return torch.stack([S, T, ST], dim=1).to(dtype)
class DFT_series_decomp(nn.Module):
    """
    Series decomposition block based on the real FFT.

    The seasonal part is rebuilt from the ``top_k`` strongest non-DC
    frequency components of every series; the trend is the remainder.
    """

    def __init__(self, top_k: int = 5):
        super(DFT_series_decomp, self).__init__()
        self.top_k = top_k

    def forward(self, x):
        """Split ``x`` into ``(x_season, x_trend)`` with ``x_season + x_trend == x``.

        The transform runs along dim 1, which is treated as time.
        # assumes x is [B, T, C] -- TODO confirm against callers

        Fixes relative to the previous implementation:
          * the DC bin is excluded per series (``freq[:, 0]``); ``freq[0]``
            blanked the whole first batch element instead;
          * the top-k search runs over the frequency axis, not the channels;
          * the cut-off is computed per series rather than as one minimum
            over the whole batch, so results no longer depend on what else
            is in the batch;
          * ``irfft`` is given the original length, so odd T round-trips.
        """
        seq_len = x.size(1)
        xf = torch.fft.rfft(x, dim=1)

        # Magnitudes are only used to choose components; no gradient needed.
        freq = xf.detach().abs()
        freq[:, 0] = 0  # never select the DC component as "seasonal"

        k = min(self.top_k, freq.size(1))
        top_k_freq, _ = torch.topk(freq, k=k, dim=1)
        # topk returns values in descending order: the last one is the
        # k-th largest magnitude of each series.
        threshold = top_k_freq[:, -1:]
        keep = (freq >= threshold) & (freq > 0)

        xf = torch.where(keep, xf, torch.zeros_like(xf))
        x_season = torch.fft.irfft(xf, n=seq_len, dim=1)
        x_trend = x - x_season
        return x_season, x_trend
class MultiScaleTrendMixing(nn.Module):
    """
    Top-down mixing trend pattern.

    Trend information flows from the coarsest scale to the finest one: each
    coarser trend is up-sampled by a small MLP acting on the time axis and
    added to the next finer trend.
    """

    def __init__(self, configs):
        super(MultiScaleTrendMixing, self).__init__()
        window = configs.down_sampling_window

        def scale_len(level):
            # Temporal length of the series at the given pyramid level.
            return configs.seq_len // (window ** level)

        # Layers are stored coarse -> fine, i.e. in the order they are applied.
        self.up_sampling_layers = torch.nn.ModuleList(
            [
                nn.Sequential(
                    torch.nn.Linear(scale_len(i + 1), scale_len(i)),
                    nn.GELU(),
                    torch.nn.Linear(scale_len(i), scale_len(i)),
                )
                for i in reversed(range(configs.down_sampling_layers))
            ]
        )

    def forward(self, trend_list):
        """
        Args:
            trend_list: sequence of trend tensors ordered fine -> coarse,
                each with the time axis last (the up-sampling MLPs act on
                the last dimension).

        Returns:
            List of mixed trends in the same fine -> coarse order, each with
            its last two axes swapped. A single-scale input is returned
            permuted but otherwise unchanged instead of raising IndexError.
        """
        # mixing low->high; the caller's list is left untouched
        coarse_to_fine = trend_list[::-1]

        running = coarse_to_fine[0]
        out_trend_list = [running.permute(0, 2, 1)]
        for i, finer in enumerate(coarse_to_fine[1:]):
            running = finer + self.up_sampling_layers[i](running)
            out_trend_list.append(running.permute(0, 2, 1))

        out_trend_list.reverse()
        return out_trend_list
self.out_cross_layer(out) out_list.append(out[:, :length, :]) return out_list class Model(nn.Module): def __init__(self, configs): super(Model, self).__init__() self.configs = configs self.task_name = configs.task_name self.seq_len = configs.seq_len self.label_len = configs.label_len self.pred_len = configs.pred_len self.down_sampling_window = configs.down_sampling_window self.channel_independence = configs.channel_independence self.pdm_blocks = nn.ModuleList([PastDecomposableMixing(configs) for _ in range(configs.e_layers)]) self.preprocess = series_decomp(configs.moving_avg) self.enc_in = configs.enc_in if self.channel_independence: self.enc_embedding = DataEmbedding_wo_pos(1, configs.d_model, configs.embed, configs.freq, configs.dropout) else: self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.layer = configs.e_layers self.normalize_layers = torch.nn.ModuleList( [ Normalize(self.configs.enc_in, affine=True, non_norm=True if configs.use_norm == 0 else False) for i in range(configs.down_sampling_layers + 1) ] ) if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': self.predict_layers = torch.nn.ModuleList( [ torch.nn.Linear( configs.seq_len // (configs.down_sampling_window ** i), configs.pred_len, ) for i in range(configs.down_sampling_layers + 1) ] ) if self.channel_independence: self.projection_layer = nn.Linear( configs.d_model, 1, bias=True) else: self.projection_layer = nn.Linear( configs.d_model, configs.c_out, bias=True) self.out_res_layers = torch.nn.ModuleList([ torch.nn.Linear( configs.seq_len // (configs.down_sampling_window ** i), configs.seq_len // (configs.down_sampling_window ** i), ) for i in range(configs.down_sampling_layers + 1) ]) self.regression_layers = torch.nn.ModuleList( [ torch.nn.Linear( configs.seq_len // (configs.down_sampling_window ** i), configs.pred_len, ) for i in range(configs.down_sampling_layers + 1) ] ) if self.task_name 
def __multi_scale_process_inputs(self, x_enc, x_mark_enc):
    """
    Build the multi-scale input pyramid by repeated temporal down-sampling.

    Args:
        x_enc: input series, [B, T, C].
        x_mark_enc: optional time-feature tensor with time on dim 1, or None.

    Returns:
        ``(x_list, mark_list)``: ``down_sampling_layers + 1`` tensors each,
        ordered fine -> coarse; ``mark_list`` is None when ``x_mark_enc`` is
        None. If ``configs.down_sampling_method`` is not one of
        'max' / 'avg' / 'conv', both inputs are returned unchanged (as
        tensors, not lists).
    """
    cfg = self.configs
    window = cfg.down_sampling_window

    if cfg.down_sampling_method == 'max':
        down_pool = torch.nn.MaxPool1d(window, return_indices=False)
    elif cfg.down_sampling_method == 'avg':
        down_pool = torch.nn.AvgPool1d(window)
    elif cfg.down_sampling_method == 'conv':
        padding = 1 if torch.__version__ >= '1.5.0' else 2
        # NOTE(review): this convolution is re-created (with fresh random
        # weights) on every call and is not registered on the module, so it
        # is never trained - consider building it once in __init__.
        down_pool = nn.Conv1d(in_channels=cfg.enc_in, out_channels=cfg.enc_in,
                              kernel_size=3, padding=padding,
                              stride=window,
                              padding_mode='circular',
                              bias=False)
        # A freshly built layer lives on CPU in the default dtype; match the
        # input so CUDA / non-float32 batches do not crash.
        down_pool = down_pool.to(device=x_enc.device, dtype=x_enc.dtype)
    else:
        return x_enc, x_mark_enc

    # B,T,C -> B,C,T (pooling layers act on the last axis)
    current = x_enc.permute(0, 2, 1)
    current_mark = x_mark_enc

    x_enc_sampling_list = [x_enc]
    x_mark_sampling_list = [x_mark_enc]

    for _ in range(cfg.down_sampling_layers):
        current = down_pool(current)
        x_enc_sampling_list.append(current.permute(0, 2, 1))

        if x_mark_enc is not None:
            # Time features are simply sub-sampled with the same stride.
            current_mark = current_mark[:, ::window, :]
            x_mark_sampling_list.append(current_mark)

    if x_mark_enc is None:
        return x_enc_sampling_list, None
    return x_enc_sampling_list, x_mark_sampling_list
self.predict_layers[i](enc_out.permute(0, 2, 1)).permute( 0, 2, 1) # align temporal dimension dec_out = self.projection_layer(dec_out) dec_out = dec_out.reshape(B, self.configs.c_out, self.pred_len).permute(0, 2, 1).contiguous() dec_out_list.append(dec_out) else: for i, enc_out, out_res in zip(range(len(x_list[0])), enc_out_list, x_list[1]): dec_out = self.predict_layers[i](enc_out.permute(0, 2, 1)).permute( 0, 2, 1) # align temporal dimension dec_out = self.out_projection(dec_out, i, out_res) dec_out_list.append(dec_out) return dec_out_list def classification(self, x_enc, x_mark_enc): x_enc, _ = self.__multi_scale_process_inputs(x_enc, None) x_list = x_enc # embedding enc_out_list = [] for x in x_list: enc_out = self.enc_embedding(x, None) # [B,T,C] enc_out_list.append(enc_out) # MultiScale-CrissCrossAttention as encoder for past for i in range(self.layer): enc_out_list = self.pdm_blocks[i](enc_out_list) enc_out = enc_out_list[0] # Output # the output transformer encoder/decoder embeddings don't include non-linearity output = self.act(enc_out) output = self.dropout(output) # zero-out padding embeddings output = output * x_mark_enc.unsqueeze(-1) # (batch_size, seq_length * d_model) output = output.reshape(output.shape[0], -1) output = self.projection(output) # (batch_size, num_classes) return output def anomaly_detection(self, x_enc): B, T, N = x_enc.size() x_enc, _ = self.__multi_scale_process_inputs(x_enc, None) x_list = [] for i, x in zip(range(len(x_enc)), x_enc, ): B, T, N = x.size() x = self.normalize_layers[i](x, 'norm') if self.channel_independence: x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1) x_list.append(x) # embedding enc_out_list = [] for x in x_list: enc_out = self.enc_embedding(x, None) # [B,T,C] enc_out_list.append(enc_out) # MultiScale-CrissCrossAttention as encoder for past for i in range(self.layer): enc_out_list = self.pdm_blocks[i](enc_out_list) dec_out = self.projection_layer(enc_out_list[0]) dec_out = dec_out.reshape(B, 
def imputation(self, x_enc, x_mark_enc, mask):
    """
    Reconstruct a partially observed series.

    Args:
        x_enc: input series, [B, T, C].
        x_mark_enc: optional time features; only used to build the
            multi-scale pyramid, the embedding is called without marks.
        mask: tensor shaped like ``x_enc``; entries equal to 1 are observed,
            entries equal to 0 are missing.

    Returns:
        De-normalised reconstruction, [B, T, c_out].
    """
    # Masked instance normalisation. The count is clamped so that a channel
    # without a single observed value yields 0 instead of NaN/inf.
    observed = torch.sum(mask == 1, dim=1).clamp(min=1)
    means = (torch.sum(x_enc, dim=1) / observed).unsqueeze(1).detach()
    x_enc = x_enc - means
    x_enc = x_enc.masked_fill(mask == 0, 0)
    stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / observed + 1e-5)
    stdev = stdev.unsqueeze(1).detach()
    x_enc = x_enc / stdev

    B = x_enc.size(0)
    x_enc, x_mark_enc = self.__multi_scale_process_inputs(x_enc, x_mark_enc)

    x_list = []
    for x in x_enc:
        if self.channel_independence:
            # Fold the channels into the batch: [B, T, N] -> [B*N, T, 1]
            _, T, N = x.size()
            x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1)
        x_list.append(x)

    # embedding
    enc_out_list = [self.enc_embedding(x, None) for x in x_list]

    # Past Decomposable Mixing as encoder for past
    for i in range(self.layer):
        enc_out_list = self.pdm_blocks[i](enc_out_list)

    dec_out = self.projection_layer(enc_out_list[0])
    if self.channel_independence:
        # Unfold the batch again: [B*N, T, 1] -> [B, T, N]
        dec_out = dec_out.reshape(B, self.configs.c_out, -1).permute(0, 2, 1).contiguous()
    # In channel-mixing mode the projection already yields [B, T, c_out];
    # reshaping it to [B, c_out, T] would scramble time and channel values.

    # De-normalisation; the [B, 1, C] statistics broadcast over time.
    dec_out = dec_out * stdev
    dec_out = dec_out + means
    return dec_out
class Model(nn.Module):
    """Zero-shot forecasting wrapper around the pre-trained TimeMoE-50M model."""

    def __init__(self, configs):
        """
        Args:
            configs: experiment configuration; ``task_name``, ``seq_len`` and
                ``pred_len`` are read.
        """
        super().__init__()
        # NOTE(review): trust_remote_code=True executes code shipped with the
        # checkpoint repository - only acceptable for a trusted source.
        self.model = AutoModelForCausalLM.from_pretrained('Maple728/TimeMoE-50M', trust_remote_code=True)
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """
        Forecast every channel independently.

        Args:
            x_enc: history, [B, L, C]. The remaining arguments are unused.

        Returns:
            Forecast of shape [B, pred_len, C] on the original data scale.
        """
        # Per-series instance normalisation
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        B, L, C = x_enc.shape
        # One row per univariate series. The channel axis must be moved in
        # front of the time axis first; a plain reshape of [B, L, C] would
        # interleave time steps and channels.
        series = x_enc.permute(0, 2, 1).reshape(B * C, L)
        output = self.model.generate(series, max_new_tokens=self.pred_len)

        # Keep the newly generated steps and restore [B, pred_len, C].
        dec_out = output[:, -self.pred_len:].reshape(B, C, self.pred_len).permute(0, 2, 1)

        # De-normalisation; the [B, 1, C] statistics broadcast over time.
        dec_out = dec_out * stdev
        dec_out = dec_out + means
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Run the zero-shot forecast; every other task returns None."""
        if self.task_name == 'zero_shot_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out
        return None
layers.SelfAttention_Family import FullAttention, AttentionLayer from layers.Embed import DataEmbedding_inverted, PositionalEmbedding import numpy as np class FlattenHead(nn.Module): def __init__(self, n_vars, nf, target_window, head_dropout=0): super().__init__() self.n_vars = n_vars self.flatten = nn.Flatten(start_dim=-2) self.linear = nn.Linear(nf, target_window) self.dropout = nn.Dropout(head_dropout) def forward(self, x): # x: [bs x nvars x d_model x patch_num] x = self.flatten(x) x = self.linear(x) x = self.dropout(x) return x class EnEmbedding(nn.Module): def __init__(self, n_vars, d_model, patch_len, dropout): super(EnEmbedding, self).__init__() # Patching self.patch_len = patch_len self.value_embedding = nn.Linear(patch_len, d_model, bias=False) self.glb_token = nn.Parameter(torch.randn(1, n_vars, 1, d_model)) self.position_embedding = PositionalEmbedding(d_model) self.dropout = nn.Dropout(dropout) def forward(self, x): # do patching n_vars = x.shape[1] glb = self.glb_token.repeat((x.shape[0], 1, 1, 1)) x = x.unfold(dimension=-1, size=self.patch_len, step=self.patch_len) x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) # Input encoding x = self.value_embedding(x) + self.position_embedding(x) x = torch.reshape(x, (-1, n_vars, x.shape[-2], x.shape[-1])) x = torch.cat([x, glb], dim=2) x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) return self.dropout(x), n_vars class Encoder(nn.Module): def __init__(self, layers, norm_layer=None, projection=None): super(Encoder, self).__init__() self.layers = nn.ModuleList(layers) self.norm = norm_layer self.projection = projection def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): for layer in self.layers: x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) if self.norm is not None: x = self.norm(x) if self.projection is not None: x = self.projection(x) return x class EncoderLayer(nn.Module): def __init__(self, 
self_attention, cross_attention, d_model, d_ff=None, dropout=0.1, activation="relu"): super(EncoderLayer, self).__init__() d_ff = d_ff or 4 * d_model self.self_attention = self_attention self.cross_attention = cross_attention self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) self.norm1 = nn.LayerNorm(d_model) self.norm2 = nn.LayerNorm(d_model) self.norm3 = nn.LayerNorm(d_model) self.dropout = nn.Dropout(dropout) self.activation = F.relu if activation == "relu" else F.gelu def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): B, L, D = cross.shape x = x + self.dropout(self.self_attention( x, x, x, attn_mask=x_mask, tau=tau, delta=None )[0]) x = self.norm1(x) x_glb_ori = x[:, -1, :].unsqueeze(1) x_glb = torch.reshape(x_glb_ori, (B, -1, D)) x_glb_attn = self.dropout(self.cross_attention( x_glb, cross, cross, attn_mask=cross_mask, tau=tau, delta=delta )[0]) x_glb_attn = torch.reshape(x_glb_attn, (x_glb_attn.shape[0] * x_glb_attn.shape[1], x_glb_attn.shape[2])).unsqueeze(1) x_glb = x_glb_ori + x_glb_attn x_glb = self.norm2(x_glb) y = x = torch.cat([x[:, :-1, :], x_glb], dim=1) y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) y = self.dropout(self.conv2(y).transpose(-1, 1)) return self.norm3(x + y) class Model(nn.Module): def __init__(self, configs): super(Model, self).__init__() self.task_name = configs.task_name self.features = configs.features self.seq_len = configs.seq_len self.pred_len = configs.pred_len self.use_norm = configs.use_norm self.patch_len = configs.patch_len self.patch_num = int(configs.seq_len // configs.patch_len) self.n_vars = 1 if configs.features == 'MS' else configs.enc_in # Embedding self.en_embedding = EnEmbedding(self.n_vars, configs.d_model, self.patch_len, configs.dropout) self.ex_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, configs.dropout) # 
Encoder-only architecture self.encoder = Encoder( [ EncoderLayer( AttentionLayer( FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), AttentionLayer( FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), configs.d_model, configs.n_heads), configs.d_model, configs.d_ff, dropout=configs.dropout, activation=configs.activation, ) for l in range(configs.e_layers) ], norm_layer=torch.nn.LayerNorm(configs.d_model) ) self.head_nf = configs.d_model * (self.patch_num + 1) self.head = FlattenHead(configs.enc_in, self.head_nf, configs.pred_len, head_dropout=configs.dropout) def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): if self.use_norm: # Normalization from Non-stationary Transformer means = x_enc.mean(1, keepdim=True).detach() x_enc = x_enc - means stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) x_enc /= stdev _, _, N = x_enc.shape en_embed, n_vars = self.en_embedding(x_enc[:, :, -1].unsqueeze(-1).permute(0, 2, 1)) ex_embed = self.ex_embedding(x_enc[:, :, :-1], x_mark_enc) enc_out = self.encoder(en_embed, ex_embed) enc_out = torch.reshape( enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) # z: [bs x nvars x d_model x patch_num] enc_out = enc_out.permute(0, 1, 3, 2) dec_out = self.head(enc_out) # z: [bs x nvars x target_window] dec_out = dec_out.permute(0, 2, 1) if self.use_norm: # De-Normalization from Non-stationary Transformer dec_out = dec_out * (stdev[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1)) dec_out = dec_out + (means[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1)) return dec_out def forecast_multi(self, x_enc, x_mark_enc, x_dec, x_mark_dec): if self.use_norm: # Normalization from Non-stationary Transformer means = x_enc.mean(1, keepdim=True).detach() x_enc = x_enc - means stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) x_enc /= stdev _, _, N = 
def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """
    Dispatch to the matching forecaster.

    Only the forecasting tasks are supported; any other task yields None.
    ``features == 'M'`` selects the multivariate forecaster, every other
    setting the single-target one. The last ``pred_len`` steps are returned.
    """
    if self.task_name not in ('long_term_forecast', 'short_term_forecast'):
        return None

    predictor = self.forecast_multi if self.features == 'M' else self.forecast
    dec_out = predictor(x_enc, x_mark_enc, x_dec, x_mark_dec)
    return dec_out[:, -self.pred_len:, :]  # [B, L, D]
def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
    """
    Forecast every channel independently with the wrapped TimesFM model.

    Args:
        x_enc: history, [B, L, C]. The remaining arguments are unused.

    Returns:
        Forecast of shape [B, pred_len, C] on the original data scale, on
        the device of ``x_enc``.
    """
    # Per-series instance normalisation
    means = x_enc.mean(1, keepdim=True).detach()
    x_enc = x_enc.sub(means)
    stdev = torch.sqrt(
        torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
    x_enc = x_enc.div(stdev)

    B, L, C = x_enc.shape
    # One row per univariate series. The channel axis must be moved in front
    # of the time axis first; a plain reshape of [B, L, C] would interleave
    # time steps and channels.
    series = x_enc.permute(0, 2, 1).reshape(B * C, L)

    # The model consumes numpy input; detach so tensors that track
    # gradients can be converted as well.
    output, _ = self.model.forecast(
        horizon=self.pred_len,
        inputs=series.detach().cpu().numpy()
    )
    output = torch.as_tensor(output, dtype=x_enc.dtype, device=x_enc.device)

    # [B*C, pred_len] -> [B, pred_len, C]
    dec_out = output.reshape(B, C, -1).permute(0, 2, 1)

    # De-normalisation; the [B, 1, C] statistics broadcast over time.
    dec_out = dec_out * stdev
    dec_out = dec_out + means
    return dec_out
x): B, T, N = x.size() period_list, period_weight = FFT_for_Period(x, self.k) res = [] for i in range(self.k): period = period_list[i] # padding if (self.seq_len + self.pred_len) % period != 0: length = ( ((self.seq_len + self.pred_len) // period) + 1) * period padding = torch.zeros([x.shape[0], (length - (self.seq_len + self.pred_len)), x.shape[2]]).to(x.device) out = torch.cat([x, padding], dim=1) else: length = (self.seq_len + self.pred_len) out = x # reshape out = out.reshape(B, length // period, period, N).permute(0, 3, 1, 2).contiguous() # 2D conv: from 1d Variation to 2d Variation out = self.conv(out) # reshape back out = out.permute(0, 2, 3, 1).reshape(B, -1, N) res.append(out[:, :(self.seq_len + self.pred_len), :]) res = torch.stack(res, dim=-1) # adaptive aggregation period_weight = F.softmax(period_weight, dim=1) period_weight = period_weight.unsqueeze( 1).unsqueeze(1).repeat(1, T, N, 1) res = torch.sum(res * period_weight, -1) # residual connection res = res + x return res class Model(nn.Module): """ Paper link: https://openreview.net/pdf?id=ju_Uqw384Oq """ def __init__(self, configs): super(Model, self).__init__() self.configs = configs self.task_name = configs.task_name self.seq_len = configs.seq_len self.label_len = configs.label_len self.pred_len = configs.pred_len self.model = nn.ModuleList([TimesBlock(configs) for _ in range(configs.e_layers)]) self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) self.layer = configs.e_layers self.layer_norm = nn.LayerNorm(configs.d_model) if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': self.predict_linear = nn.Linear( self.seq_len, self.pred_len + self.seq_len) self.projection = nn.Linear( configs.d_model, configs.c_out, bias=True) if self.task_name == 'imputation' or self.task_name == 'anomaly_detection': self.projection = nn.Linear( configs.d_model, configs.c_out, bias=True) if self.task_name == 'classification': 
def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
    """
    Reconstruct a partially observed series.

    Args:
        x_enc: input series, [B, T, C].
        x_mark_enc: time features forwarded to the embedding.
        x_dec, x_mark_dec: unused.
        mask: tensor shaped like ``x_enc``; entries equal to 1 are observed,
            entries equal to 0 are missing.

    Returns:
        De-normalised reconstruction with the temporal length of the
        encoder output.
    """
    # Normalization from Non-stationary Transformer, restricted to observed
    # values. The count is clamped so that a channel without any observed
    # value yields 0 instead of NaN/inf.
    observed = torch.sum(mask == 1, dim=1).clamp(min=1)
    means = torch.sum(x_enc, dim=1) / observed
    means = means.unsqueeze(1).detach()
    x_enc = x_enc.sub(means)
    x_enc = x_enc.masked_fill(mask == 0, 0)
    stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / observed + 1e-5)
    stdev = stdev.unsqueeze(1).detach()
    x_enc = x_enc.div(stdev)

    # embedding
    enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
    # TimesNet
    for i in range(self.layer):
        enc_out = self.layer_norm(self.model[i](enc_out))
    # project back
    dec_out = self.projection(enc_out)

    # De-Normalization from Non-stationary Transformer. The [B, 1, C]
    # statistics broadcast over whatever length the output has, so this no
    # longer depends on pred_len being 0.
    dec_out = dec_out.mul(stdev)
    dec_out = dec_out.add(means)
    return dec_out
def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """
    Route the inputs to the implementation of the configured task.

    Forecasting returns only the last ``pred_len`` steps; a task name that
    is not handled here yields None.
    """
    task = self.task_name

    if task in ('long_term_forecast', 'short_term_forecast'):
        prediction = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return prediction[:, -self.pred_len:, :]  # [B, L, D]

    if task == 'imputation':
        return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]

    if task == 'anomaly_detection':
        return self.anomaly_detection(x_enc)  # [B, L, D]

    if task == 'classification':
        return self.classification(x_enc, x_mark_enc)  # [B, N]

    return None
class Model(nn.Module):
    """
    Vanilla Transformer with O(L^2) complexity
    Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf
    """

    def __init__(self, configs):
        """Build the encoder plus the head required by ``configs.task_name``."""
        super().__init__()
        task = configs.task_name
        self.task_name = task
        self.pred_len = configs.pred_len

        def attention(flag):
            # ``flag`` is forwarded as FullAttention's first positional argument
            # (True only for the decoder self-attention below).
            return AttentionLayer(
                FullAttention(flag, configs.factor, attention_dropout=configs.dropout,
                              output_attention=False),
                configs.d_model, configs.n_heads)

        # Embedding
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        # Encoder
        encoder_layers = [
            EncoderLayer(
                attention(False),
                configs.d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation
            )
            for _ in range(configs.e_layers)
        ]
        self.encoder = Encoder(encoder_layers, norm_layer=torch.nn.LayerNorm(configs.d_model))

        # Decoder (only the forecasting tasks use one)
        if task in ('long_term_forecast', 'short_term_forecast'):
            self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)
            decoder_layers = [
                DecoderLayer(
                    attention(True),
                    attention(False),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for _ in range(configs.d_layers)
            ]
            self.decoder = Decoder(
                decoder_layers,
                norm_layer=torch.nn.LayerNorm(configs.d_model),
                projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
            )
        # Imputation and anomaly detection share the same per-step linear head
        if task in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if task == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encode ``x_enc`` and decode ``x_dec`` against the encoder memory."""
        memory, _ = self.encoder(self.enc_embedding(x_enc, x_mark_enc), attn_mask=None)
        target = self.dec_embedding(x_dec, x_mark_dec)
        return self.decoder(target, memory, x_mask=None, cross_mask=None)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Encode ``x_enc`` (with its time marks) and project the encoder output."""
        encoded, _ = self.encoder(self.enc_embedding(x_enc, x_mark_enc), attn_mask=None)
        return self.projection(encoded)

    def anomaly_detection(self, x_enc):
        """Encode ``x_enc`` without time marks and project the encoder output."""
        encoded, _ = self.encoder(self.enc_embedding(x_enc, None), attn_mask=None)
        return self.projection(encoded)

    def classification(self, x_enc, x_mark_enc):
        """Encode, mask with ``x_mark_enc``, flatten and project to class scores."""
        encoded, _ = self.encoder(self.enc_embedding(x_enc, None), attn_mask=None)

        # the output transformer encoder/decoder embeddings don't include non-linearity
        feats = self.dropout(self.act(encoded))
        feats = feats * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        flat = feats.reshape(feats.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(flat)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; unknown task names return ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            prediction = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return prediction[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
class TokenMixer(nn.Module):
    """Two-layer MLP applied along the last axis of a transposed input.

    The constructor arguments previously defaulted to shared mutable lists;
    they now default to ``None``. ``input_seq``, ``pred_seq``, ``factor`` and
    ``dropout`` must be supplied, since they size the layers built here.
    ``batch_size``, ``channel`` and ``d_model`` are only stored.
    """

    def __init__(self, input_seq=None, batch_size=None, channel=None, pred_seq=None, dropout=None,
                 factor=None, d_model=None):
        super().__init__()
        self.input_seq = input_seq
        self.batch_size = batch_size
        self.channel = channel
        self.pred_seq = pred_seq
        self.dropout = dropout
        self.factor = factor
        self.d_model = d_model

        self.dropoutLayer = nn.Dropout(self.dropout)
        # input_seq -> pred_seq * factor -> pred_seq
        self.layers = nn.Sequential(nn.Linear(self.input_seq, self.pred_seq * self.factor),
                                    nn.GELU(),
                                    nn.Dropout(self.dropout),
                                    nn.Linear(self.pred_seq * self.factor, self.pred_seq))

    def forward(self, x):
        """Swap axes 1 and 2, apply the MLP on the last axis, swap back."""
        x = x.transpose(1, 2)
        x = self.layers(x)
        x = x.transpose(1, 2)
        return x


class Mixer(nn.Module):
    """Patch (token) mixing followed by a residual embedding-mixing MLP.

    As with ``TokenMixer``, the former mutable list defaults are replaced
    by ``None``; the sizing arguments must be supplied by the caller.
    """

    def __init__(self, input_seq=None, out_seq=None, batch_size=None, channel=None, d_model=None,
                 dropout=None, tfactor=None, dfactor=None):
        super().__init__()
        self.input_seq = input_seq
        self.pred_seq = out_seq
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.dropout = dropout
        self.tfactor = tfactor  # expansion factor for patch mixer
        self.dfactor = dfactor  # expansion factor for embedding mixer

        self.tMixer = TokenMixer(input_seq=self.input_seq, batch_size=self.batch_size, channel=self.channel,
                                 pred_seq=self.pred_seq, dropout=self.dropout, factor=self.tfactor,
                                 d_model=self.d_model)
        self.dropoutLayer = nn.Dropout(self.dropout)
        self.norm1 = nn.BatchNorm2d(self.channel)
        self.norm2 = nn.BatchNorm2d(self.channel)

        self.embeddingMixer = nn.Sequential(nn.Linear(self.d_model, self.d_model * self.dfactor),
                                            nn.GELU(),
                                            nn.Dropout(self.dropout),
                                            nn.Linear(self.d_model * self.dfactor, self.d_model))

    def forward(self, x):
        '''
        Parameters
        ----------
        x : input: [Batch, Channel, Patch_number, d_model]

        Returns
        -------
        x: output: [Batch, Channel, Patch_number, d_model]
        '''
        x = self.norm1(x)
        # Move d_model to axis 1 so TokenMixer's linear layers act on the patch axis.
        x = x.permute(0, 3, 1, 2)
        x = self.dropoutLayer(self.tMixer(x))
        # Restore the [Batch, Channel, Patch_number, d_model] layout.
        x = x.permute(0, 2, 3, 1)
        x = self.norm2(x)
        # Residual connection around the embedding mixer only.
        x = x + self.dropoutLayer(self.embeddingMixer(x))
        return x
class ResolutionBranch(nn.Module):
    """Predict one coefficient series: patching, embedding, two mixers, linear head.

    The constructor arguments previously defaulted to shared mutable lists;
    they now default to ``None``. The sizing arguments (``input_seq``,
    ``pred_seq``, ``channel``, ``d_model``, dropouts, factors, ``patch_len``,
    ``patch_stride``) must be supplied by the caller.
    """

    def __init__(self, input_seq=None, pred_seq=None, batch_size=None, channel=None, d_model=None,
                 dropout=None, embedding_dropout=None, tfactor=None, dfactor=None, patch_len=None,
                 patch_stride=None):
        super().__init__()
        self.input_seq = input_seq
        self.pred_seq = pred_seq
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.dropout = dropout
        self.embedding_dropout = embedding_dropout
        self.tfactor = tfactor
        self.dfactor = dfactor
        self.patch_len = patch_len
        self.patch_stride = patch_stride
        # Number of windows produced by do_patching: the series is extended by
        # patch_stride samples before it is unfolded.
        self.patch_num = int((self.input_seq - self.patch_len) / self.patch_stride + 2)

        self.patch_norm = nn.BatchNorm2d(self.channel)
        self.patch_embedding_layer = nn.Linear(self.patch_len, self.d_model)  # shared among all channels
        self.mixer1 = Mixer(input_seq=self.patch_num, out_seq=self.patch_num, batch_size=self.batch_size,
                            channel=self.channel, d_model=self.d_model, dropout=self.dropout,
                            tfactor=self.tfactor, dfactor=self.dfactor)
        self.mixer2 = Mixer(input_seq=self.patch_num, out_seq=self.patch_num, batch_size=self.batch_size,
                            channel=self.channel, d_model=self.d_model, dropout=self.dropout,
                            tfactor=self.tfactor, dfactor=self.dfactor)
        self.norm = nn.BatchNorm2d(self.channel)
        self.dropoutLayer = nn.Dropout(self.embedding_dropout)
        self.head = nn.Sequential(nn.Flatten(start_dim=-2, end_dim=-1),
                                  nn.Linear(self.patch_num * self.d_model, self.pred_seq))

    def forward(self, x):
        '''
        Parameters
        ----------
        x : input coefficient series: [Batch, channel, length_of_coefficient_series]

        Returns
        -------
        out : predicted coefficient series: [Batch, channel, length_of_pred_coeff_series]
        '''
        x_patch = self.do_patching(x)
        x_patch = self.patch_norm(x_patch)
        x_emb = self.dropoutLayer(self.patch_embedding_layer(x_patch))

        out = self.mixer1(x_emb)
        # Residual connection around the second mixer.
        out = out + self.mixer2(out)
        out = self.norm(out)

        out = self.head(out)
        return out

    def do_patching(self, x):
        """Repeat the last sample ``patch_stride`` times, then unfold into patches."""
        x_end = x[:, :, -1:]
        x_padding = x_end.repeat(1, 1, self.patch_stride)
        x_new = torch.cat((x, x_padding), dim=-1)
        x_patch = x_new.unfold(dimension=-1, size=self.patch_len, step=self.patch_stride)
        return x_patch
class WPMixerCore(nn.Module):
    """Wavelet decomposition, one ``ResolutionBranch`` per coefficient series, reconstruction.

    The constructor arguments previously defaulted to shared mutable lists;
    they now default to ``None`` and must be supplied by the caller.
    """

    def __init__(self, input_length=None, pred_length=None, wavelet_name=None, level=None, batch_size=None,
                 channel=None, d_model=None, dropout=None, embedding_dropout=None, tfactor=None,
                 dfactor=None, device=None, patch_len=None, patch_stride=None, no_decomposition=None,
                 use_amp=None):
        super().__init__()
        self.input_length = input_length
        self.pred_length = pred_length
        self.wavelet_name = wavelet_name
        self.level = level
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.dropout = dropout
        self.embedding_dropout = embedding_dropout
        self.device = device
        self.no_decomposition = no_decomposition
        self.tfactor = tfactor
        self.dfactor = dfactor
        self.use_amp = use_amp

        self.Decomposition_model = Decomposition(input_length=self.input_length,
                                                 pred_length=self.pred_length,
                                                 wavelet_name=self.wavelet_name,
                                                 level=self.level,
                                                 batch_size=self.batch_size,
                                                 channel=self.channel,
                                                 d_model=self.d_model,
                                                 tfactor=self.tfactor,
                                                 dfactor=self.dfactor,
                                                 device=self.device,
                                                 no_decomposition=self.no_decomposition,
                                                 use_amp=self.use_amp)

        self.input_w_dim = self.Decomposition_model.input_w_dim  # list of the length of the input coefficient series
        self.pred_w_dim = self.Decomposition_model.pred_w_dim  # list of the length of the predicted coefficient series

        self.patch_len = patch_len
        self.patch_stride = patch_stride

        # (m+1) number of resolutionBranch
        self.resolutionBranch = nn.ModuleList([ResolutionBranch(input_seq=in_len,
                                                                pred_seq=self.pred_w_dim[i],
                                                                batch_size=self.batch_size,
                                                                channel=self.channel,
                                                                d_model=self.d_model,
                                                                dropout=self.dropout,
                                                                embedding_dropout=self.embedding_dropout,
                                                                tfactor=self.tfactor,
                                                                dfactor=self.dfactor,
                                                                patch_len=self.patch_len,
                                                                patch_stride=self.patch_stride)
                                               for i, in_len in enumerate(self.input_w_dim)])

    def forward(self, xL):
        '''
        Parameters
        ----------
        xL : Look back window: [Batch, look_back_length, channel]

        Returns
        -------
        xT : Prediction time series: [Batch, prediction_length, output_channel]
        '''
        x = xL.transpose(1, 2)  # [batch, channel, look_back_length]

        # xA: approximation coefficient series,
        # xD: detail coefficient series
        # yA: predicted approximation coefficient series
        # yD: predicted detail coefficient series
        xA, xD = self.Decomposition_model.transform(x)

        # Branch 0 handles the approximation series; branch i + 1 handles detail series i.
        yA = self.resolutionBranch[0](xA)
        yD = [self.resolutionBranch[i + 1](xD_i) for i, xD_i in enumerate(xD)]

        y = self.Decomposition_model.inv_transform(yA, yD)
        y = y.transpose(1, 2)
        xT = y[:, -self.pred_length:, :]  # decomposition output is always even, but pred length can be odd

        return xT
class Model(nn.Module):
    """WPMixer wrapper: input normalisation around ``WPMixerCore``.

    Only the two forecasting tasks are implemented; the other known task
    names raise ``NotImplementedError``.
    """

    def __init__(self, args, tfactor=5, dfactor=5, wavelet='db2', level=1, stride=8, no_decomposition=False):
        super(Model, self).__init__()
        self.args = args
        self.task_name = args.task_name
        core_kwargs = dict(
            input_length=self.args.seq_len,
            pred_length=self.args.pred_len,
            wavelet_name=wavelet,
            level=level,
            batch_size=self.args.batch_size,
            channel=self.args.c_out,
            d_model=self.args.d_model,
            dropout=self.args.dropout,
            # the embedding dropout reuses the general dropout rate
            embedding_dropout=self.args.dropout,
            tfactor=tfactor,
            dfactor=dfactor,
            device=self.args.device,
            patch_len=self.args.patch_len,
            patch_stride=stride,
            no_decomposition=no_decomposition,
            use_amp=self.args.use_amp,
        )
        self.wpmixerCore = WPMixerCore(**core_kwargs)

    def forecast(self, x_enc, x_mark_enc, x_dec, batch_y_mark):
        """Normalise ``x_enc`` along axis 1, run the core, undo the normalisation."""
        horizon = self.args.pred_len

        # Normalization
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
        normed = centered / stdev

        # Keep only the last c_out channels of the core output
        pred = self.wpmixerCore(normed)[:, :, -self.args.c_out:]

        # De-Normalization
        scale = stdev[:, 0].unsqueeze(1).repeat(1, horizon, 1)
        shift = means[:, 0].unsqueeze(1).repeat(1, horizon, 1)
        return pred * scale + shift

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Run forecasting; raise for unsupported tasks; ``None`` for unknown names."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            return self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)  # [B, L, D]
        if task in ('imputation', 'anomaly_detection', 'classification'):
            raise NotImplementedError("Task " + task + " for WPMixer is temporarily not supported")
        return None
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2310.06625
    """

    def __init__(self, configs):
        """Build the inverted embedding, the encoder and the task-specific head."""
        super().__init__()
        task = configs.task_name
        self.task_name = task
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        # Embedding
        self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed,
                                                    configs.freq, configs.dropout)
        # Encoder
        layers = [
            EncoderLayer(
                AttentionLayer(
                    FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                  output_attention=False), configs.d_model, configs.n_heads),
                configs.d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation
            )
            for _ in range(configs.e_layers)
        ]
        self.encoder = Encoder(layers, norm_layer=torch.nn.LayerNorm(configs.d_model))
        # Decoder: a linear head whose output width depends on the task
        if task in ('long_term_forecast', 'short_term_forecast'):
            self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True)
        if task in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True)
        if task == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.enc_in, configs.num_class)

    @staticmethod
    def _standardize(x_enc):
        """Normalization from Non-stationary Transformer (statistics over axis 1)."""
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
        return centered / stdev, means, stdev

    @staticmethod
    def _restore(dec_out, means, stdev, steps):
        """De-Normalization from Non-stationary Transformer over ``steps`` time steps."""
        dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, steps, 1))
        return dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, steps, 1))

    def _reconstruct(self, x_norm, x_mark):
        """Embed, encode and project; keep only the first N output columns."""
        _, _, n_vars = x_norm.shape
        tokens, _ = self.encoder(self.enc_embedding(x_norm, x_mark), attn_mask=None)
        return self.projection(tokens).permute(0, 2, 1)[:, :, :n_vars]

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast from the normalised input, then undo the normalisation."""
        x_norm, means, stdev = self._standardize(x_enc)
        dec_out = self._reconstruct(x_norm, x_mark_enc)
        return self._restore(dec_out, means, stdev, self.pred_len)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct the input window; ``mask`` is accepted but not used here."""
        x_norm, means, stdev = self._standardize(x_enc)
        window = x_norm.shape[1]
        dec_out = self._reconstruct(x_norm, x_mark_enc)
        return self._restore(dec_out, means, stdev, window)

    def anomaly_detection(self, x_enc):
        """Reconstruct the input window without time marks."""
        x_norm, means, stdev = self._standardize(x_enc)
        window = x_norm.shape[1]
        dec_out = self._reconstruct(x_norm, None)
        return self._restore(dec_out, means, stdev, window)

    def classification(self, x_enc, x_mark_enc):
        """Encode without normalisation, flatten and project to class scores."""
        encoded, _ = self.encoder(self.enc_embedding(x_enc, None), attn_mask=None)

        # the output transformer encoder/decoder embeddings don't include non-linearity
        feats = self.dropout(self.act(encoded))
        flat = feats.reshape(feats.shape[0], -1)  # (batch_size, c_in * d_model)
        return self.projection(flat)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; unknown task names return ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            prediction = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return prediction[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
# Task name -> (module, class) of the experiment runner. The module is imported
# lazily so only the selected experiment's dependencies are loaded.
EXPERIMENTS = {
    'long_term_forecast': ('exp.exp_long_term_forecasting', 'Exp_Long_Term_Forecast'),
    'short_term_forecast': ('exp.exp_short_term_forecasting', 'Exp_Short_Term_Forecast'),
    'imputation': ('exp.exp_imputation', 'Exp_Imputation'),
    'anomaly_detection': ('exp.exp_anomaly_detection', 'Exp_Anomaly_Detection'),
    'classification': ('exp.exp_classification', 'Exp_Classification'),
    'zero_shot_forecast': ('exp.exp_zero_shot_forecasting', 'Exp_Zero_Shot_Forecast'),
}

# Preset augmentation switches, registered in this order as store_true flags.
AUGMENTATION_FLAGS = (
    ('--jitter', "Jitter preset augmentation"),
    ('--scaling', "Scaling preset augmentation"),
    ('--permutation', "Equal Length Permutation preset augmentation"),
    ('--randompermutation', "Random Length Permutation preset augmentation"),
    ('--magwarp', "Magnitude warp preset augmentation"),
    ('--timewarp', "Time warp preset augmentation"),
    ('--windowslice', "Window slice preset augmentation"),
    ('--windowwarp', "Window warp preset augmentation"),
    ('--rotation', "Rotation preset augmentation"),
    ('--spawner', "SPAWNER preset augmentation"),
    ('--dtwwarp', "DTW warp preset augmentation"),
    ('--shapedtwwarp', "Shape DTW warp preset augmentation"),
    ('--wdba', "Weighted DBA preset augmentation"),
    ('--discdtw', "Discrimitive DTW warp preset augmentation"),
    ('--discsdtw', "Discrimitive shapeDTW warp preset augmentation"),
)


def build_parser():
    """Return the argument parser holding every option of the entry script."""
    parser = argparse.ArgumentParser(description='TimesNet')

    # basic config
    parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
                        help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
    parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
    parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
    parser.add_argument('--model', type=str, required=True, default='Autoformer',
                        help='model name, options: [Autoformer, Transformer, TimesNet]')

    # data loader
    parser.add_argument('--data', type=str, required=True, default='ETTh1', help='dataset type')
    parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file')
    parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
    parser.add_argument('--features', type=str, default='M',
                        help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
    parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
    parser.add_argument('--freq', type=str, default='h',
                        help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
    parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

    # forecasting task
    parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
    parser.add_argument('--label_len', type=int, default=48, help='start token length')
    parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
    parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
    parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False)

    # imputation task
    parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio')

    # anomaly detection task
    parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%%)')

    # model define
    parser.add_argument('--expand', type=int, default=2, help='expansion factor for Mamba')
    parser.add_argument('--d_conv', type=int, default=4, help='conv kernel size for Mamba')
    parser.add_argument('--tv_dt', type=int, default=0, help='whether to use time variant dt for MambaSL')
    parser.add_argument('--tv_B', type=int, default=0, help='whether to use time variant B for MambaSL')
    parser.add_argument('--tv_C', type=int, default=0, help='whether to use time variant C for MambaSL')
    parser.add_argument('--use_D', type=int, default=0, help='whether to use D for MambaSL')
    parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock')
    parser.add_argument('--num_kernels', type=int, default=6, help='for Inception')
    parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
    parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
    parser.add_argument('--c_out', type=int, default=7, help='output size')
    parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
    parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
    parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
    parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
    parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
    parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
    parser.add_argument('--factor', type=int, default=1, help='attn factor')
    parser.add_argument('--distil', action='store_false',
                        help='whether to use distilling in encoder, using this argument means not using distilling',
                        default=True)
    parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
    parser.add_argument('--embed', type=str, default='timeF',
                        help='time features encoding, options:[timeF, fixed, learned]')
    parser.add_argument('--activation', type=str, default='gelu', help='activation')
    parser.add_argument('--channel_independence', type=int, default=1,
                        help='0: channel dependence 1: channel independence for FreTS model')
    parser.add_argument('--decomp_method', type=str, default='moving_avg',
                        help='method of series decompsition, only support moving_avg or dft_decomp')
    parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalize; True 1 False 0')
    parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers')
    parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size')
    parser.add_argument('--down_sampling_method', type=str, default=None,
                        help='down sampling method, only support avg, max, conv')
    parser.add_argument('--seg_len', type=int, default=96,
                        help='the length of segmen-wise iteration of SegRNN')

    # optimization
    parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
    parser.add_argument('--itr', type=int, default=1, help='experiments times')
    parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
    parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
    parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
    parser.add_argument('--des', type=str, default='test', help='exp description')
    parser.add_argument('--loss', type=str, default='MSE', help='loss function')
    parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
    parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

    # GPU
    parser.add_argument('--use_gpu', action='store_true', default=True, help='use gpu (default: on)')
    parser.add_argument('--no_use_gpu', action='store_false', dest='use_gpu', help='disable gpu (force cpu)')
    parser.add_argument('--gpu', type=int, default=0, help='gpu')
    parser.add_argument('--gpu_type', type=str, default='cuda', help='gpu type')  # cuda or mps
    parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
    parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')

    # de-stationary projector params
    parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128],
                        help='hidden layer dimensions of projector (List)')
    parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector')

    # metrics (dtw)
    parser.add_argument('--use_dtw', action='store_true', default=False,
                        help='enable dtw metric (time consuming; default: off)')

    # Augmentation
    parser.add_argument('--augmentation_ratio', type=int, default=0, help="How many times to augment")
    parser.add_argument('--seed', type=int, default=2, help="Randomization seed")
    for flag, description in AUGMENTATION_FLAGS:
        parser.add_argument(flag, default=False, action="store_true", help=description)
    parser.add_argument('--extra_tag', type=str, default="", help="Anything extra")

    # TimeXer
    parser.add_argument('--patch_len', type=int, default=16, help='patch length')

    # GCN
    parser.add_argument('--node_dim', type=int, default=10, help='each node embbed to dim dimentions')
    parser.add_argument('--gcn_depth', type=int, default=2, help='')
    parser.add_argument('--gcn_dropout', type=float, default=0.3, help='')
    parser.add_argument('--propalpha', type=float, default=0.3, help='')
    parser.add_argument('--conv_channel', type=int, default=32, help='')
    parser.add_argument('--skip_channel', type=int, default=32, help='')

    parser.add_argument('--individual', action='store_true', default=False,
                        help='DLinear: a linear layer for each variate(channel) individually')

    # TimeFilter
    parser.add_argument('--alpha', type=float, default=0.1, help='KNN for Graph Construction')
    parser.add_argument('--top_p', type=float, default=0.5, help='Dynamic Routing in MoE')
    parser.add_argument('--pos', type=int, choices=[0, 1], default=1,
                        help='Positional Embedding. Set pos to 0 or 1')
    return parser


def select_device(args):
    """Pick the torch device implied by ``args.use_gpu`` and hardware availability.

    CUDA is preferred, then MPS, then CPU. With ``--no_use_gpu`` the result
    is always the CPU, matching that flag's help text ("force cpu"); before
    this fix an available MPS backend was still selected.
    """
    if args.use_gpu and torch.cuda.is_available():
        print('Using GPU')
        return torch.device('cuda:{}'.format(args.gpu))
    mps_backend = getattr(torch.backends, 'mps', None)
    if args.use_gpu and mps_backend is not None and mps_backend.is_available():
        device = torch.device('mps')
    else:
        device = torch.device('cpu')
    print('Using cpu or mps')
    return device


def resolve_experiment(task_name):
    """Import and return the experiment class for ``task_name``.

    Unknown task names fall back to the long-term forecasting experiment.
    """
    import importlib

    module_name, class_name = EXPERIMENTS.get(task_name, EXPERIMENTS['long_term_forecast'])
    return getattr(importlib.import_module(module_name), class_name)


def build_setting(args, ii):
    """Return the setting string passed to ``exp.train`` / ``exp.test`` for iteration ``ii``."""
    # Override setting for specific model to ensure proper checkpoint naming and logging
    if args.model == 'MambaSingleLayer' and args.task_name == 'classification':
        return (f'{args.task_name}_CLS_{args.model_id}_{args.model}_{args.data}_ft{args.features}'
                f'_sl{args.seq_len}_ll{args.label_len}_pl{args.pred_len}_dm{args.d_model}_ds{args.d_ff}'
                f'_expand{args.expand}_dc{args.d_conv}_nk{args.num_kernels}'
                f'_tvdt{int(args.tv_dt)}_tvB{int(args.tv_B)}_tvC{int(args.tv_C)}'
                f'_useD{int(args.use_D)}_{args.des}_{ii}')
    return '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_expand{}_dc{}_fc{}_eb{}_dt{}_{}_{}'.format(
        args.task_name,
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.expand,
        args.d_conv,
        args.factor,
        args.embed,
        args.distil,
        args.des, ii)


def release_device_cache(args):
    """Free cached accelerator memory after a run, when a GPU is in use."""
    if not args.use_gpu:
        return
    if args.gpu_type == 'mps':
        # The cache-release function lives in torch.mps; torch.backends.mps
        # has no empty_cache, so the previous call raised AttributeError.
        torch.mps.empty_cache()
    elif args.gpu_type == 'cuda':
        torch.cuda.empty_cache()


if __name__ == '__main__':
    fix_seed = 2021
    random.seed(fix_seed)
    torch.manual_seed(fix_seed)
    np.random.seed(fix_seed)

    args = build_parser().parse_args()
    args.device = select_device(args)

    if args.use_gpu and args.use_multi_gpu:
        args.devices = args.devices.replace(' ', '')
        device_ids = args.devices.split(',')
        args.device_ids = [int(id_) for id_ in device_ids]
        args.gpu = args.device_ids[0]

    print('Args in experiment:')
    print_args(args)

    Exp = resolve_experiment(args.task_name)

    if args.is_training:
        for ii in range(args.itr):
            # setting record of experiments
            exp = Exp(args)  # set experiments
            setting = build_setting(args, ii)

            print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
            exp.train(setting)

            print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
            exp.test(setting)
            release_device_cache(args)
    else:
        exp = Exp(args)  # set experiments
        setting = build_setting(args, 0)

        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting, test=1)
        release_device_cache(args)
CUDA_VISIBLE_DEVICES=0

# DLinear on MSL (continuation of DLinear.sh): anomaly detection, --is_training 1.
python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model DLinear \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 100 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/ETSformer.sh
================================================
# ETSformer on MSL: anomaly detection; the only script here that also sets --d_layers.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model ETSformer \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 100 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --d_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/FEDformer.sh
================================================
# FEDformer on MSL: anomaly detection with --pred_len 0.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model FEDformer \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/FiLM.sh
================================================
# FiLM on MSL: anomaly detection; pinned to GPU index 6 and uses batch size 32.
# NOTE(review): device index 6 differs from the sibling scripts -- confirm it suits your machine.
export CUDA_VISIBLE_DEVICES=6

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model FiLM \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 100 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 32 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/Informer.sh
================================================
# Informer on MSL: anomaly detection with --pred_len 0.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model Informer \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/KANAD.sh
================================================
# KANAD on MSL: anomaly detection with its own settings (seq_len 64, d_model 3,
# learning rate 0.01, patience 5, up to 100 epochs); --pred_len is left at run.py's default.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model KANAD \
  --data MSL \
  --features M \
  --seq_len 64 \
  --d_model 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --learning_rate 0.01 \
  --batch_size 128 \
  --num_workers 4 \
  --patience 5 \
  --train_epochs 100

================================================
FILE: scripts/anomaly_detection/MSL/LightTS.sh
================================================
# LightTS on MSL: anomaly detection with --pred_len 0.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model LightTS \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/MICN.sh
================================================
# MICN on MSL: anomaly detection with --pred_len 0; runs on GPU index 1.
export CUDA_VISIBLE_DEVICES=1

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model MICN \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/Pyraformer.sh
================================================
# Pyraformer on MSL: anomaly detection with --pred_len 0.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model Pyraformer \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/Reformer.sh
================================================
# Reformer on MSL: anomaly detection with --pred_len 0.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model Reformer \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/TimesNet.sh
================================================
# TimesNet on MSL: anomaly detection with a small configuration (d_model 8, d_ff 16,
# one encoder layer, top_k 3) trained for a single epoch; runs on GPU index 2.
export CUDA_VISIBLE_DEVICES=2

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model TimesNet \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 8 \
  --d_ff 16 \
  --e_layers 1 \
  --enc_in 55 \
  --c_out 55 \
  --top_k 3 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 1

================================================
FILE: scripts/anomaly_detection/MSL/Transformer.sh
================================================
# Transformer on MSL: anomaly detection with --pred_len 0; runs on GPU index 1.
export CUDA_VISIBLE_DEVICES=1

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model Transformer \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/MSL/iTransformer.sh
================================================
# Anomaly detection on MSL with iTransformer; only GPU 0 is made visible.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/MSL \
  --model_id MSL \
  --model $model_name \
  --data MSL \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 55 \
  --c_out 55 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/PSM/Autoformer.sh
================================================
# Anomaly detection on PSM with Autoformer; only GPU 6 is made visible.
export CUDA_VISIBLE_DEVICES=6

model_name=Autoformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/PSM \
  --model_id PSM \
  --model $model_name \
  --data PSM \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 25 \
  --c_out 25 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/PSM/DLinear.sh
================================================
# Anomaly detection on PSM with DLinear; only GPU 6 is made visible.
export CUDA_VISIBLE_DEVICES=6

model_name=DLinear

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/PSM \
  --model_id PSM \
  --model $model_name \
  --data PSM \
  --features M \
  --seq_len 100 \
  --pred_len 100 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 25 \
  --c_out 25 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/PSM/KANAD.sh
================================================
# Anomaly detection on PSM with KANAD; only GPU 0 is made visible.
export CUDA_VISIBLE_DEVICES=0

model_name=KANAD

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/PSM \
  --model_id PSM \
  --model $model_name \
  --data PSM \
  --features M \
  --seq_len 64 \
  --d_model 6 \
  --enc_in 25 \
  --c_out 25 \
  --anomaly_ratio 1 \
  --learning_rate 0.01 \
  --batch_size 128 \
  --num_workers 4 \
  --patience 5 \
  --train_epochs 100

================================================
FILE: scripts/anomaly_detection/PSM/TimesNet.sh
================================================
# Anomaly detection on PSM with TimesNet; only GPU 6 is made visible.
export CUDA_VISIBLE_DEVICES=6

model_name=TimesNet

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/PSM \
  --model_id PSM \
  --model $model_name \
  --data PSM \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 64 \
  --d_ff 64 \
  --e_layers 2 \
  --enc_in 25 \
  --c_out 25 \
  --top_k 3 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/PSM/Transformer.sh
================================================
# Anomaly detection on PSM with Transformer; only GPU 6 is made visible.
export CUDA_VISIBLE_DEVICES=6

model_name=Transformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/PSM \
  --model_id PSM \
  --model $model_name \
  --data PSM \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 25 \
  --c_out 25 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/SMAP/Autoformer.sh
================================================
# Anomaly detection on SMAP with Autoformer; only GPU 7 is made visible.
export CUDA_VISIBLE_DEVICES=7

model_name=Autoformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMAP \
  --model_id SMAP \
  --model $model_name \
  --data SMAP \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 25 \
  --c_out 25 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/SMAP/KANAD.sh
================================================
# Anomaly detection on SMAP with KANAD; only GPU 0 is made visible.
export CUDA_VISIBLE_DEVICES=0

model_name=KANAD

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMAP \
  --model_id SMAP \
  --model $model_name \
  --data SMAP \
  --features M \
  --seq_len 64 \
  --d_model 3 \
  --enc_in 25 \
  --c_out 25 \
  --anomaly_ratio 1 \
  --learning_rate 0.01 \
  --batch_size 128 \
  --num_workers 4 \
  --patience 5 \
  --train_epochs 100

================================================
FILE: scripts/anomaly_detection/SMAP/TimesNet.sh
================================================
# Anomaly detection on SMAP with TimesNet; only GPU 0 is made visible.
export CUDA_VISIBLE_DEVICES=0

model_name=TimesNet

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMAP \
  --model_id SMAP \
  --model $model_name \
  --data SMAP \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 25 \
  --c_out 25 \
  --top_k 3 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/SMAP/Transformer.sh
================================================
# Anomaly detection on SMAP with Transformer; only GPU 7 is made visible.
export CUDA_VISIBLE_DEVICES=7

model_name=Transformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMAP \
  --model_id SMAP \
  --model $model_name \
  --data SMAP \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 25 \
  --c_out 25 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/SMD/Autoformer.sh
================================================
# Anomaly detection on SMD with Autoformer; only GPU 2 is made visible.
export CUDA_VISIBLE_DEVICES=2

model_name=Autoformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMD \
  --model_id SMD \
  --model $model_name \
  --data SMD \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 38 \
  --c_out 38 \
  --anomaly_ratio 0.5 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/SMD/KANAD.sh
================================================
# Anomaly detection on SMD with KANAD; only GPU 0 is made visible.
export CUDA_VISIBLE_DEVICES=0

model_name=KANAD

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMD \
  --model_id SMD \
  --model $model_name \
  --data SMD \
  --features M \
  --seq_len 96 \
  --d_model 4 \
  --enc_in 38 \
  --c_out 38 \
  --anomaly_ratio 0.5 \
  --learning_rate 0.01 \
  --batch_size 128 \
  --num_workers 4 \
  --patience 5 \
  --train_epochs 100

================================================
FILE: scripts/anomaly_detection/SMD/TimesNet.sh
================================================
# Anomaly detection on SMD with TimesNet; only GPU 2 is made visible.
export CUDA_VISIBLE_DEVICES=2

model_name=TimesNet

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMD \
  --model_id SMD \
  --model $model_name \
  --data SMD \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 64 \
  --d_ff 64 \
  --e_layers 2 \
  --enc_in 38 \
  --c_out 38 \
  --top_k 5 \
  --anomaly_ratio 0.5 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/SMD/Transformer.sh
================================================
# Anomaly detection on SMD with Transformer; only GPU 2 is made visible.
export CUDA_VISIBLE_DEVICES=2

model_name=Transformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SMD \
  --model_id SMD \
  --model $model_name \
  --data SMD \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 38 \
  --c_out 38 \
  --anomaly_ratio 0.5 \
  --batch_size 128 \
  --train_epochs 10

================================================
FILE: scripts/anomaly_detection/SWAT/Autoformer.sh
================================================
# Anomaly detection on SWAT with Autoformer; only GPU 1 is made visible.
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SWaT \
  --model_id SWAT \
  --model $model_name \
  --data SWAT \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model 128 \
  --d_ff 128 \
  --e_layers 3 \
  --enc_in 51 \
  --c_out 51 \
  --top_k 3 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/anomaly_detection/SWAT/KANAD.sh
================================================
# Anomaly detection on SWAT with KANAD; only GPU 0 is made visible.
export CUDA_VISIBLE_DEVICES=0

model_name=KANAD

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path ./dataset/SWaT \
  --model_id SWAT \
  --model $model_name \
  --data SWAT \
  --features M \
  --seq_len 80 \
  --d_model 1 \
  --enc_in 51 \
  --c_out 51 \
  --anomaly_ratio 1 \
  --learning_rate 0.01 \
  --batch_size 128 \
  --num_workers 4 \
  --patience 5 \
  --train_epochs 100
================================================ FILE: scripts/anomaly_detection/SWAT/TimesNet.sh ================================================ export CUDA_VISIBLE_DEVICES=1 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 8 \ --d_ff 8 \ --e_layers 3 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 16 \ --d_ff 16 \ --e_layers 3 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 32 \ --d_ff 32 \ --e_layers 3 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 64 \ --d_ff 64 \ --e_layers 3 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 8 \ --d_ff 8 \ --e_layers 2 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ 
--seq_len 100 \ --pred_len 0 \ --d_model 16 \ --d_ff 16 \ --e_layers 2 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 32 \ --d_ff 32 \ --e_layers 2 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model TimesNet \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 64 \ --d_ff 64 \ --e_layers 2 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 ================================================ FILE: scripts/anomaly_detection/SWAT/Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=1 python -u run.py \ --task_name anomaly_detection \ --is_training 1 \ --root_path ./dataset/SWaT \ --model_id SWAT \ --model Transformer \ --data SWAT \ --features M \ --seq_len 100 \ --pred_len 0 \ --d_model 128 \ --d_ff 128 \ --e_layers 3 \ --enc_in 51 \ --c_out 51 \ --top_k 3 \ --anomaly_ratio 1 \ --batch_size 128 \ --train_epochs 3 ================================================ FILE: scripts/classification/Autoformer.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=Autoformer python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id 
FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ 
--e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/Crossformer.sh ================================================ export CUDA_VISIBLE_DEVICES=3 model_name=Crossformer python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ 
--patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ 
--task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/DLinear.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=DLinear python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 
16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/ETSformer.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=ETSformer python -u run.py \ 
--task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ 
--patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --d_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/FEDformer.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=FEDformer python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ 
--is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ 
--model_id SelfRegulationSCP2 \
  --model $model_name \
  --data UEA \
  --e_layers 3 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 256 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 100 \
  --patience 10

python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/SpokenArabicDigits/ \
  --model_id SpokenArabicDigits \
  --model $model_name \
  --data UEA \
  --e_layers 3 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 256 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 100 \
  --patience 10

python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/UWaveGestureLibrary/ \
  --model_id UWaveGestureLibrary \
  --model $model_name \
  --data UEA \
  --e_layers 3 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 256 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 100 \
  --patience 10

================================================
FILE: scripts/classification/FiLM.sh
================================================
# Launches the FiLM classification runs on ten UEA datasets, one after another.
#
# The GPU selection defaults to device 7 (the value this script used to
# hard-code) but a CUDA_VISIBLE_DEVICES already exported by the caller or a
# job scheduler is now respected instead of being overwritten.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-7}"

model_name=FiLM

# Every run shares the same hyper-parameters; only the dataset changes.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  # EthanolConcentration is the only dataset that is given explicit
  # sequence/prediction lengths; all others rely on run.py defaults.
  case "$dataset" in
    EthanolConcentration) extra_args="--seq_len 1751 --pred_len 1751" ;;
    *) extra_args="" ;;
  esac

  # $extra_args is intentionally unquoted so it splits into separate words
  # (and vanishes entirely when empty).
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    $extra_args \
    --patience 10
done

================================================
FILE: scripts/classification/Informer.sh
================================================
# Launches the Informer classification runs on ten UEA datasets, one after
# another.
#
# The GPU selection defaults to device 5 (the value this script used to
# hard-code) but a CUDA_VISIBLE_DEVICES already exported by the caller or a
# job scheduler is now respected instead of being overwritten.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-5}"

model_name=Informer

# Every run shares the same hyper-parameters; only the dataset changes.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done

================================================
FILE: scripts/classification/LightTS.sh
================================================
# Launches the LightTS classification runs on ten UEA datasets, one after
# another.
#
# The GPU selection defaults to device 5 (the value this script used to
# hard-code) but a CUDA_VISIBLE_DEVICES already exported by the caller or a
# job scheduler is now respected instead of being overwritten.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-5}"

model_name=LightTS

# Every run shares the same hyper-parameters; only the dataset changes.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done

================================================
FILE: scripts/classification/MICN.sh
================================================
# Launches the MICN classification runs on ten UEA datasets, one after another.
#
# The GPU selection defaults to device 7 (the value this script used to
# hard-code) but a CUDA_VISIBLE_DEVICES already exported by the caller or a
# job scheduler is now respected instead of being overwritten.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-7}"

model_name=MICN

# Every run shares the same hyper-parameters; only the dataset changes.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  # EthanolConcentration is the only dataset that is given an explicit
  # --c_out; all others rely on the run.py default.
  case "$dataset" in
    EthanolConcentration) extra_args="--c_out 3" ;;
    *) extra_args="" ;;
  esac

  # $extra_args is intentionally unquoted so it splits into separate words
  # (and vanishes entirely when empty).
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    $extra_args \
    --patience 10
done

================================================
FILE: scripts/classification/MambaSL.out
================================================
Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: ArticularyWordRecognitionModel: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/ArticularyWordRecognition Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 128 n heads: 8 e layers: 2 d layers: 1 d FF: 8 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 275 300 🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_ArticularyWordRecognition_MambaSingleLayer_UEA_ftM_sl144_ll0_pl0_dm128_ds8_expand1_dc4_nk3_tvdt0_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 300 loading model test shape: torch.Size([300, 25]) torch.Size([300, 1]) accuracy:0.9933333333333333 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: AtrialFibrillation Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/AtrialFibrillation Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 13 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 16 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 15 15 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_AtrialFibrillation_MambaSingleLayer_UEA_ftM_sl640_ll0_pl0_dm32_ds16_expand1_dc4_nk13_tvdt1_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 15 loading model test shape: torch.Size([15, 3]) torch.Size([15, 1]) accuracy:0.5333333333333333 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: BasicMotions Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/BasicMotions Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 40 40 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_BasicMotions_MambaSingleLayer_UEA_ftM_sl100_ll0_pl0_dm32_ds1_expand1_dc4_nk3_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 40 loading model test shape: torch.Size([40, 4]) torch.Size([40, 1]) accuracy:1.0 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: CharacterTrajectoriesModel: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/CharacterTrajectories Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 4 Enc In: 7 Dec In: 7 C Out: 7 d model: 128 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 1422 1436 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_CharacterTrajectories_MambaSingleLayer_UEA_ftM_sl182_ll0_pl0_dm128_ds1_expand1_dc4_nk4_tvdt1_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 1436 loading model test shape: torch.Size([1436, 20]) torch.Size([1436, 1]) accuracy:0.9972144846796658 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: Cricket Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/Cricket Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 24 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 4 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 108 72 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_Cricket_MambaSingleLayer_UEA_ftM_sl1197_ll0_pl0_dm32_ds4_expand1_dc4_nk24_tvdt0_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 72 loading model test shape: torch.Size([72, 12]) torch.Size([72, 1]) accuracy:1.0 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: DuckDuckGeese Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/DuckDuckGeese Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 6 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 2 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 50 50 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_DuckDuckGeese_MambaSingleLayer_UEA_ftM_sl270_ll0_pl0_dm1024_ds2_expand1_dc4_nk6_tvdt0_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 50 loading model test shape: torch.Size([50, 5]) torch.Size([50, 1]) accuracy:0.7 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: EigenWorms Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/EigenWorms Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 360 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 4 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 128 131 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_EigenWorms_MambaSingleLayer_UEA_ftM_sl17984_ll0_pl0_dm32_ds1_expand1_dc4_nk360_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 131 loading model test shape: torch.Size([131, 5]) torch.Size([131, 1]) accuracy:0.8396946564885496 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: Epilepsy Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/Epilepsy Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 5 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 137 138 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_Epilepsy_MambaSingleLayer_UEA_ftM_sl206_ll0_pl0_dm32_ds1_expand1_dc4_nk5_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 138 loading model test shape: torch.Size([138, 4]) torch.Size([138, 1]) accuracy:0.9782608695652174 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: ERing Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/ERing Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 128 n heads: 8 e layers: 2 d layers: 1 d FF: 8 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 30 270 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_ERing_MambaSingleLayer_UEA_ftM_sl65_ll0_pl0_dm128_ds8_expand1_dc4_nk3_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 270 loading model test shape: torch.Size([270, 6]) torch.Size([270, 1]) accuracy:0.937037037037037 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: EthanolConcentrationModel: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/EthanolConcentration Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 36 Enc In: 7 Dec In: 7 C Out: 7 d model: 512 n heads: 8 e layers: 2 d layers: 1 d FF: 4 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 261 263 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_EthanolConcentration_MambaSingleLayer_UEA_ftM_sl1751_ll0_pl0_dm512_ds4_expand1_dc4_nk36_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 263 loading model test shape: torch.Size([263, 4]) torch.Size([263, 1]) accuracy:0.42585551330798477 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: FaceDetection Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/FaceDetection Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 256 n heads: 8 e layers: 2 d layers: 1 d FF: 16 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 5890 3524 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_FaceDetection_MambaSingleLayer_UEA_ftM_sl62_ll0_pl0_dm256_ds16_expand1_dc4_nk3_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 3524 loading model test shape: torch.Size([3524, 2]) torch.Size([3524, 1]) accuracy:0.6929625425652668 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: FingerMovements Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/FingerMovements Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 316 100 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_FingerMovements_MambaSingleLayer_UEA_ftM_sl50_ll0_pl0_dm32_ds1_expand1_dc4_nk3_tvdt0_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 100 loading model test shape: torch.Size([100, 2]) torch.Size([100, 1]) accuracy:0.71 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: HandMovementDirectionModel: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/HandMovementDirection Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 8 Enc In: 7 Dec In: 7 C Out: 7 d model: 256 n heads: 8 e layers: 2 d layers: 1 d FF: 16 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 160 74 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_HandMovementDirection_MambaSingleLayer_UEA_ftM_sl400_ll0_pl0_dm256_ds16_expand1_dc4_nk8_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 74 loading model test shape: torch.Size([74, 4]) torch.Size([74, 1]) accuracy:0.7027027027027027 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: Handwriting Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/Handwriting Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 4 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 4 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 150 850 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_Handwriting_MambaSingleLayer_UEA_ftM_sl152_ll0_pl0_dm1024_ds4_expand1_dc4_nk4_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 850 loading model test shape: torch.Size([850, 26]) torch.Size([850, 1]) accuracy:0.6082352941176471 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: Heartbeat Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/Heartbeat Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 9 Enc In: 7 Dec In: 7 C Out: 7 d model: 64 n heads: 8 e layers: 2 d layers: 1 d FF: 16 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 204 205 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_Heartbeat_MambaSingleLayer_UEA_ftM_sl405_ll0_pl0_dm64_ds16_expand1_dc4_nk9_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 205 loading model test shape: torch.Size([205, 2]) torch.Size([205, 1]) accuracy:0.8048780487804879 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: InsectWingbeat Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/InsectWingbeat Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 8 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 25000 25000 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_InsectWingbeat_MambaSingleLayer_UEA_ftM_sl22_ll0_pl0_dm1024_ds8_expand1_dc4_nk3_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 25000 loading model test shape: torch.Size([25000, 10]) torch.Size([25000, 1]) accuracy:0.66304 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: JapaneseVowels Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/JapaneseVowels Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 128 n heads: 8 e layers: 2 d layers: 1 d FF: 8 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 270 370 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_JapaneseVowels_MambaSingleLayer_UEA_ftM_sl29_ll0_pl0_dm128_ds8_expand1_dc4_nk3_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 370 loading model test shape: torch.Size([370, 9]) torch.Size([370, 1]) accuracy:0.9864864864864865 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: Libras Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/Libras Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 4 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 180 180 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_Libras_MambaSingleLayer_UEA_ftM_sl45_ll0_pl0_dm1024_ds4_expand1_dc4_nk3_tvdt1_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 180 loading model test shape: torch.Size([180, 15]) torch.Size([180, 1]) accuracy:0.9166666666666666 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: LSST Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/LSST Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 4 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 2459 2466 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_LSST_MambaSingleLayer_UEA_ftM_sl36_ll0_pl0_dm1024_ds4_expand1_dc4_nk3_tvdt1_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2466 loading model test shape: torch.Size([2466, 14]) torch.Size([2466, 1]) accuracy:0.4557988645579886 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: MotorImagery Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/MotorImagery Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 60 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 8 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 278 100 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_MotorImagery_MambaSingleLayer_UEA_ftM_sl3000_ll0_pl0_dm32_ds8_expand1_dc4_nk60_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 100 loading model test shape: torch.Size([100, 2]) torch.Size([100, 1]) accuracy:0.69 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: NATOPS Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/NATOPS Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 512 n heads: 8 e layers: 2 d layers: 1 d FF: 2 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 180 180 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_NATOPS_MambaSingleLayer_UEA_ftM_sl51_ll0_pl0_dm512_ds2_expand1_dc4_nk3_tvdt0_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 180 loading model test shape: torch.Size([180, 6]) torch.Size([180, 1]) accuracy:0.9888888888888889 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: PEMS-SF Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/PEMS-SF Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 512 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 267 173 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_PEMS-SF_MambaSingleLayer_UEA_ftM_sl144_ll0_pl0_dm512_ds1_expand1_dc4_nk3_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 173 loading model test shape: torch.Size([173, 7]) torch.Size([173, 1]) accuracy:0.8554913294797688 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: PenDigits Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/PenDigits Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 64 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 7494 3498 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_PenDigits_MambaSingleLayer_UEA_ftM_sl8_ll0_pl0_dm64_ds1_expand1_dc4_nk3_tvdt0_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 3498 loading model test shape: torch.Size([3498, 10]) torch.Size([3498, 1]) accuracy:0.9925671812464265 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: PhonemeSpectra Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/PhonemeSpectra Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 5 Enc In: 7 Dec In: 7 C Out: 7 d model: 256 n heads: 8 e layers: 2 d layers: 1 d FF: 4 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 3315 3353 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_PhonemeSpectra_MambaSingleLayer_UEA_ftM_sl217_ll0_pl0_dm256_ds4_expand1_dc4_nk5_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 3353 loading model test shape: torch.Size([3353, 39]) torch.Size([3353, 1]) accuracy:0.3033104682373993 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: RacketSports Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/RacketSports Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 4 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 151 152 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_RacketSports_MambaSingleLayer_UEA_ftM_sl30_ll0_pl0_dm1024_ds4_expand1_dc4_nk3_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 152 loading model test shape: torch.Size([152, 4]) torch.Size([152, 1]) accuracy:0.9276315789473685 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: SelfRegulationSCP1 Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/SelfRegulationSCP1 Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 18 Enc In: 7 Dec In: 7 C Out: 7 d model: 256 n heads: 8 e layers: 2 d layers: 1 d FF: 16 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 268 293 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_SelfRegulationSCP1_MambaSingleLayer_UEA_ftM_sl896_ll0_pl0_dm256_ds16_expand1_dc4_nk18_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 293 loading model test shape: torch.Size([293, 2]) torch.Size([293, 1]) accuracy:0.9249146757679181 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: SelfRegulationSCP2 Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/SelfRegulationSCP2 Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 24 Enc In: 7 Dec In: 7 C Out: 7 d model: 256 n heads: 8 e layers: 2 d layers: 1 d FF: 16 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 200 180 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_SelfRegulationSCP2_MambaSingleLayer_UEA_ftM_sl1152_ll0_pl0_dm256_ds16_expand1_dc4_nk24_tvdt1_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 180 loading model test shape: torch.Size([180, 2]) torch.Size([180, 1]) accuracy:0.65 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: SpokenArabicDigits Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/SpokenArabicDigits Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 3 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 8 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 6599 2199 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_SpokenArabicDigits_MambaSingleLayer_UEA_ftM_sl93_ll0_pl0_dm1024_ds8_expand1_dc4_nk3_tvdt0_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 2199 loading model test shape: torch.Size([2199, 10]) torch.Size([2199, 1]) accuracy:0.9995452478399273 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: StandWalkJump Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/StandWalkJump Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 50 Enc In: 7 Dec In: 7 C Out: 7 d model: 32 n heads: 8 e layers: 2 d layers: 1 d FF: 1 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 12 15 🚀 Lazy Loading: MambaSingleLayer ... 
>>>>>>>testing : classification_CLS_StandWalkJump_MambaSingleLayer_UEA_ftM_sl2500_ll0_pl0_dm32_ds1_expand1_dc4_nk50_tvdt1_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 15 loading model test shape: torch.Size([15, 3]) torch.Size([15, 1]) accuracy:0.7333333333333333 Using GPU Args in experiment: Basic Config Task Name: classification Is Training: 0 Model ID: UWaveGestureLibrary Model: MambaSingleLayer Data Loader Data: UEA Root Path: ./dataset/UWaveGestureLibrary Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints_best/MambaSL Model Parameters Top k: 5 Num Kernels: 7 Enc In: 7 Dec In: 7 C Out: 7 d model: 1024 n heads: 8 e layers: 2 d layers: 1 d FF: 2 Moving Avg: 25 Factor: 1 Distil: 1 Dropout: 0.1 Embed: timeF Activation: gelu Run Parameters Num Workers: 10 Itr: 1 Train Epochs: 100 Batch Size: 16 Patience: 10 Learning Rate: 0.001 Des: gating4proposed Loss: MSE Lradj: type1 Use Amp: 0 GPU Use GPU: 1 GPU: 0 Use Multi GPU: 0 Devices: 0,1,2,3 De-stationary Projector Params P Hidden Dims: 128, 128 P Hidden Layers: 2 Use GPU: cuda:0 120 320 🚀 Lazy Loading: MambaSingleLayer ... >>>>>>>testing : classification_CLS_UWaveGestureLibrary_MambaSingleLayer_UEA_ftM_sl315_ll0_pl0_dm1024_ds2_expand1_dc4_nk7_tvdt0_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 320 loading model test shape: torch.Size([320, 8]) torch.Size([320, 1]) accuracy:0.934375 ================================================ FILE: scripts/classification/MambaSL.sh ================================================ # This script is for reproducing the MambaSL classification results on the 30 UEA datasets. # Download checkpoints # from https://drive.google.com/drive/folders/1dJx_rpB7UnkMuxrCEoHJcXXzhaACS5Sx?usp=share_link (checkpoint_best/MambaSL.zip) # and change the `checkpoint_dir` variable to the path of the downloaded checkpoints. 
# If you want to reproduce the other baseline results reported in the MambaSL paper
#   (https://openreview.net/pdf?id=YDl4vqQqGP),
#   please refer to the official MambaSL repo: https://github.com/yoom618/MambaSL

# Global Setting
model_name="MambaSingleLayer"
gpu_id=0
resource_dir="."
data_dir="${resource_dir}/dataset"
checkpoint_dir="${resource_dir}/checkpoints_best/MambaSL"

# run_model DATASET D_MODEL D_FF TV_DT TV_B TV_C NUM_KERNELS [BATCH_SIZE]
#
# Evaluate (--is_training 0) the stored checkpoint of one UEA dataset.
#   DATASET      dataset name; used as the sub-directory of ${data_dir}
#                and as --model_id
#   D_MODEL      value passed to --d_model
#   D_FF         value passed to --d_ff
#                (NOTE(review): the logged setting names report this value
#                as "ds"; confirm how the model interprets it)
#   TV_DT        value passed to --tv_dt
#   TV_B         value passed to --tv_B
#   TV_C         value passed to --tv_C
#   NUM_KERNELS  value passed to --num_kernels
#   BATCH_SIZE   value passed to --batch_size (optional, defaults to 16)
#
# Returns 1 without launching anything when fewer than 7 arguments are given;
# otherwise returns the exit status of run.py.
run_model() {
    if [ "$#" -lt 7 ]; then
        echo "usage: run_model DATASET D_MODEL D_FF TV_DT TV_B TV_C NUM_KERNELS [BATCH_SIZE]" >&2
        return 1
    fi

    local dn=$1 dm=$2 df=$3 dt=$4 tb=$5 tc=$6 nk=$7
    local bs=${8:-16}

    # Every expansion is quoted so that a value containing whitespace or glob
    # characters is handed to run.py as exactly one argument.
    python run.py \
        --use_gpu --gpu_type cuda --gpu "${gpu_id}" \
        --task_name classification --data UEA \
        --root_path "${data_dir}/${dn}" \
        --checkpoints "${checkpoint_dir}" \
        --model "${model_name}" \
        --model_id "${dn}" \
        --d_model "${dm}" --d_ff "${df}" --expand 1 --d_conv 4 \
        --tv_dt "${dt}" --tv_B "${tb}" --tv_C "${tc}" --use_D 0 \
        --num_kernels "${nk}" \
        --is_training 0 --pred_len 0 --label_len 0 --batch_size "${bs}" \
        --des gating4proposed --itr 1 --dropout 0.1 \
        --learning_rate 0.001 --train_epochs 100 --patience 10
}

# ArticularyWordRecognition
run_model "ArticularyWordRecognition" 128 8 0 0 1 3 16
# AtrialFibrillation
run_model "AtrialFibrillation" 32 16 1 0 0 13 16
# BasicMotions
run_model "BasicMotions" 32 1 0 0 0 3 16
# CharacterTrajectories
run_model "CharacterTrajectories" 128 1 1 0 0 4 16
# Cricket
run_model "Cricket" 32 4 0 1 0 24 16
# DuckDuckGeese
run_model "DuckDuckGeese" 1024 2 0 0 1 6 16
# EigenWorms
run_model "EigenWorms" 32 1 1 1 0 360 4
# Epilepsy
run_model "Epilepsy" 32 1 1 1 0 5 16
# ERing
run_model "ERing" 128 8 1 0 1 3 16
# EthanolConcentration
run_model "EthanolConcentration" 512 4 0 0 0 36 16
# FaceDetection
run_model "FaceDetection" 256 16 1 0 1 3 16
# FingerMovements
run_model "FingerMovements" 32 1 0 1 1 3 16
# HandMovementDirection
run_model "HandMovementDirection" 256 16 1 0 1 8 16
# Handwriting
run_model "Handwriting" 1024 4 1 0 1 4 16
# Heartbeat
run_model "Heartbeat" 64 16 0 0 0 9 16
# InsectWingbeat
run_model "InsectWingbeat" 1024 8 0 0 0 3 16
# JapaneseVowels
run_model \
"JapaneseVowels" 128 8 1 1 0 3 16 # Libras run_model "Libras" 1024 4 1 1 1 3 16 # LSST run_model "LSST" 1024 4 1 1 1 3 16 # MotorImagery run_model "MotorImagery" 32 8 0 0 0 60 16 # NATOPS run_model "NATOPS" 512 2 0 1 0 3 16 # PEMS-SF run_model "PEMS-SF" 512 1 1 1 0 3 16 # PenDigits run_model "PenDigits" 64 1 0 1 1 3 16 # PhonemeSpectra run_model "PhonemeSpectra" 256 4 1 1 0 5 16 # RacketSports run_model "RacketSports" 1024 4 1 0 1 3 16 # SelfRegulationSCP1 run_model "SelfRegulationSCP1" 256 16 1 0 1 18 16 # SelfRegulationSCP2 run_model "SelfRegulationSCP2" 256 16 1 1 1 24 16 # SpokenArabicDigits run_model "SpokenArabicDigits" 1024 8 0 1 0 3 16 # StandWalkJump run_model "StandWalkJump" 32 1 1 0 0 50 16 # UWaveGestureLibrary run_model "UWaveGestureLibrary" 1024 2 0 0 1 7 16 ================================================ FILE: scripts/classification/PatchTST.sh ================================================ export CUDA_VISIBLE_DEVICES=3 model_name=PatchTST python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification 
\ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path 
./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/Pyraformer.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=Pyraformer python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' 
\ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 4 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/Reformer.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=Reformer python -u run.py \ --task_name classification \ --is_training 1 \ --root_path 
./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ 
--model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/TimesNet.sh ================================================ export CUDA_VISIBLE_DEVICES=3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model TimesNet \ --data UEA \ --e_layers 2 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 30 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model TimesNet \ --data UEA \ --e_layers 2 \ --batch_size 16 \ --d_model 64 \ --d_ff 256 \ --top_k 3 \ --num_kernels 4 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ 
--train_epochs 30 \
  --patience 10

# Handwriting
# Runs with -u like every other invocation in this script, so Python's output
# is unbuffered and the training log streams in order.
python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/Handwriting/ \
  --model_id Handwriting \
  --model TimesNet \
  --data UEA \
  --e_layers 2 \
  --batch_size 16 \
  --d_model 32 \
  --d_ff 64 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 30 \
  --patience 10

# Heartbeat
python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/Heartbeat/ \
  --model_id Heartbeat \
  --model TimesNet \
  --data UEA \
  --e_layers 3 \
  --batch_size 16 \
  --d_model 16 \
  --d_ff 32 \
  --top_k 1 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 30 \
  --patience 10

# JapaneseVowels (the only run in this fragment with 60 training epochs)
python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/JapaneseVowels/ \
  --model_id JapaneseVowels \
  --model TimesNet \
  --data UEA \
  --e_layers 2 \
  --batch_size 16 \
  --d_model 16 \
  --d_ff 32 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 60 \
  --patience 10

# PEMS-SF
python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/PEMS-SF/ \
  --model_id PEMS-SF \
  --model TimesNet \
  --data UEA \
  --e_layers 6 \
  --batch_size 16 \
  --d_model 64 \
  --d_ff 64 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 30 \
  --patience 10

# SelfRegulationSCP1
python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/SelfRegulationSCP1/ \
  --model_id SelfRegulationSCP1 \
  --model TimesNet \
  --data UEA \
  --e_layers 3 \
  --batch_size 16 \
  --d_model 16 \
  --d_ff 32 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 30 \
  --patience 10

# SelfRegulationSCP2
python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/SelfRegulationSCP2/ \
  --model_id SelfRegulationSCP2 \
  --model TimesNet \
  --data UEA \
  --e_layers 3 \
  --batch_size 16 \
  --d_model 32 \
  --d_ff 32 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 30 \
  --patience 10

python -u run.py \
  --task_name classification \
  --is_training \
1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model TimesNet \ --data UEA \ --e_layers 2 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --top_k 2 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 30 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model TimesNet \ --data UEA \ --e_layers 2 \ --batch_size 16 \ --d_model 32 \ --d_ff 64 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 30 \ --patience 10 ================================================ FILE: scripts/classification/Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=4 model_name=Transformer python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ 
--top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' 
\ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 ================================================ FILE: scripts/classification/iTransformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=iTransformer python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 2048 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/FaceDetection/ \ --model_id FaceDetection \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Handwriting/ \ --model_id Handwriting \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/Heartbeat/ \ --model_id Heartbeat \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/JapaneseVowels/ \ --model_id JapaneseVowels \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name 
classification \ --is_training 1 \ --root_path ./dataset/PEMS-SF/ \ --model_id PEMS-SF \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP1/ \ --model_id SelfRegulationSCP1 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SelfRegulationSCP2/ \ --model_id SelfRegulationSCP2 \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/SpokenArabicDigits/ \ --model_id SpokenArabicDigits \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/UWaveGestureLibrary/ \ --model_id UWaveGestureLibrary \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --enc_in 3 ================================================ FILE: scripts/exogenous_forecast/ECL/TimeXer.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=TimeXer des='Timexer-MS' python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path 
./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des $des \ --batch_size 4 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_192 \ --model $model_name \ --data custom \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des $des \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_336 \ --model $model_name \ --data custom \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des $des \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_720 \ --model $model_name \ --data custom \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 3 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des $des \ --d_model 512 \ --itr 1 ================================================ FILE: scripts/exogenous_forecast/EPF/TimeXer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=TimeXer des='Timexer-MS' patch_len=24 python -u run.py \ --is_training 1 \ --task_name long_term_forecast \ --root_path ./dataset/EPF/ \ --data_path NP.csv \ --model_id NP_168_24 \ --model $model_name \ --data custom \ --features MS \ --seq_len 168 \ --pred_len 24 \ --e_layers 3 \ --enc_in 3 \ --dec_in 3 \ --c_out 1 \ --des $des \ --patch_len $patch_len \ --d_model 512 \ --d_ff 512 \ --batch_size 
4 \ --itr 1 python -u run.py \ --is_training 1 \ --task_name long_term_forecast \ --root_path ./dataset/EPF/ \ --data_path PJM.csv \ --model_id PJM_168_24 \ --model $model_name \ --data custom \ --features MS \ --seq_len 168 \ --pred_len 24 \ --e_layers 3 \ --enc_in 3 \ --dec_in 3 \ --c_out 1 \ --des $des \ --patch_len $patch_len \ --d_model 512 \ --batch_size 16 \ --itr 1 python -u run.py \ --is_training 1 \ --task_name long_term_forecast \ --root_path ./dataset/EPF/ \ --data_path BE.csv \ --model_id BE_168_24 \ --model $model_name \ --data custom \ --features MS \ --seq_len 168 \ --pred_len 24 \ --e_layers 2 \ --enc_in 3 \ --dec_in 3 \ --c_out 1 \ --des $des \ --patch_len $patch_len \ --d_model 512 \ --d_ff 512 \ --batch_size 16 \ --itr 1 python -u run.py \ --is_training 1 \ --task_name long_term_forecast \ --root_path ./dataset/EPF/ \ --data_path FR.csv \ --model_id FR_168_24 \ --model $model_name \ --data custom \ --features MS \ --seq_len 168 \ --pred_len 24 \ --e_layers 2 \ --enc_in 3 \ --dec_in 3 \ --c_out 1 \ --des $des \ --patch_len $patch_len \ --batch_size 16 \ --d_model 512 \ --itr 1 python -u run.py \ --is_training 1 \ --task_name long_term_forecast \ --root_path ./dataset/EPF/ \ --data_path DE.csv \ --model_id DE_168_24 \ --model $model_name \ --data custom \ --features MS \ --seq_len 168 \ --pred_len 24 \ --e_layers 1 \ --enc_in 3 \ --dec_in 3 \ --c_out 1 \ --des $des \ --patch_len $patch_len \ --batch_size 4 \ --d_model 512 \ --itr 1 ================================================ FILE: scripts/exogenous_forecast/ETTh1/TimeXer.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=TimeXer des='Timexer-MS' python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_96 \ --model $model_name \ --data ETTh1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ 
--c_out 7 \ --d_model 512 \ --d_ff 512 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_192 \ --model $model_name \ --data ETTh1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 128 \ --d_ff 128 \ --batch_size 4 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_336 \ --model $model_name \ --data ETTh1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 512 \ --d_ff 512 \ --batch_size 32 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_720 \ --model $model_name \ --data ETTh1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 512 \ --batch_size 128 \ --des $des \ --itr 1 ================================================ FILE: scripts/exogenous_forecast/ETTh2/TimeXer.sh ================================================ export CUDA_VISIBLE_DEVICES=3 model_name=TimeXer des='Timexer-MS' python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_96 \ --model $model_name \ --data ETTh2 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 128 \ --d_ff 128 \ --batch_size 128 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_192 \ --model 
$model_name \ --data ETTh2 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 128 \ --d_ff 512 \ --batch_size 128 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_336 \ --model $model_name \ --data ETTh2 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 128 \ --d_ff 256 \ --batch_size 16 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_720 \ --model $model_name \ --data ETTh2 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 256 \ --d_ff 512 \ --des $des \ --itr 1 ================================================ FILE: scripts/exogenous_forecast/ETTm1/TimeXer.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=TimeXer des='Timexer-MS' python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_96 \ --model $model_name \ --data ETTm1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 256 \ --batch_size 128 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_192 \ --model $model_name \ --data ETTm1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 128 \ --batch_size 128 \ --des $des \ --itr 1 python -u run.py \ --task_name 
long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_336 \ --model $model_name \ --data ETTm1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 128 \ --batch_size 128 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_720 \ --model $model_name \ --data ETTm1 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 128 \ --batch_size 128 \ --des $des \ --itr 1 ================================================ FILE: scripts/exogenous_forecast/ETTm2/TimeXer.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=TimeXer des='Timexer-MS' python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_96 \ --model $model_name \ --data ETTm2 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 512 \ --batch_size 16 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_192 \ --model $model_name \ --data ETTm2 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 256 \ --batch_size 4 \ --des $des \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_336 \ --model $model_name \ --data ETTm2 \ --features MS \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ 
--dec_in 7 \
  --c_out 7 \
  --d_model 128 \
  --batch_size 128 \
  --des $des \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm2.csv \
  --model_id ETTm2_96_720 \
  --model $model_name \
  --data ETTm2 \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --d_model 128 \
  --batch_size 128 \
  --des $des \
  --itr 1

================================================
FILE: scripts/exogenous_forecast/Traffic/TimeXer.sh
================================================
# TimeXer long-term forecasting runs on traffic.csv (features MS, seq_len 96),
# one run per prediction length: 96, 192, 336 and 720.
# Every run passes the shared $des tag defined below, so all four horizons
# carry the same experiment description.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer
des='Timexer-MS'

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_96_96 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 1 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --d_model 512 \
  --des $des \
  --batch_size 4 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_96_192 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 1 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --d_model 512 \
  --des $des \
  --batch_size 4 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_96_336 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 1 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --d_model 512 \
  --des $des \
  --batch_size 4 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_96_720 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 1 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --d_model 512 \
  --des $des \
  --batch_size 4 \
  --itr 1

================================================
FILE: scripts/exogenous_forecast/Weather/TimeXer.sh
================================================
# TimeXer long-term forecasting runs on weather.csv (features MS, seq_len 96),
# one run per prediction length: 96, 192, 336 and 720.
# The four commands differ only in --model_id and --pred_len.
export CUDA_VISIBLE_DEVICES=3

model_name=TimeXer
des='Timexer-MS'

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_96 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des $des \
  --d_model 128 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_192 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des $des \
  --d_model 128 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_336 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des $des \
  --d_model 128 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_720 \
  --model $model_name \
  --data custom \
  --features MS \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des $des \
  --d_model 128 \
  --itr 1
================================================
FILE: scripts/imputation/ECL_script/Autoformer.sh
================================================
# Autoformer imputation runs on electricity.csv (features M, seq_len 96),
# one run per mask rate: 0.125, 0.25, 0.375 and 0.5.
# The four commands differ only in --model_id and --mask_rate.
export CUDA_VISIBLE_DEVICES=0

model_name=Autoformer

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.125 \
  --mask_rate 0.125 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.25 \
  --mask_rate 0.25 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.375 \
  --mask_rate 0.375 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.5 \
  --mask_rate 0.5 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

================================================
FILE: scripts/imputation/ECL_script/DLinear.sh
================================================
# DLinear imputation runs on electricity.csv (features M, seq_len 96),
# one run per mask rate: 0.125, 0.25, 0.375 and 0.5.
# --enc_in/--dec_in/--c_out are 321, the same channel count the Autoformer
# and ETSformer scripts pass for this same electricity.csv file.
# NOTE(review): --pred_len is 96 here while the Autoformer and ETSformer
# electricity imputation scripts pass 0; left unchanged - confirm it is intended.
export CUDA_VISIBLE_DEVICES=0

model_name=DLinear

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.125 \
  --mask_rate 0.125 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.25 \
  --mask_rate 0.25 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.375 \
  --mask_rate 0.375 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.5 \
  --mask_rate 0.5 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

================================================
FILE: scripts/imputation/ECL_script/ETSformer.sh
================================================
# ETSformer imputation runs on electricity.csv (features M, seq_len 96),
# one run per mask rate: 0.125, 0.25, 0.375 and 0.5.
# Unlike the Autoformer and DLinear scripts, these runs pass --d_layers 2.
export CUDA_VISIBLE_DEVICES=0

model_name=ETSformer

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.125 \
  --mask_rate 0.125 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 2 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.25 \
  --mask_rate 0.25 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 2 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.375 \
  --mask_rate 0.375 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 2 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_mask_0.5 \
  --mask_rate 0.5 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 
2 \ --d_layers 2 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/FEDformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=FEDformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ 
--features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/Informer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Informer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id 
ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/LightTS.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=LightTS python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path 
./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/Pyraformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Pyraformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python 
-u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/Reformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Reformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ 
--des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/TimesNet.sh ================================================ export CUDA_VISIBLE_DEVICES=7 model_name=TimesNet python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ 
--c_out 321 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ 
--e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ECL_script/iTransformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=iTransformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data 
custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Autoformer_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Autoformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ 
--mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Autoformer_ETTh2.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Autoformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ 
--model_id ETTh2_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Autoformer_ETTm1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Autoformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ 
--data_path ETTm1.csv \ --model_id ETTm1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Autoformer_ETTm2.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Autoformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path 
./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Crossformer_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Crossformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ 
--is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/DLinear_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=DLinear python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name 
imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/FiLM_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=6 model_name=FiLM python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ 
--task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/MICN_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=MICN python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --conv_kernel 12 16 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --conv_kernel 12 
16 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --conv_kernel 12 16 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --conv_kernel 12 16 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Nonstationary_Transformer_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Nonstationary_Transformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ 
--d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001 \
  --p_hidden_dims 256 256 \
  --p_hidden_layers 2

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTh1.csv \
  --model_id ETTh1_mask_0.375 \
  --mask_rate 0.375 \
  --model $model_name \
  --data ETTh1 \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001 \
  --p_hidden_dims 256 256 \
  --p_hidden_layers 2

python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTh1.csv \
  --model_id ETTh1_mask_0.5 \
  --mask_rate 0.5 \
  --model $model_name \
  --data ETTh1 \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 128 \
  --des 'Exp' \
  --itr 1 \
  --top_k 5 \
  --learning_rate 0.001 \
  --p_hidden_dims 256 256 \
  --p_hidden_layers 2

================================================
FILE: scripts/imputation/ETT_script/TiDE_ETTh1.sh
================================================
# Imputation runs for the TiDE model on ETTh1 (seq_len 96), one run.py
# invocation per mask rate: 0.125, 0.25, 0.375 and 0.5.
# Run from the repository root so that run.py and ./dataset/ resolve.
export CUDA_VISIBLE_DEVICES=0

model_name=TiDE

# Mask rate 0.125.
# The last option deliberately carries no trailing backslash: a dangling
# line continuation would splice the next `python` invocation onto this
# command as extra arguments if the blank separator line were ever removed.
python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTh1.csv \
  --model_id ETTh1_mask_0.125 \
  --mask_rate 0.125 \
  --model $model_name \
  --data ETTh1 \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 2 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --batch_size 16 \
  --d_model 256 \
  --d_ff 256 \
  --dropout 0.3 \
  --learning_rate 0.1 \
  --patience 5 \
  --train_epochs 10

# Mask rate 0.25.
# NOTE(review): this run and the ones after it use d_model 16 / d_ff 32 /
# learning_rate 0.001 rather than the settings of the first run above --
# confirm the difference is intended and not a leftover from another
# model's template.
python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTh1.csv \
--model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/TimesNet_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=TimesNet python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path 
ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/TimesNet_ETTh2.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=TimesNet python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ 
--data_path ETTh2.csv \ --model_id ETTh2_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/TimesNet_ETTm1.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=TimesNet python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path 
./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/TimesNet_ETTm2.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=TimesNet python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ 
--root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Transformer_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ 
--is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Transformer_ETTh2.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ 
--task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Transformer_ETTm1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 
0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/Transformer_ETTm2.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ 
--top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/ETT_script/iTransformer_ETTh2.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=iTransformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ 
--des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/Weather_script/Autoformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Autoformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ 
--d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/Weather_script/TimesNet.sh ================================================ export CUDA_VISIBLE_DEVICES=6 model_name=TimesNet python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ 
--dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --top_k 3 \ --learning_rate 0.001 ================================================ FILE: scripts/imputation/Weather_script/Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.125 \ --mask_rate 0.125 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ 
--d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.25 \ --mask_rate 0.25 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.375 \ --mask_rate 0.375 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 python -u run.py \ --task_name imputation \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_mask_0.5 \ --mask_rate 0.5 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 0 \ --pred_len 0 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --batch_size 16 \ --d_model 128 \ --d_ff 128 \ --des 'Exp' \ --itr 1 \ --top_k 5 \ --learning_rate 0.001 ================================================ FILE: scripts/long_term_forecast/AugmentSample/Classification/PatchTST.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=PatchTST for aug in jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp wdba discdtw discsdtw do echo using augmentation: ${aug} python -u run.py \ --task_name classification \ 
--is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --augmentation_ratio 1 \ --${aug} done ================================================ FILE: scripts/long_term_forecast/AugmentSample/Forecasting/PatchTST.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=PatchTST for aug in jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp discdtw discsdtw do for pred_len in 96 192 336 720 do echo using augmentation: ${aug} python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_${pred_len} \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len ${pred_len} \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --augmentation_ratio 1 \ --${aug} done done ================================================ FILE: scripts/long_term_forecast/AugmentSample/ReadMe.md ================================================ # Augmentation Feature Roadbook Hi there! For those who are interested in testing augmentation techniques in `Time-Series-Library`. For now, we have embedded several augmentation methods in this repo. We are still collecting publicly available augmentation algorithms, and we appreciate your valuable advice! ``` The Implemented Augmentation Methods 1. jitter 2. scaling 3. permutation 4. magwarp 5. timewarp 6. windowslice 7. windowwarp 8. rotation 9. spawner 10. dtwwarp 11. shapedtwwarp 12. wdba (Specially Designed for Classification tasks) 13. 
discdtw 14. discsdtw ``` ## Usage In this folder, we present two sample shell scripts that apply augmentation in the `Forecasting` and `Classification` tasks. Take the `Classification` task for example: we test multiple augmentation algorithms on the `EthanolConcentration` dataset (a subset of the popular classification benchmark `UEA`) using the `PatchTST` model. ```shell export CUDA_VISIBLE_DEVICES=0 model_name=PatchTST for aug in jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp wdba discdtw discsdtw do echo using augmentation: ${aug} python -u run.py \ --task_name classification \ --is_training 1 \ --root_path ./dataset/EthanolConcentration/ \ --model_id EthanolConcentration \ --model $model_name \ --data UEA \ --e_layers 3 \ --batch_size 16 \ --d_model 128 \ --d_ff 256 \ --top_k 3 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --train_epochs 100 \ --patience 10 \ --augmentation_ratio 1 \ --${aug} done ``` Here, the parameter `augmentation_ratio` specifies how many times we want to perform the augmentation method. The parameter `${aug}` is the augmentation type label, passed to `run.py` as a flag (e.g. `--jitter`). The example here performs augmentation only once, so we set `augmentation_ratio` to `1`, followed by one augmentation type label. More generally, you can set `augmentation_ratio` to an integer `num` followed by `num` augmentation type labels. The augmentation code follows the same conventions as the rest of `Time-Series-Library`. If you want to adjust other training parameters, feel free to add arguments to the shell scripts and play around. The full list of parameters can be seen in `run.py`. ## Contact Us! This piece of code is written and maintained by [Yunzhong Qiu](https://github.com/DigitalLifeYZQiu). We thank [Haixu Wu](https://github.com/wuhaixu2016) and [Jiaxiang Dong](https://github.com/dongjiaxiang) for insightful discussion and solid support.
If you have difficulties or find bugs in our code, please contact us: - Email: qiuyz24@mails.tsinghua.edu.cn ================================================ FILE: scripts/long_term_forecast/ECL_script/Autoformer.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=Autoformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ECL_script/Crossformer.sh ================================================ export CUDA_VISIBLE_DEVICES=7 
model_name=Crossformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --batch_size 16 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --batch_size 16 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --batch_size 16 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --batch_size 16 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ECL_script/DLinear.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=DLinear python -u run.py \ --task_name long_term_forecast \ 
--is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ECL_script/ETSformer.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=ETSformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 2 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' 
\
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_192 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 2 \
  --d_layers 2 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_336 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 2 \
  --d_layers 2 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_720 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 2 \
  --d_layers 2 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --itr 1

================================================
FILE: scripts/long_term_forecast/ECL_script/FEDformer.sh
================================================
export CUDA_VISIBLE_DEVICES=1

model_name=FEDformer

# One long_term_forecast run per horizon on electricity.csv; only --pred_len
# and the matching --model_id suffix differ between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/FiLM.sh
================================================
export CUDA_VISIBLE_DEVICES=3

model_name=FiLM

# One long_term_forecast run per horizon on electricity.csv with batch size 4.
# NOTE(review): --features S is combined with 321-channel enc_in/dec_in/c_out,
# and --seq_len is 192 although the model ids read ECL_96_*; confirm both are
# intended before relying on these results.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features S \
    --seq_len 192 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --batch_size 4 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/Informer.sh
================================================
export CUDA_VISIBLE_DEVICES=1

model_name=Informer

# One long_term_forecast run per horizon on electricity.csv; only --pred_len
# and the matching --model_id suffix differ between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/Koopa.sh
================================================
export CUDA_VISIBLE_DEVICES=1

model_name=Koopa

# Four runs in which --seq_len is always twice --pred_len
# (96/48, 192/96, 288/144, 384/192); no --label_len is passed.
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_48 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --pred_len 48 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --learning_rate 0.001 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_192_96 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 192 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --learning_rate 0.001 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_288_144 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 288 \
  --pred_len 144 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --learning_rate 0.001 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_384_192 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 384 \
  --pred_len 192 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --learning_rate 0.001 \
--itr 1 ================================================ FILE: scripts/long_term_forecast/ECL_script/LightTS.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=LightTS python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ECL_script/MICN.sh ================================================ export CUDA_VISIBLE_DEVICES=4 model_name=MICN python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ 
--data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ECL_script/Mamba.sh ================================================ model_name=Mamba for pred_len in 96 192 336 720 # for pred_len in 336 720 do python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_$pred_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ 
--seq_len $pred_len \ --label_len 48 \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --enc_in 321 \ --expand 2 \ --d_ff 16 \ --d_conv 4 \ --c_out 321 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ done ================================================ FILE: scripts/long_term_forecast/ECL_script/MultiPatchFormer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=MultiPatchFormer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 321 \ --d_model 256 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/electricity/ \ --data_path electricity.csv \ --model_id ECL_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 321 \ --dec_in 321 \ --c_out 
321 \
  --d_model 256 \
  --d_ff 512 \
  --top_k 5 \
  --des 'Exp' \
  --itr 1

================================================
FILE: scripts/long_term_forecast/ECL_script/Nonstationary_Transformer.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Nonstationary_Transformer

# One long_term_forecast run per horizon on electricity.csv. Every run also
# passes the projector settings (--p_hidden_dims 256 256, --p_hidden_layers 2)
# and --d_model 2048.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims 256 256 \
    --p_hidden_layers 2 \
    --d_model 2048
done

================================================
FILE: scripts/long_term_forecast/ECL_script/PatchTST.sh
================================================
export CUDA_VISIBLE_DEVICES=6

model_name=PatchTST

# One long_term_forecast run per horizon on electricity.csv with batch size 16.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --batch_size 16 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/Pyraformer.sh
================================================
export
CUDA_VISIBLE_DEVICES=1

model_name=Pyraformer

# One long_term_forecast run per horizon on electricity.csv; only --pred_len
# and the matching --model_id suffix differ between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/Reformer.sh
================================================
export CUDA_VISIBLE_DEVICES=1

model_name=Reformer

# One long_term_forecast run per horizon on electricity.csv; only --pred_len
# and the matching --model_id suffix differ between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/SegRNN.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=SegRNN

# Fixed look-back window shared by all horizons.
seq_len=96

# One long_term_forecast run per horizon on electricity.csv with
# --seg_len 24 and dropout disabled (--dropout 0).
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --seg_len 24 \
    --enc_in 321 \
    --d_model 512 \
    --dropout 0 \
    --learning_rate 0.001 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/TSMixer.sh
================================================
model_name=TSMixer

# One long_term_forecast run per horizon on electricity.csv. No GPU is pinned
# here (CUDA_VISIBLE_DEVICES is not exported by this script).
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/TimeMixer.sh
================================================
#export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

# Hyper-parameters shared by every horizon.
seq_len=96
e_layers=3
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32
batch_size=32
train_epochs=20
patience=10

# One long_term_forecast run per horizon on electricity.csv; only --pred_len
# and the --model_id suffix differ between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --batch_size "$batch_size" \
    --learning_rate "$learning_rate" \
    --train_epochs "$train_epochs" \
    --patience "$patience" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done

================================================
FILE: scripts/long_term_forecast/ECL_script/TimeXer.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# run_horizon PRED_LEN E_LAYERS [EXTRA_FLAGS...]
# Launches one long_term_forecast run on electricity.csv. Any extra flags are
# inserted between --des and --batch_size.
run_horizon() {
  horizon=$1
  depth=$2
  shift 2
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${horizon}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers "$depth" \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    "$@" \
    --batch_size 4 \
    --itr 1
}

# Encoder depth alternates per horizon; only the 96-step run sets --d_ff.
run_horizon 96 4 --d_ff 512
run_horizon 192 3
run_horizon 336 4
run_horizon 720 3

================================================
FILE: scripts/long_term_forecast/ECL_script/TimesNet.sh
================================================
export CUDA_VISIBLE_DEVICES=4

model_name=TimesNet

# One long_term_forecast run per horizon on electricity.csv with explicit
# --d_model/--d_ff/--top_k values.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ECL_script/Transformer.sh
================================================
export CUDA_VISIBLE_DEVICES=2

model_name=Transformer

# Four runs with --features S and single-channel enc_in/dec_in/c_out; only
# --pred_len and the --model_id suffix differ between runs.
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_96 \
  --model $model_name \
  --data custom \
  --features S \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --des 'Exp' \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_192 \
  --model $model_name \
  --data custom \
  --features S \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --des 'Exp' \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_336 \
  --model $model_name \
  --data custom \
  --features S \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --des 'Exp' \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_720 \
  --model $model_name \
  --data custom \
  --features S \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --des 'Exp' \
--itr 1

================================================
FILE: scripts/long_term_forecast/ECL_script/WPMixer.sh
================================================
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset directory / file stem under ./dataset/, followed by the per-horizon
# settings. Index i of every array below belongs to the same run.
dataset=electricity
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.00328086 0.000493286 0.002505375 0.001977516)
batches=(32 32 32 32)
epochs=(100 100 100 100)
dropouts=(0.1 0.1 0.2 0.1)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(32 32 32 32)
patiences=(12 12 12 12)

# Model params below are NOT passed on the command line; they need to be set
# in WPMixer.py Line 15, instead of this script.
wavelets=(sym3 coif5 sym4 db2)
levels=(2 3 1 2)
tfactors=(3 7 5 7)
dfactors=(5 5 7 8)
strides=(8 8 8 8)

# One run per prediction length. The data location (./dataset/electricity/),
# the loader name (custom) and the 321-channel arguments are the same ones the
# other ECL scripts in this directory pass to run.py.
# NOTE(review): confirm WPMixer takes its channel count from these flags.
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --data_path "${dataset}.csv" \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data custom \
    --features M \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done

================================================
FILE: scripts/long_term_forecast/ECL_script/iTransformer.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_96 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 3 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --d_model 512 \
  --d_ff 512 \
  --batch_size 16 \
  --learning_rate
0.0005 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_192 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 3 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --d_model 512 \
  --d_ff 512 \
  --batch_size 16 \
  --learning_rate 0.0005 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_336 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 3 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --d_model 512 \
  --d_ff 512 \
  --batch_size 16 \
  --learning_rate 0.0005 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_720 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 3 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --des 'Exp' \
  --d_model 512 \
  --d_ff 512 \
  --batch_size 16 \
  --learning_rate 0.0005 \
  --itr 1

================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=2

model_name=Autoformer

# One long_term_forecast run per horizon on ETTh1.csv (7-channel arguments).
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTh2.sh
================================================
export CUDA_VISIBLE_DEVICES=2

model_name=Autoformer

# One long_term_forecast run per horizon on ETTh2.csv (7-channel arguments).
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTm1.sh
================================================
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

# One long_term_forecast run per horizon on ETTm1.csv (7-channel arguments).
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTm2.sh
================================================
export CUDA_VISIBLE_DEVICES=2

model_name=Autoformer

# One long_term_forecast run per horizon on ETTm2.csv (7-channel arguments).
# This script passes --factor 1, unlike the other Autoformer ETT scripts.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTh1.sh
================================================
# Crossformer on ETTh1: one training run per forecast horizon with a 96-step
# input window and a 48-step label window. Targets CUDA device 0.
export CUDA_VISIBLE_DEVICES=0

model_name=Crossformer

# Horizons are swept in ascending order; --model_id carries the horizon.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTh2.sh
================================================
# Crossformer on ETTh2: one training run per forecast horizon with a 96-step
# input window and a 48-step label window. Targets CUDA device 1.
export CUDA_VISIBLE_DEVICES=1

model_name=Crossformer

# Horizons are swept in ascending order; --model_id carries the horizon.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTm1.sh
================================================
# Crossformer on ETTm1: one training run per forecast horizon with a 96-step
# input window and a 48-step label window. Targets CUDA device 2.
export CUDA_VISIBLE_DEVICES=2

model_name=Crossformer

# NOTE: unlike the other Crossformer ETT scripts, no --factor flag is passed
# here, so whatever value run.py falls back to is used.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTm2.sh
================================================
# Crossformer on ETTm2: one training run per forecast horizon with a 96-step
# input window and a 48-step label window. Targets CUDA device 4.
export CUDA_VISIBLE_DEVICES=4

model_name=Crossformer

# This script passes --factor 1 for every horizon.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/DLinear_ETTh1.sh
================================================
# DLinear on ETTh1: one training run per forecast horizon with a 96-step
# input window and a 48-step label window.
export CUDA_VISIBLE_DEVICES=2

model_name=DLinear

# Horizons are swept in ascending order; --model_id carries the horizon.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/ETSformer_ETTh1.sh
================================================
# ETSformer on ETTh1: one training run per forecast horizon with a 96-step
# input window and a 48-step label window.
export CUDA_VISIBLE_DEVICES=2

model_name=ETSformer

# This script passes --d_layers 2 together with --e_layers 2 for every run.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 2 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/FEDformer_ETTh1.sh
================================================
# FEDformer on ETTh1: one training run per forecast horizon with a 96-step
# input window and a 48-step label window.
export CUDA_VISIBLE_DEVICES=2

model_name=FEDformer

# Horizons are swept in ascending order; --model_id carries the horizon.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTh1.sh
================================================
# FiLM on ETTh1: one training run per forecast horizon, using a 336-step
# input window and a 48-step label window. Targets CUDA device 0.
export CUDA_VISIBLE_DEVICES=0

model_name=FiLM

# NOTE(review): --model_id keeps the "ETTh1_96_*" label even though
# --seq_len is 336; the label is left unchanged so run identifiers stay the
# same as before.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 336 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTh2.sh
================================================
# FiLM on ETTh2: one training run per forecast horizon, using a 168-step
# input window and a 48-step label window. Targets CUDA device 6.
export CUDA_VISIBLE_DEVICES=6

model_name=FiLM

# NOTE(review): --model_id keeps the "ETTh2_96_*" label even though
# --seq_len is 168; the label is left unchanged so run identifiers stay the
# same as before.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 168 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTm1.sh
================================================
# FiLM on ETTm1: one training run per forecast horizon, using a 720-step
# input window and a 48-step label window. Targets CUDA device 6.
export CUDA_VISIBLE_DEVICES=6

model_name=FiLM

# NOTE(review): --model_id keeps the "ETTm1_96_*" label even though
# --seq_len is 720; the label is left unchanged so run identifiers stay the
# same as before.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 720 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTm2.sh
================================================
# FiLM on ETTm2: one training run per forecast horizon, using a 720-step
# input window and a 48-step label window. Targets CUDA device 3.
export CUDA_VISIBLE_DEVICES=3

model_name=FiLM

# NOTE(review): --model_id keeps the "ETTm2_96_*" label even though
# --seq_len is 720; the label is left unchanged so run identifiers stay the
# same as before.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 720 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Informer_ETTh1.sh
================================================
# Informer on ETTh1: one training run per forecast horizon with a 96-step
# input window and a 48-step label window.
export CUDA_VISIBLE_DEVICES=2

model_name=Informer

# Horizons are swept in ascending order; --model_id carries the horizon.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTh1.sh
================================================
# Koopa on ETTh1: four training runs whose input window is always twice the
# forecast horizon (96/48, 192/96, 288/144, 384/192). No --label_len is passed.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

for pred_len in 48 96 144 192; do
  # Input window = 2 x horizon, matching the seq_len/pred_len pairs above.
  seq_len=$((pred_len * 2))
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTh2.sh
================================================
# Koopa on ETTh2: four training runs whose input window is always twice the
# forecast horizon (96/48, 192/96, 288/144, 384/192). No --label_len is passed.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

for pred_len in 48 96 144 192; do
  # Input window = 2 x horizon, matching the seq_len/pred_len pairs above.
  seq_len=$((pred_len * 2))
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTm1.sh
================================================
# Koopa on ETTm1: four training runs whose input window is always twice the
# forecast horizon (96/48, 192/96, 288/144, 384/192). No --label_len is passed.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

for pred_len in 48 96 144 192; do
  # Input window = 2 x horizon, matching the seq_len/pred_len pairs above.
  seq_len=$((pred_len * 2))
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTm2.sh
================================================
# Koopa on ETTm2: four training runs whose input window is always twice the
# forecast horizon (96/48, 192/96, 288/144, 384/192). No --label_len is passed.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

for pred_len in 48 96 144 192; do
  # Input window = 2 x horizon, matching the seq_len/pred_len pairs above.
  seq_len=$((pred_len * 2))
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/LTSM.sh
================================================
# Zero-shot evaluation (--task_name zero_shot_forecast, --is_training 0) of
# Chronos2 on the four ETT datasets with a 2048-step input window.
export CUDA_VISIBLE_DEVICES=2

model_name=Chronos2
seq_len=2048

# Datasets are visited in the order ETTh1, ETTh2, ETTm1, ETTm2, and every
# dataset is evaluated on the same four horizons. Driving all datasets from
# one horizon list keeps the sweep complete: a per-dataset list is how the
# 96-step ETTm1 run was previously left out.
for dataset in ETTh1 ETTh2 ETTm1 ETTm2; do
  # ETTh2 is the only dataset that passes --d_model 256; the rest pass 512.
  case "$dataset" in
    ETTh2) d_model=256 ;;
    *)     d_model=512 ;;
  esac

  for pred_len in 96 192 336 720; do
    python -u run.py \
      --task_name zero_shot_forecast \
      --is_training 0 \
      --root_path ./dataset/ETT-small/ \
      --data_path "${dataset}.csv" \
      --model_id "${dataset}_${seq_len}_${pred_len}" \
      --model "$model_name" \
      --data "$dataset" \
      --features M \
      --seq_len "$seq_len" \
      --pred_len "$pred_len" \
      --seg_len 24 \
      --enc_in 7 \
      --d_model "$d_model" \
      --dropout 0.5 \
      --learning_rate 0.0001 \
      --des 'Exp' \
      --itr 1
  done
done

================================================
FILE: scripts/long_term_forecast/ETT_script/LightTS_ETTh1.sh
================================================
# LightTS on ETTh1: one training run per forecast horizon with a 96-step
# input window and a 48-step label window.
export CUDA_VISIBLE_DEVICES=2

model_name=LightTS

# Horizons are swept in ascending order; --model_id carries the horizon.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTh1.sh
================================================
# MICN long-term forecasting on ETTh1: 96-step input, label_len 96, one
# training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=MICN

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_96_$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTh2.sh
================================================
# MICN long-term forecasting on ETTh2: 96-step input, label_len 96, one
# training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=MICN

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id ETTh2_96_$pred_len \
    --model $model_name \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTm1.sh
================================================
# MICN long-term forecasting on ETTm1: same settings as the hourly scripts
# plus --top_k 5, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=MICN

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id ETTm1_96_$pred_len \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --top_k 5 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTm2.sh
================================================
# MICN long-term forecasting on ETTm2: 96-step input, label_len 96,
# --top_k 5, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=MICN

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id ETTm2_96_$pred_len \
    --model $model_name \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --top_k 5 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/MambaSimple_ETTh1.sh
================================================
# MambaSimple long-term forecasting on ETTh1. The input length is set equal
# to the prediction length for every horizon (--seq_len $pred_len).
model_name=MambaSimple

for pred_len in 96 192 336 720
do
  # The last flag carries no trailing backslash: a continuation right before
  # `done` only works while a blank line happens to follow it.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_$pred_len'_'$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len $pred_len \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETT_all.sh
================================================
# Runs the four Mamba ETT scripts in sequence and collects their output in
# mamba_ett.txt (first run truncates the log, later runs append with -a).
# The scripts are invoked through bash because they carry no shebang line and
# may not have the executable bit set.
bash ./scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh | tee mamba_ett.txt
bash ./scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh | tee mamba_ett.txt -a
bash ./scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh | tee mamba_ett.txt -a
bash ./scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh | tee mamba_ett.txt -a

================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh
================================================
# Mamba long-term forecasting on ETTh1. The input length is set equal to the
# prediction length for every horizon (--seq_len $pred_len).
model_name=Mamba

for pred_len in 96 192 336 720
do
  # No trailing backslash after the last flag, so `done` can never be
  # swallowed as an argument of the python command.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_$pred_len'_'$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len $pred_len \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh
================================================
# Mamba long-term forecasting on ETTh2. The input length is set equal to the
# prediction length for every horizon (--seq_len $pred_len).
model_name=Mamba

for pred_len in 96 192 336 720
do
  # No trailing backslash after the last flag, so `done` can never be
  # swallowed as an argument of the python command.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id ETTh2_$pred_len'_'$pred_len \
    --model $model_name \
    --data ETTh2 \
    --features M \
    --seq_len $pred_len \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh
================================================
# Mamba long-term forecasting on ETTm1. The input length is set equal to the
# prediction length for every horizon (--seq_len $pred_len).
model_name=Mamba

for pred_len in 96 192 336 720
do
  # No trailing backslash after the last flag, so `done` can never be
  # swallowed as an argument of the python command.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id ETTm1_$pred_len'_'$pred_len \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --seq_len $pred_len \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh
================================================
# Mamba long-term forecasting on ETTm2. The input length is set equal to the
# prediction length for every horizon (--seq_len $pred_len).
model_name=Mamba

for pred_len in 96 192 336 720
do
  # No trailing backslash after the last flag, so `done` can never be
  # swallowed as an argument of the python command.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id ETTm2_$pred_len'_'$pred_len \
    --model $model_name \
    --data ETTm2 \
    --features M \
    --seq_len $pred_len \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTh1.sh
================================================
# MultiPatchFormer long-term forecasting on ETTh1: 96-step input, one
# training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=MultiPatchFormer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_96_$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 256 \
    --d_ff 512 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTm1.sh
================================================
# MultiPatchFormer long-term forecasting on ETTm1: 96-step input, 8 heads,
# batch size 32, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=MultiPatchFormer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id ETTm1_96_$pred_len \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 256 \
    --d_ff 512 \
    --des 'Exp' \
    --n_heads 8 \
    --batch_size 32 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh1.sh
================================================
# Non-stationary Transformer long-term forecasting on ETTh1: 96-step input,
# identical settings for every prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=Nonstationary_Transformer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_96_$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims 256 256 \
    --p_hidden_layers 2 \
    --d_model 128
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh2.sh
================================================
# Non-stationary Transformer long-term forecasting on ETTh2: 96-step input,
# one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=7

model_name=Nonstationary_Transformer

for pred_len in 96 192 336 720
do
  # Horizon 336 is the only one configured with four projector layers.
  case "$pred_len" in
    336)
      p_hidden_dims="256 256 256 256"
      p_hidden_layers=4
      ;;
    *)
      p_hidden_dims="256 256"
      p_hidden_layers=2
      ;;
  esac

  # $p_hidden_dims is intentionally unquoted so it splits into one value per
  # layer. The final flag has no trailing backslash: the original commands
  # each ended in a dangling continuation that only worked when a blank line
  # followed it.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id ETTh2_96_$pred_len \
    --model $model_name \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims $p_hidden_dims \
    --p_hidden_layers $p_hidden_layers
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm1.sh
================================================
# Non-stationary Transformer long-term forecasting on ETTm1: 96-step input,
# identical settings for every prediction horizon.
export CUDA_VISIBLE_DEVICES=3

model_name=Nonstationary_Transformer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id ETTm1_96_$pred_len \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims 16 16 16 16 \
    --p_hidden_layers 4
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm2.sh
================================================
# Non-stationary Transformer long-term forecasting on ETTm2: 96-step input,
# one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=4

model_name=Nonstationary_Transformer

for pred_len in 96 192 336 720
do
  # The two shorter horizons use 256-wide projector layers, the two longer
  # ones 16-wide layers; all four use four layers.
  case "$pred_len" in
    96|192) p_hidden_dims="256 256 256 256" ;;
    *)      p_hidden_dims="16 16 16 16" ;;
  esac

  # $p_hidden_dims is intentionally unquoted so it splits into one value per
  # layer.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id ETTm2_96_$pred_len \
    --model $model_name \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims $p_hidden_dims \
    --p_hidden_layers 4
done

================================================
FILE: scripts/long_term_forecast/ETT_script/PAttn_ETTh1.sh
================================================
# PAttn long-term forecasting on ETTh1: 96-step input, head count chosen per
# prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=PAttn

for pred_len in 96 192 336 720
do
  case "$pred_len" in
    96)      n_heads=2 ;;
    192|336) n_heads=8 ;;
    *)       n_heads=16 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_96_$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads $n_heads \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTh1.sh
================================================
# PatchTST long-term forecasting on ETTh1: 96-step input, one encoder layer,
# head count chosen per prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=PatchTST

for pred_len in 96 192 336 720
do
  case "$pred_len" in
    96)      n_heads=2 ;;
    192|336) n_heads=8 ;;
    *)       n_heads=16 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_96_$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 1 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads $n_heads \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTh2.sh
================================================
# PatchTST long-term forecasting on ETTh2: 96-step input, three encoder
# layers and four heads for every prediction horizon.
export CUDA_VISIBLE_DEVICES=1

model_name=PatchTST

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id ETTh2_96_$pred_len \
    --model $model_name \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 3 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads 4 \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTm1.sh
================================================
# PatchTST long-term forecasting on ETTm1: 96-step input; encoder depth,
# head count and batch size are chosen per prediction horizon.
export CUDA_VISIBLE_DEVICES=4

model_name=PatchTST

for pred_len in 96 192 336 720
do
  case "$pred_len" in
    96)  e_layers=1; n_heads=2; batch_size=32 ;;
    192) e_layers=3; n_heads=2; batch_size=128 ;;
    336) e_layers=1; n_heads=4; batch_size=128 ;;
    *)   e_layers=3; n_heads=4; batch_size=128 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id ETTm1_96_$pred_len \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers $e_layers \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads $n_heads \
    --batch_size $batch_size \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTm2.sh
================================================
# PatchTST long-term forecasting on ETTm2: 96-step input; encoder depth,
# head count and batch size are chosen per prediction horizon.
export CUDA_VISIBLE_DEVICES=5

model_name=PatchTST

for pred_len in 96 192 336 720
do
  case "$pred_len" in
    96)  e_layers=3; n_heads=16; batch_size=32 ;;
    192) e_layers=3; n_heads=2;  batch_size=128 ;;
    336) e_layers=1; n_heads=4;  batch_size=32 ;;
    *)   e_layers=3; n_heads=4;  batch_size=128 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id ETTm2_96_$pred_len \
    --model $model_name \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers $e_layers \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads $n_heads \
    --batch_size $batch_size \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTh1.sh
================================================
# Pyraformer long-term forecasting on ETTh1: 96-step input, identical
# settings for every prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=Pyraformer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id ETTh1_96_$pred_len \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTh2.sh
================================================
# Pyraformer long-term forecasting on ETTh2: 96-step input, identical
# settings for every prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=Pyraformer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id ETTh2_96_$pred_len \
    --model $model_name \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTm1.sh
================================================
# Pyraformer long-term forecasting on ETTm1: 96-step input, identical
# settings for every prediction horizon.
export CUDA_VISIBLE_DEVICES=1

model_name=Pyraformer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id ETTm1_96_$pred_len \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTm2.sh
================================================
export CUDA_VISIBLE_DEVICES=2

model_name=Pyraformer

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm2.csv \
  --model_id ETTm2_96_96 \
  --model $model_name \
  --data ETTm2 \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 1 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --des 'Exp' \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm2.csv \
  --model_id ETTm2_96_192 \
  --model $model_name \
  --data ETTm2 \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 1 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --des 'Exp' \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm2.csv \
  --model_id ETTm2_96_336 \
  --model $model_name \
  --data ETTm2 \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 1 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --des
'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_720 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ETT_script/Reformer_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=Reformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_96 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_192 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_336 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_720 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ 
--des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ETT_script/SegRNN_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=SegRNN seq_len=96 for pred_len in 96 192 336 720 do python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_$seq_len'_'$pred_len \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len $seq_len \ --pred_len $pred_len \ --seg_len 24 \ --enc_in 7 \ --d_model 512 \ --dropout 0.5 \ --learning_rate 0.0001 \ --des 'Exp' \ --itr 1 done ================================================ FILE: scripts/long_term_forecast/ETT_script/SegRNN_ETTh2.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=SegRNN seq_len=96 for pred_len in 96 192 336 720 do python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_$seq_len'_'$pred_len \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len $seq_len \ --pred_len $pred_len \ --seg_len 24 \ --enc_in 7 \ --d_model 512 \ --dropout 0.5 \ --learning_rate 0.0001 \ --des 'Exp' \ --itr 1 done ================================================ FILE: scripts/long_term_forecast/ETT_script/SegRNN_ETTm1.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=SegRNN seq_len=96 for pred_len in 96 192 336 720 do python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_$seq_len'_'$pred_len \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len $seq_len \ --pred_len $pred_len \ --seg_len 48 \ --enc_in 7 \ --d_model 512 \ --dropout 0.5 \ --learning_rate 0.0001 \ --des 'Exp' \ --itr 1 done ================================================ FILE: 
scripts/long_term_forecast/ETT_script/SegRNN_ETTm2.sh
================================================
# SegRNN on ETTm2 with --seg_len 48.
# Sweeps the prediction length; the input length comes from $seq_len and is
# also embedded in --model_id.
export CUDA_VISIBLE_DEVICES=2

model_name=SegRNN
seq_len=96

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_${seq_len}_${horizon}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$horizon" \
    --seg_len 48 \
    --enc_in 7 \
    --d_model 512 \
    --dropout 0.5 \
    --learning_rate 0.0001 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTh1.sh
================================================
# TSMixer on ETTh1.
# One run.py invocation per prediction length; input length is fixed at 96.
# This script does not set CUDA_VISIBLE_DEVICES.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTh2.sh
================================================
# TSMixer on ETTh2.
# One run.py invocation per prediction length; input length is fixed at 96.
# This script does not set CUDA_VISIBLE_DEVICES.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTm1.sh
================================================
# TSMixer on ETTm1.
# One run.py invocation per prediction length; input length is fixed at 96.
# This script does not set CUDA_VISIBLE_DEVICES.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done

================================================ FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTm2.sh ================================================ model_name=TSMixer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_96 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name
long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_192 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_336 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_720 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1
================================================
FILE: scripts/long_term_forecast/ETT_script/TiDE_ETTh1.sh
================================================
# TiDE on ETTh1.
# One run.py invocation per prediction length; input length is fixed at 96.
# Neither --des nor --itr is passed here, so run.py's own defaults apply.
export CUDA_VISIBLE_DEVICES=2

model_name=TiDE

for pred_len in 96 192 336 720; do
  # The last option must NOT carry a trailing '\': a dangling continuation
  # glues whatever line follows onto this command.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 2 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 8 \
    --d_model 256 \
    --d_ff 256 \
    --dropout 0.3 \
    --batch_size 512 \
    --learning_rate 0.1 \
    --patience 5 \
    --train_epochs 10
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTh1.sh
================================================
# TimeMixer on ETTh1.
# Hyperparameters shared by every run are declared once below; the loop
# only varies the prediction length (and the matching --model_id suffix).
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32
train_epochs=10
patience=10
# 128 is the batch size the commands actually passed; the variable now
# holds that value instead of an unused 16.
batch_size=128

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --learning_rate "$learning_rate" \
    --train_epochs "$train_epochs" \
    --patience "$patience" \
    --batch_size "$batch_size" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTh2.sh
================================================
export CUDA_VISIBLE_DEVICES=0
# TimeMixer on ETTh2.
# No --batch_size, --train_epochs or --patience is passed by this script,
# so run.py's own defaults apply for those options.
model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --learning_rate "$learning_rate" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTm1.sh
================================================
# TimeMixer on ETTm1.
# Shared hyperparameters are declared once; the loop only varies the
# prediction length (and the matching --model_id suffix).
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32
batch_size=16

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --batch_size "$batch_size" \
    --learning_rate "$learning_rate" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTm2.sh
================================================
# TimeMixer on ETTm2.
# Shared hyperparameters are declared once; the loop only varies the
# prediction length (and the matching --model_id suffix).
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=32
d_ff=32
# 128 is the batch size the commands actually passed; the variable now
# holds that value instead of an unused 16.
batch_size=128

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --batch_size "$batch_size" \
    --learning_rate "$learning_rate" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTh1.sh
================================================
# TimeXer on ETTh1.
# Each prediction length uses its own model size / batch size, so the
# options common to all runs live in run_exp and the per-run overrides are
# passed as extra arguments.
export CUDA_VISIBLE_DEVICES=1

model_name=TimeXer

# run_exp PRED_LEN [EXTRA_RUN_PY_ARGS...]
run_exp() {
  pred_len=$1
  shift
  # --des is 'Exp' for every run (the 96 run previously used 'exp', unlike
  # the other three runs in this script).
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

run_exp 96  --e_layers 1 --d_model 256 --batch_size 4
run_exp 192 --e_layers 2 --d_model 128 --batch_size 4
run_exp 336 --e_layers 1 --d_model 512 --d_ff 1024 --batch_size 16
run_exp 720 --e_layers 1 --d_model 256 --d_ff 1024 --batch_size 16

================================================ FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTh2.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=TimeXer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_96 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 256 \ --d_ff 1024 \ --batch_size 16 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_192 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 1 \ --factor 3 \ --enc_in
7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 256 \ --d_ff 1024 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_336 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 512 \ --d_ff 1024 \ --batch_size 4 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_720 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 256 \ --d_ff 1024 \ --batch_size 16 \ --itr 1
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTm1.sh
================================================
# TimeXer on ETTm1.
# All runs share --d_model 256 and --batch_size 4; only the prediction
# length and (for 192/336/720) --d_ff differ, so those are passed per call.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# run_exp PRED_LEN [EXTRA_RUN_PY_ARGS...]
run_exp() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 256 \
    --batch_size 4 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

run_exp 96
run_exp 192 --d_ff 256
run_exp 336 --d_ff 1024
run_exp 720 --d_ff 512

================================================
FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTm2.sh
================================================
# TimeXer on ETTm2.
# Options common to the runs live in run_exp; model width, feed-forward
# width and batch size are passed per call where the run sets them.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# run_exp PRED_LEN [EXTRA_RUN_PY_ARGS...]
run_exp() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

run_exp 96  --d_model 256
run_exp 192 --d_model 256 --d_ff 1024 --batch_size 16
run_exp 336 --d_model 512 --d_ff 1024

# Prediction length 720 run for ETTm2; relies on $model_name set earlier in
# this script.
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTm2.csv \
  --model_id ETTm2_96_720 \
  --model "$model_name" \
  --data ETTm2 \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 1 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1

================================================
FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh
================================================
# TimesNet on ETTh1 (--d_model 16, --d_ff 32, --top_k 5).
# One run.py invocation per prediction length; input length is fixed at 96.
export CUDA_VISIBLE_DEVICES=2

model_name=TimesNet

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 16 \
    --d_ff 32 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5
done

================================================
FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTh2.sh
================================================
# TimesNet on ETTh2 (--d_model 32, --d_ff 32, --top_k 5).
# One run.py invocation per prediction length; input length is fixed at 96.
export CUDA_VISIBLE_DEVICES=2

model_name=TimesNet

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================ FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTm1.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=TimesNet python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_96 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_192 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_336 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --top_k 5 \ --itr 1 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_720 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --top_k 5 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTm2.sh ================================================ export 
CUDA_VISIBLE_DEVICES=1 model_name=TimesNet python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_96 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_192 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_336 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_720 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 16 \ --d_ff 32 \ --top_k 5 \ --itr 1 \ --train_epochs 1 ================================================ FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTh1.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ 
--data_path ETTh1.csv \ --model_id ETTh1_96_96 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_192 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_336 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh1.csv \ --model_id ETTh1_96_720 \ --model $model_name \ --data ETTh1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTh2.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_96 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path 
./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_192 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_336 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_720 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTm1.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_96 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_192 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path 
./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_336 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm1.csv \ --model_id ETTm1_96_720 \ --model $model_name \ --data ETTm1 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTm2.sh ================================================ export CUDA_VISIBLE_DEVICES=2 model_name=Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_96 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_192 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTm2.csv \ --model_id ETTm2_96_336 \ --model $model_name \ --data ETTm2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 1 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path 
./dataset/ETT-small/ \
  --data_path ETTm2.csv \
  --model_id ETTm2_96_720 \
  --model $model_name \
  --data ETTm2 \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 1 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --des 'Exp' \
  --itr 1

================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTh1.sh
================================================
# WPMixer long-term forecasting on ETTh1: one run per prediction length,
# with per-horizon hyperparameters taken from the parallel arrays below.

# Set the GPU to use
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset directory. The other ETT scripts in this repository read the CSV
# files from ./dataset/ETT-small/; set ROOT_PATH to use a different location
# (e.g. ROOT_PATH=./data/ETT/).
root_path="${ROOT_PATH:-./dataset/ETT-small/}"

# Dataset and per-horizon settings (index i of every array belongs to pred_lens[i])
dataset=ETTh1
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.000242438 0.000201437 0.000132929 0.000239762)
batches=(256 256 256 256)
epochs=(30 30 30 30)
dropouts=(0.4 0.05 0.0 0.2)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(256 256 256 128)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are recorded here for reference only; the loop does not pass them to run.py.
wavelets=(db2 db3 db2 db2)
levels=(2 2 1 1)
tfactors=(5 5 3 5)
dfactors=(8 5 3 3)
strides=(8 8 8 8)

# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTh1.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done

================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTh2.sh
================================================
# WPMixer long-term forecasting on ETTh2: one run per prediction length,
# with per-horizon hyperparameters taken from the parallel arrays below.

export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset directory. The other ETT scripts in this repository read the CSV
# files from ./dataset/ETT-small/; set ROOT_PATH to use a different location
# (e.g. ROOT_PATH=./data/ETT/).
root_path="${ROOT_PATH:-./dataset/ETT-small/}"

# Dataset and per-horizon settings (index i of every array belongs to pred_lens[i])
dataset=ETTh2
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.000466278 0.000294929 0.000617476 0.000810205)
batches=(256 256 256 256)
epochs=(30 30 30 30)
dropouts=(0.0 0.0 0.1 0.4)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(256 256 128 128)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are recorded here for reference only; the loop does not pass them to run.py.
wavelets=(db2 db2 db2 db2)
levels=(2 3 5 5)
tfactors=(5 3 5 5)
dfactors=(5 8 3 5)
strides=(8 8 8 8)

# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTh2.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done

================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTm1.sh
================================================
# WPMixer long-term forecasting on ETTm1: one run per prediction length,
# with per-horizon hyperparameters taken from the parallel arrays below.

export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset directory. The other ETT scripts in this repository read the CSV
# files from ./dataset/ETT-small/; set ROOT_PATH to use a different location
# (e.g. ROOT_PATH=./data/ETT/).
root_path="${ROOT_PATH:-./dataset/ETT-small/}"

# Dataset and per-horizon settings (index i of every array belongs to pred_lens[i])
dataset=ETTm1
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.001277976 0.002415901 0.001594735 0.002011441)
batches=(256 256 256 256)
epochs=(80 80 80 80)
dropouts=(0.4 0.4 0.4 0.4)
patch_lens=(48 48 48 48)
lradjs=(type3 type3 type3 type3)
d_models=(256 128 256 128)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are recorded here for reference only; the loop does not pass them to run.py.
wavelets=(db2 db3 db5 db5)
levels=(1 1 1 4)
tfactors=(5 3 7 3)
dfactors=(3 7 7 8)
strides=(24 24 24 24)

# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTm1.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done

================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTm2.sh
================================================
# WPMixer long-term forecasting on ETTm2: one run per prediction length,
# with per-horizon hyperparameters taken from the parallel arrays below.

# Set the GPU to use
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset directory. The other ETT scripts in this repository read the CSV
# files from ./dataset/ETT-small/; set ROOT_PATH to use a different location
# (e.g. ROOT_PATH=./data/ETT/).
root_path="${ROOT_PATH:-./dataset/ETT-small/}"

# Dataset and per-horizon settings (index i of every array belongs to pred_lens[i])
dataset=ETTm2
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.00076587 0.000275775 0.000234608 0.001039536)
batches=(256 256 256 256)
epochs=(80 80 80 80)
dropouts=(0.4 0.2 0.4 0.4)
patch_lens=(48 48 48 48)
lradjs=(type3 type3 type3 type3)
d_models=(256 256 256 256)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are recorded here for reference only; the loop does not pass them to run.py.
wavelets=(bior3.1 db2 db2 db2)
levels=(1 1 1 1)
tfactors=(3 3 3 3)
dfactors=(8 7 5 8)
strides=(24 24 24 24)

# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTm2.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done

================================================
FILE: scripts/long_term_forecast/ETT_script/iTransformer_ETTh2.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1
\ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_96 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 128 \ --d_ff 128 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_192 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 128 \ --d_ff 128 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_336 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 128 \ --d_ff 128 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/ETT-small/ \ --data_path ETTh2.csv \ --model_id ETTh2_96_720 \ --model $model_name \ --data ETTh2 \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --d_model 128 \ --d_ff 128 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/Autoformer.sh ================================================ export CUDA_VISIBLE_DEVICES=7 model_name=Autoformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ 
--factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/Crossformer.sh ================================================ export CUDA_VISIBLE_DEVICES=4 model_name=Crossformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path 
exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/FiLM.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=FiLM python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 384 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 384 \ --label_len 48 \ 
--pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 720 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 720 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/Koopa.sh ================================================ export CUDA_VISIBLE_DEVICES=7 model_name=Koopa python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_192_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 192 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id 
Exchange_288_144 \ --model $model_name \ --data custom \ --features M \ --seq_len 288 \ --pred_len 144 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_384_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 384 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/MICN.sh ================================================ export CUDA_VISIBLE_DEVICES=7 model_name=MICN python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ 
--d_model 32 \
  --d_ff 32 \
  --top_k 5 \
  --des 'Exp' \
  --itr 1 \
  --train_epochs 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/exchange_rate/ \
  --data_path exchange_rate.csv \
  --model_id Exchange_96_720 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 96 \
  --pred_len 720 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 8 \
  --dec_in 8 \
  --c_out 8 \
  --d_model 32 \
  --d_ff 32 \
  --top_k 5 \
  --des 'Exp' \
  --itr 1 \
  --train_epochs 1

================================================
FILE: scripts/long_term_forecast/Exchange_script/Mamba.sh
================================================
# Mamba long-term forecasting on the exchange-rate dataset.
# Each run uses the same value for the input window (--seq_len) and the
# forecast horizon (--pred_len), which is why the model id repeats it.
model_name=Mamba

for pred_len in 96 192 336 720
do
  # The last argument must not end with a line continuation: a trailing
  # backslash would splice the following line (ultimately `done`) into the
  # python command line and leave the loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_${pred_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 8 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 8 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/Exchange_script/Nonstationary_Transformer.sh
================================================
export CUDA_VISIBLE_DEVICES=4

model_name=Nonstationary_Transformer

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/exchange_rate/ \
  --data_path exchange_rate.csv \
  --model_id Exchange_96_96 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 8 \
  --dec_in 8 \
  --c_out 8 \
  --des 'Exp' \
  --itr 1 \
  --p_hidden_dims 256 256 \
  --p_hidden_layers 2

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/exchange_rate/ \
  --data_path exchange_rate.csv \
  --model_id Exchange_96_192 \
  --model
$model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 64 64 64 64 \ --p_hidden_layers 4 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 ================================================ FILE: scripts/long_term_forecast/Exchange_script/PatchTST.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=PatchTST python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ 
--factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/Pyraformer.sh ================================================ export CUDA_VISIBLE_DEVICES=4 model_name=Pyraformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model 
$model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/TimesNet.sh ================================================ export CUDA_VISIBLE_DEVICES=7 model_name=TimesNet python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 32 \ 
--d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 ================================================ FILE: scripts/long_term_forecast/Exchange_script/Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=4 model_name=Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ --model_id Exchange_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/exchange_rate/ \ --data_path exchange_rate.csv \ 
--model_id Exchange_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 8 \ --dec_in 8 \ --c_out 8 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ILI_script/Autoformer.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=Autoformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_24 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 24 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_36 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 36 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_60 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 60 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 ================================================ FILE: 
scripts/long_term_forecast/ILI_script/Crossformer.sh
================================================
export CUDA_VISIBLE_DEVICES=5

model_name=Crossformer

# Long-term forecasting with Crossformer on the ILI data
# (./dataset/illness/national_illness.csv): a 36-step input window and
# prediction lengths 24, 36, 48 and 60, one run.py invocation per horizon.
#
# NOTE: the dropout option is spelled `--dropout` in all four runs. Three of
# them previously used `-dropout` (single dash), which is a different option
# string from `--dropout` and is presumably rejected by run.py's argument
# parser as an unrecognized argument -- confirm against run.py.

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/illness/ \
  --data_path national_illness.csv \
  --model_id ili_36_24 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 36 \
  --label_len 18 \
  --pred_len 24 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --d_model 768 \
  --d_ff 768 \
  --top_k 5 \
  --des 'Exp' \
  --dropout 0.6 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/illness/ \
  --data_path national_illness.csv \
  --model_id ili_36_36 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 36 \
  --label_len 18 \
  --pred_len 36 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --d_model 768 \
  --d_ff 768 \
  --top_k 5 \
  --des 'Exp' \
  --dropout 0.6 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/illness/ \
  --data_path national_illness.csv \
  --model_id ili_36_48 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 36 \
  --label_len 18 \
  --pred_len 48 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --d_model 768 \
  --d_ff 768 \
  --top_k 5 \
  --des 'Exp' \
  --dropout 0.6 \
  --itr 1

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/illness/ \
  --data_path national_illness.csv \
  --model_id ili_36_60 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 36 \
  --label_len 18 \
  --pred_len 60 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --d_model 768 \
  --d_ff 768 \
  --top_k 5 \
  --des 'Exp' \
  --dropout 0.6 \
  --itr 1
================================================
FILE: scripts/long_term_forecast/ILI_script/FiLM.sh
================================================
export
CUDA_VISIBLE_DEVICES=5 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_24 \ --model FiLM \ --data custom \ --features M \ --seq_len 60 \ --label_len 18 \ --pred_len 24 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_36 \ --model FiLM \ --data custom \ --features M \ --seq_len 60 \ --label_len 18 \ --pred_len 36 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_48 \ --model FiLM \ --data custom \ --features M \ --seq_len 60 \ --label_len 18 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_60 \ --model FiLM \ --data custom \ --features M \ --seq_len 60 \ --label_len 18 \ --pred_len 60 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ILI_script/Koopa.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=Koopa python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_48_24 \ --model $model_name \ --data custom \ --features M \ --seq_len 48 \ --pred_len 24 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ 
--learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_72_36 \ --model $model_name \ --data custom \ --features M \ --seq_len 72 \ --pred_len 36 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_96_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_120_60 \ --model $model_name \ --data custom \ --features M \ --seq_len 120 \ --pred_len 60 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ILI_script/MICN.sh ================================================ export CUDA_VISIBLE_DEVICES=4 model_name=MICN python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_24 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 36 \ --pred_len 24 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_36 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 36 \ 
--pred_len 36 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 36 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_60 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 36 \ --pred_len 60 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ILI_script/Nonstationary_Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=3 model_name=Nonstationary_Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_24 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 24 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 32 32 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_36 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 36 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 \ 
--p_hidden_dims 32 32 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 16 16 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_60 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 60 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 8 8 \ --p_hidden_layers 2 ================================================ FILE: scripts/long_term_forecast/ILI_script/PatchTST.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=PatchTST python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_24 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 24 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --n_heads 4 \ --d_model 1024\ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_36 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 36 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --n_heads 4 \ --d_model 2048\ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ 
--model_id ili_36_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 48 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --n_heads 4 \ --d_model 2048\ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_60 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 60 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --n_heads 16 \ --d_model 2048\ --itr 1 ================================================ FILE: scripts/long_term_forecast/ILI_script/TimesNet.sh ================================================ export CUDA_VISIBLE_DEVICES=4 model_name=TimesNet python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_24 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 24 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_36 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 36 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ 
--c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_60 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 60 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --d_model 768 \ --d_ff 768 \ --top_k 5 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/ILI_script/Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_24 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 24 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_36 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 36 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 36 \ --label_len 18 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 7 \ --dec_in 7 \ --c_out 7 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/illness/ \ --data_path national_illness.csv \ --model_id ili_36_60 \ --model $model_name \ --data custom \ --features M \ 
--seq_len 36 \
  --label_len 18 \
  --pred_len 60 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --des 'Exp' \
  --itr 1
================================================
FILE: scripts/long_term_forecast/Mamba_all.sh
================================================
# Runs the Mamba long-term forecasting scripts for the ECL, Traffic, Exchange
# and Weather datasets one after another. The paths are relative, so this
# must be started from the repository root.
#
# Each script is passed to `bash` explicitly instead of being executed as
# `./path/to/Mamba.sh`, so the run does not depend on the files having the
# executable bit set or carrying a shebang line.
bash ./scripts/long_term_forecast/ECL_script/Mamba.sh
bash ./scripts/long_term_forecast/Traffic_script/Mamba.sh
bash ./scripts/long_term_forecast/Exchange_script/Mamba.sh
bash ./scripts/long_term_forecast/Weather_script/Mamba.sh
================================================
FILE: scripts/long_term_forecast/Traffic_script/Autoformer.sh
================================================
export CUDA_VISIBLE_DEVICES=6

model_name=Autoformer

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_96_96 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --des 'Exp' \
  --itr 1 \
  --train_epochs 3

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_96_192 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --des 'Exp' \
  --itr 1 \
  --train_epochs 3

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path traffic.csv \
  --model_id traffic_96_336 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 862 \
  --dec_in 862 \
  --c_out 862 \
  --des 'Exp' \
  --itr 1 \
  --train_epochs 3

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/traffic/ \
  --data_path
traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 ================================================ FILE: scripts/long_term_forecast/Traffic_script/Crossformer.sh ================================================ export CUDA_VISIBLE_DEVICES=6 model_name=Crossformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --top_k 5 \ --des 'Exp' \ --n_heads 2 \ --batch_size 4 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --top_k 5 \ --des 'Exp' \ --n_heads 2 \ --batch_size 4 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --top_k 5 \ --des 'Exp' \ --n_heads 2 \ --batch_size 4 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 720 \ --e_layers 2 \ 
--d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --top_k 5 \ --des 'Exp' \ --n_heads 2 \ --batch_size 4 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Traffic_script/FiLM.sh ================================================ export CUDA_VISIBLE_DEVICES=3 model_name=FiLM python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 720 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --batch_size 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 720 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --batch_size 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 720 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --batch_size 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 720 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --batch_size 2 ================================================ FILE: scripts/long_term_forecast/Traffic_script/Koopa.sh 
================================================ export CUDA_VISIBLE_DEVICES=6 model_name=Koopa python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_48 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --pred_len 48 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_192_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 192 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_288_144 \ --model $model_name \ --data custom \ --features M \ --seq_len 288 \ --pred_len 144 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_384_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 384 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --learning_rate 0.001 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Traffic_script/MICN.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=MICN python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ 
--features M \ --seq_len 96 \ --label_len 96 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Traffic_script/Mamba.sh ================================================ model_name=Mamba for pred_len in 96 192 336 720 do python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_$pred_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --seq_len $pred_len \ --label_len 48 \ --pred_len $pred_len \ --e_layers 2 \ --d_layers 1 \ --enc_in 862 \ --expand 2 \ --d_ff 
16 \ --d_conv 4 \ --c_out 862 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ done ================================================ FILE: scripts/long_term_forecast/Traffic_script/MultiPatchFormer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=MultiPatchFormer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 1 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 1 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 1 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 ================================================ FILE: 
scripts/long_term_forecast/Traffic_script/Nonstationary_Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=Nonstationary_Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 \ --p_hidden_dims 128 128 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 \ --p_hidden_dims 128 128 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 \ --p_hidden_dims 16 16 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 \ --p_hidden_dims 128 128 \ --p_hidden_layers 2 ================================================ FILE: 
scripts/long_term_forecast/Traffic_script/PatchTST.sh ================================================
# Long-term forecasting on the Traffic dataset with PatchTST.
# All four horizons share the same hyperparameters, so a single loop
# launches them in order (96, 192, 336, 720).
export CUDA_VISIBLE_DEVICES=6

model_name=PatchTST

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id traffic_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --batch_size 4 \
    --itr 1
done

================================================ FILE: scripts/long_term_forecast/Traffic_script/Pyraformer.sh
================================================ export CUDA_VISIBLE_DEVICES=6 model_name=Pyraformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 ================================================ FILE: scripts/long_term_forecast/Traffic_script/SegRNN.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=SegRNN seq_len=96 for pred_len in 96 192 336 720 do python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ 
--data_path traffic.csv \ --model_id traffic_$seq_len'_'$pred_len \ --model $model_name \ --data custom \ --features M \ --seq_len $seq_len \ --pred_len $pred_len \ --seg_len 24 \ --enc_in 862 \ --d_model 512 \ --dropout 0 \ --learning_rate 0.001 \ --des 'Exp' \ --itr 1 done ================================================ FILE: scripts/long_term_forecast/Traffic_script/TSMixer.sh ================================================
# Long-term forecasting on the Traffic dataset with TSMixer.
# One training run per prediction horizon; all other hyperparameters are
# shared, so the horizons are iterated in a loop.
model_name=TSMixer
learning_rate=0.001

for pred_len in 96 192 336 720
do
  # The last argument must NOT end with a line continuation: a trailing
  # backslash would splice whatever follows into this command line.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id traffic_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --d_ff 32 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate $learning_rate
done

================================================ FILE: scripts/long_term_forecast/Traffic_script/TimeMixer.sh ================================================ #export CUDA_VISIBLE_DEVICES=0 model_name=TimeMixer seq_len=96 e_layers=3 down_sampling_layers=3 down_sampling_window=2 learning_rate=0.01 d_model=32 d_ff=64 batch_size=8 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id Traffic_$seq_len'_'96 \ --model $model_name \ --data custom \ --features M \ --seq_len $seq_len \ --label_len 0 \ --pred_len 96 \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --d_model $d_model \ --d_ff $d_ff \ --batch_size $batch_size \ --learning_rate $learning_rate \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id Traffic_$seq_len'_'192 \ --model $model_name \ --data custom \ --features M \ --seq_len $seq_len \ --label_len 0 \ --pred_len 192 \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --d_model $d_model \ --d_ff $d_ff \ --batch_size $batch_size \ --learning_rate $learning_rate \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id Traffic_$seq_len'_'336 \ --model $model_name \ --data custom \ --features M \ --seq_len $seq_len \ --label_len 0 \
--pred_len 336 \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --d_model $d_model \ --d_ff $d_ff \ --batch_size $batch_size \ --learning_rate $learning_rate \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id Traffic_$seq_len'_'720 \ --model $model_name \ --data custom \ --features M \ --seq_len $seq_len \ --label_len 0 \ --pred_len 720 \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --d_model $d_model \ --d_ff $d_ff \ --batch_size $batch_size \ --learning_rate $learning_rate \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window ================================================ FILE: scripts/long_term_forecast/Traffic_script/TimeXer.sh ================================================ export CUDA_VISIBLE_DEVICES=3 model_name=TimeXer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 3 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --des 'Exp' \ --batch_size 16 \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 3 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --des 'Exp' \ --batch_size 16 \ --learning_rate 
0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --des 'Exp' \ --batch_size 16 \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --des 'Exp' \ --batch_size 16 \ --learning_rate 0.001 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Traffic_script/TimesNet.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=TimesNet python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ 
--data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --d_model 512 \ --d_ff 512 \ --top_k 5 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Traffic_script/Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 
3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 ================================================ FILE: scripts/long_term_forecast/Traffic_script/WPMixer.sh ================================================
# Long-term forecasting on the Traffic dataset with WPMixer.
# Hyperparameters are stored in parallel arrays; index i selects the
# settings for one prediction horizon. Requires bash (arrays).
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset name (used to build the data path) and per-horizon settings
dataset=traffic
seq_lens=(1200 1200 1200 1200)
pred_lens=(96 192 336 720)
learning_rates=(0.0010385 0.000567053 0.001026715 0.001496217)
batches=(16 16 16 16)
epochs=(60 60 50 60)
dropouts=(0.05 0.05 0.0 0.05)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(16 32 32 32)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are recorded here for reference only and are not passed to run.py.
wavelets=(db3 db3 bior3.1 db3)
levels=(1 1 1 1)
tfactors=(3 3 7 7)
dfactors=(5 5 7 3)
strides=(8 8 8 8)

# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
	# Data location, loader name (--data custom), --features M and the
	# 862-channel sizes follow the other Traffic scripts in this directory.
	# NOTE(review): confirm WPMixer consumes --enc_in/--dec_in/--c_out.
	python -u run.py \
		--is_training 1 \
		--root_path ./dataset/$dataset/ \
		--data_path $dataset.csv \
		--model_id wpmixer \
		--model $model_name \
		--task_name long_term_forecast \
		--data custom \
		--features M \
		--seq_len ${seq_lens[$i]} \
		--pred_len ${pred_lens[$i]} \
		--label_len 0 \
		--enc_in 862 \
		--dec_in 862 \
		--c_out 862 \
		--d_model ${d_models[$i]} \
		--patch_len ${patch_lens[$i]} \
		--batch_size ${batches[$i]} \
		--learning_rate ${learning_rates[$i]} \
		--lradj ${lradjs[$i]} \
		--dropout ${dropouts[$i]} \
		--patience ${patiences[$i]} \
		--train_epochs ${epochs[$i]} \
		--use_amp
done

================================================ FILE: scripts/long_term_forecast/Traffic_script/iTransformer.sh ================================================ export
CUDA_VISIBLE_DEVICES=0 model_name=iTransformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --d_model 512 \ --d_ff 512 \ --batch_size 16 \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --d_model 512 \ --d_ff 512 \ --batch_size 16 \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --d_model 512 \ --d_ff 512 \ --batch_size 16 \ --learning_rate 0.001 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/traffic/ \ --data_path traffic.csv \ --model_id traffic_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 4 \ --d_layers 1 \ --factor 3 \ --enc_in 862 \ --dec_in 862 \ --c_out 862 \ --des 'Exp' \ --d_model 512 \ --d_ff 512 \ --batch_size 16 \ --learning_rate 0.001 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Weather_script/Autoformer.sh ================================================ export CUDA_VISIBLE_DEVICES=1 
model_name=Autoformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 \ --train_epochs 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Weather_script/Crossformer.sh ================================================ export CUDA_VISIBLE_DEVICES=7 model_name=Crossformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ 
--factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 ================================================ FILE: scripts/long_term_forecast/Weather_script/FiLM.sh ================================================ export CUDA_VISIBLE_DEVICES=6 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_720 \ --model FiLM \ --data custom \ --features M \ --seq_len 720 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path 
./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_96 \ --model FiLM \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_192 \ --model FiLM \ --data custom \ --features M \ --seq_len 192 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_336 \ --model FiLM \ --data custom \ --features M \ --seq_len 336 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Weather_script/MICN.sh ================================================ export CUDA_VISIBLE_DEVICES=5 model_name=MICN python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ 
--train_epochs 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 96 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --train_epochs 1 ================================================ FILE: scripts/long_term_forecast/Weather_script/Mamba.sh ================================================
# Long-term forecasting on the Weather dataset with Mamba.
# One training run per horizon; the input window length is set equal to
# the prediction length for each run.
model_name=Mamba

for pred_len in 96 192 336 720
do

# The final argument deliberately has no trailing backslash: a line
# continuation there would pull `done` into the python command line and
# leave the loop unterminated.
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_$pred_len'_'$pred_len \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len $pred_len \
  --label_len 48 \
  --pred_len $pred_len \
  --e_layers 2 \
  --d_layers 1 \
  --enc_in 21 \
  --expand 2 \
  --d_ff 16 \
  --d_conv 4 \
  --c_out 21 \
  --d_model 128 \
  --des 'Exp' \
  --itr 1

done

================================================ FILE: scripts/long_term_forecast/Weather_script/MultiPatchFormer.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=MultiPatchFormer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 1 \ --enc_in 21 \ --dec_in 21
\ --c_out 21 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 1 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 1 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 1 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --d_model 256 \ --d_ff 512 \ --des 'Exp' \ --n_heads 8 \ --batch_size 32 \ --itr 1 ================================================ FILE: scripts/long_term_forecast/Weather_script/Nonstationary_Transformer.sh ================================================ export CUDA_VISIBLE_DEVICES=6 model_name=Nonstationary_Transformer python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 \ --train_epochs 3 \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name 
long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_192 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 192 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 128 128 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_336 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 336 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 128 128 \ --p_hidden_layers 2 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_720 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 720 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 \ --p_hidden_dims 128 128 \ --p_hidden_layers 2 ================================================ FILE: scripts/long_term_forecast/Weather_script/PatchTST.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=PatchTST python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_96 \ --model $model_name \ --data custom \ --features M \ --seq_len 96 \ --label_len 48 \ --pred_len 96 \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 21 \ --dec_in 21 \ --c_out 21 \ --des 'Exp' \ --itr 1 \ --n_heads 4 \ --train_epochs 3 python -u run.py \ --task_name long_term_forecast \ --is_training 1 \ --root_path ./dataset/weather/ \ --data_path weather.csv \ --model_id weather_96_192 \ --model $model_name \ --data 
custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --itr 1 \
  --n_heads 16 \
  --train_epochs 3

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_336 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --itr 1 \
  --n_heads 4 \
  --batch_size 128 \
  --train_epochs 3

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_720 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --itr 1 \
  --n_heads 4 \
  --batch_size 128 \
  --train_epochs 3

================================================
FILE: scripts/long_term_forecast/Weather_script/Pyraformer.sh
================================================
# Launch Pyraformer long-term forecasting runs on weather.csv, one run.py
# invocation per prediction horizon, in increasing horizon order.
export CUDA_VISIBLE_DEVICES=7

model_name=Pyraformer

for pred_len in 96 192 336 720; do
  # Only the 96-step run passes --train_epochs; the longer horizons omit it.
  extra_args=""
  if [ "$pred_len" -eq 96 ]; then
    extra_args="--train_epochs 2"
  fi

  # $extra_args is deliberately unquoted so it splits into flag and value
  # (and expands to nothing when empty).
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    $extra_args
done

================================================
FILE: scripts/long_term_forecast/Weather_script/SegRNN.sh
================================================
# Launch SegRNN long-term forecasting runs on weather.csv for every horizon
# listed in the loop below, with a fixed input length of $seq_len.
export CUDA_VISIBLE_DEVICES=0

model_name=SegRNN

seq_len=96
for pred_len in 96 192 336 720
do
  # The quoted '_' keeps the shell from reading "$seq_len_" as one variable name.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_$seq_len'_'$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len $seq_len \
    --pred_len $pred_len \
    --seg_len 48 \
    --enc_in 21 \
    --d_model 512 \
    --dropout 0.5 \
    --learning_rate 0.0001 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/Weather_script/TSMixer.sh
================================================
# Launch TSMixer long-term forecasting runs on weather.csv, one run.py
# invocation per prediction horizon. All four runs share every other flag.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  # The final flag carries no trailing backslash, so each command ends at
  # "--itr 1" regardless of what follows it in the file.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done

================================================
FILE: scripts/long_term_forecast/Weather_script/TimeMixer.sh
================================================
#export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

# Hyper-parameters shared by the run.py invocations of this script.
seq_len=96
e_layers=3
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32
# NOTE(review): batch_size is defined here, but the invocation below passes a
# literal "--batch_size 128" instead of $batch_size - confirm which is intended.
batch_size=16
train_epochs=20
patience=10

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_96 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len $seq_len \
  --label_len 0 \
  --pred_len 96 \
  --e_layers $e_layers \
  --d_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --itr 1 \
  --d_model $d_model \
  --d_ff $d_ff \
  --batch_size 128 \
  --learning_rate $learning_rate \
--train_epochs $train_epochs \
  --patience $patience \
  --down_sampling_layers $down_sampling_layers \
  --down_sampling_method avg \
  --down_sampling_window $down_sampling_window

# NOTE(review): unlike the 336- and 720-step runs below, this 192-step run does
# not pass "--d_layers 1" - confirm whether the omission is intentional.
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_192 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len $seq_len \
  --label_len 0 \
  --pred_len 192 \
  --e_layers $e_layers \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --itr 1 \
  --d_model $d_model \
  --d_ff $d_ff \
  --batch_size 128 \
  --learning_rate $learning_rate \
  --train_epochs $train_epochs \
  --patience $patience \
  --down_sampling_layers $down_sampling_layers \
  --down_sampling_method avg \
  --down_sampling_window $down_sampling_window

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_336 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len $seq_len \
  --label_len 0 \
  --pred_len 336 \
  --e_layers $e_layers \
  --d_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --itr 1 \
  --d_model $d_model \
  --d_ff $d_ff \
  --batch_size 128 \
  --learning_rate $learning_rate \
  --train_epochs $train_epochs \
  --patience $patience \
  --down_sampling_layers $down_sampling_layers \
  --down_sampling_method avg \
  --down_sampling_window $down_sampling_window

python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_720 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len $seq_len \
  --label_len 0 \
  --pred_len 720 \
  --e_layers $e_layers \
  --d_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --itr 1 \
  --d_model $d_model \
  --d_ff $d_ff \
  --batch_size 128 \
  --learning_rate $learning_rate \
  --train_epochs $train_epochs \
  --patience $patience \
  --down_sampling_layers $down_sampling_layers \
  --down_sampling_method avg \
  --down_sampling_window $down_sampling_window

================================================
FILE: scripts/long_term_forecast/Weather_script/TimeXer.sh
================================================
# Launch TimeXer long-term forecasting runs on weather.csv. Each horizon has
# its own e_layers / d_model / d_ff combination, so the four invocations are
# kept spelled out rather than looped.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# Horizon 96. The last flag has no trailing backslash so the command ends
# here and cannot absorb the next invocation as extra arguments.
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_96 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --d_model 256 \
  --d_ff 512 \
  --batch_size 4 \
  --itr 1

# Horizon 192.
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_192 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 192 \
  --e_layers 3 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --d_model 128 \
  --d_ff 1024 \
  --batch_size 4 \
  --itr 1

# Horizon 336 (no --d_ff flag is passed for this run).
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_336 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 336 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --d_model 256 \
  --batch_size 4 \
  --itr 1

# Horizon 720 (no --d_ff flag is passed for this run).
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/weather/ \
  --data_path weather.csv \
  --model_id weather_96_720 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 720 \
  --e_layers 1 \
  --factor 3 \
  --enc_in 21 \
  --dec_in 21 \
  --c_out 21 \
  --des 'Exp' \
  --d_model 128 \
  --batch_size 4 \
  --itr 1

================================================
FILE: scripts/long_term_forecast/Weather_script/TimesNet.sh
================================================
# Launch TimesNet long-term forecasting runs on weather.csv, one run.py
# invocation per prediction horizon, in increasing horizon order.
export CUDA_VISIBLE_DEVICES=5

model_name=TimesNet

for pred_len in 96 192 336 720; do
  # The 192- and 720-step runs additionally pass "--train_epochs 1";
  # the 96- and 336-step runs pass no --train_epochs flag.
  case "$pred_len" in
    192|720) extra_args="--train_epochs 1" ;;
    *)       extra_args="" ;;
  esac

  # $extra_args is deliberately unquoted so it splits into flag and value
  # (and expands to nothing when empty).
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    $extra_args
done

================================================
FILE: scripts/long_term_forecast/Weather_script/Transformer.sh
================================================
# Launch vanilla Transformer long-term forecasting runs on weather.csv, one
# run.py invocation per prediction horizon, in increasing horizon order.
export CUDA_VISIBLE_DEVICES=7

model_name=Transformer

for pred_len in 96 192 336 720; do
  # Only the 96-step run passes "--train_epochs 3"; the others omit the flag.
  if [ "$pred_len" -eq 96 ]; then
    extra_args="--train_epochs 3"
  else
    extra_args=""
  fi

  # $extra_args is deliberately unquoted so it splits into flag and value.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    $extra_args
done

================================================
FILE: scripts/long_term_forecast/Weather_script/WPMixer.sh
================================================
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Datasets and prediction lengths
# Each array below holds one entry per prediction length, in the same order
# as pred_lens.
dataset=weather
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.000913333 0.001379042 0.000607991 0.001470479)
batches=(32 64 32 128)
epochs=(60 60 60 60)
dropouts=(0.4 0.4 0.4 0.4)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(256 128 128 128)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script
# (the five arrays below are not passed to run.py by the loop that follows).
wavelets=(db3 db3 db3 db2)
levels=(2 1 2 1)
tfactors=(3 3 7 7)
dfactors=(7 7 7 5)
strides=(8 8 8 8)

# Loop over prediction lengths; index i selects the matching entry of every
# per-horizon array.
# --root_path points at ./dataset/weather/, the directory used by the other
# Weather_script launchers.
# NOTE(review): "--data $dataset" resolves to "weather" while the sibling
# Weather scripts pass "--data custom" - confirm that run.py accepts "weather".
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id wpmixer \
    --model $model_name \
    --task_name long_term_forecast \
    --data $dataset \
    --seq_len ${seq_lens[$i]} \
    --pred_len ${pred_lens[$i]} \
    --label_len 0 \
    --d_model ${d_models[$i]} \
    --patch_len ${patch_lens[$i]} \
    --batch_size ${batches[$i]} \
    --learning_rate ${learning_rates[$i]} \
    --lradj ${lradjs[$i]} \
    --dropout ${dropouts[$i]} \
    --patience ${patiences[$i]} \
    --train_epochs ${epochs[$i]} \
    --use_amp
done

================================================
FILE: scripts/long_term_forecast/Weather_script/iTransformer.sh
================================================
# Launch iTransformer long-term forecasting runs on weather.csv, one run.py
# invocation per prediction horizon. All four runs share every other flag.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

for pred_len in 96 192 336 720; do
  # Every continuation backslash is preceded by a space, and the final flag
  # has none, so each command ends at "--itr 1".
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 3 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --d_model 512 \
    --d_ff 512 \
    --itr 1
done

================================================
FILE: scripts/short_term_forecast/Autoformer_M4.sh
================================================
# Launch Autoformer short-term forecasting runs on M4, one run.py invocation
# per seasonal pattern. All six runs share every other flag.
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_$pattern" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done

================================================
FILE: scripts/short_term_forecast/Crossformer_M4.sh
================================================
# Launch Crossformer short-term forecasting runs on M4, one run.py invocation
# per seasonal pattern. Only --d_model / --d_ff vary between patterns.
export CUDA_VISIBLE_DEVICES=5

model_name=Crossformer

# Note the run order: Daily comes before Weekly in this script.
for pattern in Monthly Yearly Quarterly Daily Weekly Hourly; do
  # Per-pattern model width; Monthly, Weekly and Hourly share 32/32.
  case "$pattern" in
    Yearly)    d_model=16; d_ff=32 ;;
    Quarterly) d_model=64; d_ff=64 ;;
    Daily)     d_model=16; d_ff=16 ;;
    *)         d_model=32; d_ff=32 ;;
  esac

  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_$pattern" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model $d_model \
    --d_ff $d_ff \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done

================================================
FILE: scripts/short_term_forecast/DLinear_M4.sh
================================================
export CUDA_VISIBLE_DEVICES=1

model_name=DLinear

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
--seasonal_patterns 'Monthly' \
  --model_id m4_Monthly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Yearly' \
  --model_id m4_Yearly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Quarterly' \
  --model_id m4_Quarterly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Weekly' \
  --model_id m4_Weekly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Daily' \
  --model_id m4_Daily \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Hourly' \
  --model_id m4_Hourly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

================================================
FILE: scripts/short_term_forecast/ETSformer_M4.sh
================================================
# Launch ETSformer short-term forecasting runs on M4, one run.py invocation
# per seasonal pattern. All six runs share every other flag.
export CUDA_VISIBLE_DEVICES=1

model_name=ETSformer

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  # Every pattern reads from ./dataset/m4 (relative to the directory run.py
  # is launched from), the same root the other M4 scripts use.
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_$pattern" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 2 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done

================================================
FILE: scripts/short_term_forecast/FEDformer_M4.sh
================================================
# Launch FEDformer short-term forecasting runs on M4, one run.py invocation
# per seasonal pattern. All six runs share every other flag.
export CUDA_VISIBLE_DEVICES=1

model_name=FEDformer

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_$pattern" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done

================================================
FILE: scripts/short_term_forecast/FiLM_M4.sh
================================================
# Launch FiLM short-term forecasting runs on M4, one run.py invocation per
# seasonal pattern. Only --d_model / --d_ff vary between patterns.
export CUDA_VISIBLE_DEVICES=3

model_name=FiLM

# Note the run order: Daily comes before Weekly in this script.
for pattern in Monthly Yearly Quarterly Daily Weekly Hourly; do
  # Per-pattern model width; Monthly, Weekly and Hourly share 32/32.
  case "$pattern" in
    Yearly)    d_model=16; d_ff=32 ;;
    Quarterly) d_model=64; d_ff=64 ;;
    Daily)     d_model=16; d_ff=16 ;;
    *)         d_model=32; d_ff=32 ;;
  esac

  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_$pattern" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model $d_model \
    --d_ff $d_ff \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done

================================================
FILE: scripts/short_term_forecast/Informer_M4.sh
================================================
export CUDA_VISIBLE_DEVICES=1

model_name=Informer

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Monthly' \
  --model_id m4_Monthly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Yearly' \
  --model_id m4_Yearly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Quarterly' \
  --model_id m4_Quarterly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Weekly' \
  --model_id m4_Weekly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Daily' \
  --model_id m4_Daily \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Hourly' \
--model_id m4_Hourly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 512 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

================================================
FILE: scripts/short_term_forecast/LightTS_M4.sh
================================================
# Launch LightTS short-term forecasting runs on M4, one run.py invocation
# per seasonal pattern. All six runs share every other flag.
export CUDA_VISIBLE_DEVICES=1

model_name=LightTS

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_$pattern" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done

================================================
FILE: scripts/short_term_forecast/MICN_M4.sh
================================================
# Launch MICN short-term forecasting runs on M4. --d_model / --d_ff differ
# between seasonal patterns, so the invocations are spelled out one by one.
export CUDA_VISIBLE_DEVICES=4

model_name=MICN

# Monthly
python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Monthly' \
  --model_id m4_Monthly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 32 \
  --d_ff 32 \
  --top_k 5 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

# Yearly
python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Yearly' \
  --model_id m4_Yearly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --batch_size 16 \
  --d_model 16 \
  --d_ff 32 \
  --top_k 5 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --loss 'SMAPE'

# Quarterly
python -u run.py \
  --task_name short_term_forecast \
  --is_training 1 \
  --root_path ./dataset/m4 \
  --seasonal_patterns 'Quarterly' \
  --model_id m4_Quarterly \
  --model $model_name \
  --data m4 \
  --features M \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
--c_out 1 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --d_ff 16 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/Mamba_M4.sh ================================================ # export CUDA_VISIBLE_DEVICES=1 model_name=Mamba python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --enc_in 1 \ --expand 2 \ --d_ff 16 \ --d_conv 4 \ --c_out 1 \ --batch_size 16 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path 
./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --enc_in 1 \ --expand 2 \ --d_ff 16 \ --d_conv 4 \ --c_out 1 \ --batch_size 16 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --enc_in 1 \ --expand 2 \ --d_ff 16 \ --d_conv 4 \ --c_out 1 \ --batch_size 16 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --enc_in 1 \ --expand 2 \ --d_ff 16 \ --d_conv 4 \ --c_out 1 \ --batch_size 16 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --enc_in 1 \ --expand 2 \ --d_ff 16 \ --d_conv 4 \ --c_out 1 \ --batch_size 16 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --enc_in 1 \ --expand 2 \ --d_ff 16 \ --d_conv 4 \ --c_out 1 \ --batch_size 16 \ --d_model 128 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/Nonstationary_Transformer_M4.sh ================================================ export CUDA_VISIBLE_DEVICES=1 
model_name=Nonstationary_Transformer python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ 
--enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' \ --p_hidden_dims 256 256 \ --p_hidden_layers 2 ================================================ FILE: scripts/short_term_forecast/Pyraformer_M4.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=Pyraformer python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name 
short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/Reformer_M4.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=Reformer python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ 
--des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/TSMixer_M4.sh ================================================ #export CUDA_VISIBLE_DEVICES=1 model_name=TSMixer python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ 
--d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --des 
'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/TimeMixer_M4.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=TimeMixer e_layers=4 down_sampling_layers=1 down_sampling_window=2 learning_rate=0.01 d_model=32 d_ff=32 batch_size=16 python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 128 \ --d_model $d_model \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --learning_rate $learning_rate \ --train_epochs 50 \ --patience 20 \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 128 \ --d_model $d_model \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --learning_rate $learning_rate \ --train_epochs 50 \ --patience 20 \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 128 \ --d_model $d_model \ --d_ff 64 \ --des 'Exp' \ --itr 1 \ --learning_rate $learning_rate \ --train_epochs 50 \ --patience 20 \ --down_sampling_layers $down_sampling_layers 
\ --down_sampling_method avg \ --down_sampling_window $down_sampling_window \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 128 \ --d_model $d_model \ --d_ff 16 \ --des 'Exp' \ --itr 1 \ --learning_rate $learning_rate \ --train_epochs 50 \ --patience 20 \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 128 \ --d_model $d_model \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --learning_rate $learning_rate \ --train_epochs 50 \ --patience 20 \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers $e_layers \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 128 \ --d_model $d_model \ --d_ff 32 \ --des 'Exp' \ --itr 1 \ --learning_rate $learning_rate \ --train_epochs 50 \ --patience 20 \ --down_sampling_layers $down_sampling_layers \ --down_sampling_method avg \ --down_sampling_window $down_sampling_window \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/TimesNet_M4.sh ================================================ export CUDA_VISIBLE_DEVICES=0 
model_name=TimesNet python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 64 \ --d_ff 64 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 16 \ --d_ff 16 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ 
--learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 32 \ --d_ff 32 \ --top_k 5 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/Transformer_M4.sh ================================================ export CUDA_VISIBLE_DEVICES=1 model_name=Transformer python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ 
--e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: scripts/short_term_forecast/iTransformer_M4.sh ================================================ export CUDA_VISIBLE_DEVICES=0 model_name=iTransformer python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Monthly' \ --model_id m4_Monthly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Yearly' \ --model_id m4_Yearly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ 
--seasonal_patterns 'Quarterly' \ --model_id m4_Quarterly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Weekly' \ --model_id m4_Weekly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Daily' \ --model_id m4_Daily \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' python -u run.py \ --task_name short_term_forecast \ --is_training 1 \ --root_path ./dataset/m4 \ --seasonal_patterns 'Hourly' \ --model_id m4_Hourly \ --model $model_name \ --data m4 \ --features M \ --e_layers 2 \ --d_layers 1 \ --factor 3 \ --enc_in 1 \ --dec_in 1 \ --c_out 1 \ --batch_size 16 \ --d_model 512 \ --des 'Exp' \ --itr 1 \ --learning_rate 0.001 \ --loss 'SMAPE' ================================================ FILE: tutorial/TimesNet_tutorial.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# TimesNet Tutorial\n", "**Set-up instructions:** this notebook gives a tutorial on the learning tasks supported by `TimesNet`.\n", "\n", "`TimesNet` supports five tasks: long-term forecasting, short-term forecasting, imputation, anomaly detection, and classification." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. 
Install Python 3.8. For convenience, execute the following command." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "shellscript" } }, "outputs": [], "source": [ "pip install -r requirements.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Package Import" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch \n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.fft\n", "from layers.Embed import DataEmbedding\n", "from layers.Conv_Blocks import Inception_Block_V1 \n", " #convolution block used for convoluting the 2D time data, changeable" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. TimesBlock Construction\n", " The core idea of `TimesNet` lies in the construction of `TimesBlock`, which generally gets the base frequencies by implementing FFT on the data, and then reshapes the times series to 2D variation respectively from the main base frequencies, followed by a 2D convolution whose outputs are reshaped back and added with weight to form the final output.\n", "\n", " In the following section, we will have a detailed view on `TimesBlock`.\n", "\n", " TimesBlock has 2 members. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class TimesBlock(nn.Module):\n", " def __init__(self, configs):\n", " ...\n", " \n", " def forward(self, x):\n", " ..." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, let's focus on ```__init__(self, configs):```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def __init__(self, configs): ##configs is the configuration defined for TimesBlock\n", " super(TimesBlock, self).__init__() \n", " self.seq_len = configs.seq_len ##sequence length \n", " self.pred_len = configs.pred_len ##prediction length\n", " self.k = configs.top_k ##k denotes how many top frequencies are \n", " #taken into consideration\n", " # parameter-efficient design\n", " self.conv = nn.Sequential(\n", " Inception_Block_V1(configs.d_model, configs.d_ff,\n", " num_kernels=configs.num_kernels),\n", " nn.GELU(),\n", " Inception_Block_V1(configs.d_ff, configs.d_model,\n", " num_kernels=configs.num_kernels)\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then, have a look at ```forward(self, x)```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def forward(self, x):\n", " B, T, N = x.size()\n", " #B: batch size T: length of time series N:number of features\n", " period_list, period_weight = FFT_for_Period(x, self.k)\n", " #FFT_for_Period() will be shown later. 
Here, period_list([top_k]) denotes \n", " #the top_k-significant period and period_weight([B, top_k]) denotes its weight(amplitude)\n", "\n", " res = []\n", " for i in range(self.k):\n", " period = period_list[i]\n", "\n", " # padding : to form a 2D map, we need total length of the sequence, plus the part \n", " # to be predicted, to be divisible by the period, so padding is needed\n", " if (self.seq_len + self.pred_len) % period != 0:\n", " length = (\n", " ((self.seq_len + self.pred_len) // period) + 1) * period\n", " padding = torch.zeros([x.shape[0], (length - (self.seq_len + self.pred_len)), x.shape[2]]).to(x.device)\n", " out = torch.cat([x, padding], dim=1)\n", " else:\n", " length = (self.seq_len + self.pred_len)\n", " out = x\n", "\n", " # reshape: we need each channel of a single piece of data to be a 2D variable,\n", " # Also, in order to implement the 2D conv later on, we need to adjust the 2 dimensions \n", " # to be convolutioned to the last 2 dimensions, by calling the permute() func.\n", " # Whereafter, to make the tensor contiguous in memory, call contiguous()\n", " out = out.reshape(B, length // period, period,\n", " N).permute(0, 3, 1, 2).contiguous()\n", " \n", " #2D convolution to grap the intra- and inter- period information\n", " out = self.conv(out)\n", "\n", " # reshape back, similar to reshape\n", " out = out.permute(0, 2, 3, 1).reshape(B, -1, N)\n", " \n", " #truncating down the padded part of the output and put it to result\n", " res.append(out[:, :(self.seq_len + self.pred_len), :])\n", " res = torch.stack(res, dim=-1) #res: 4D [B, length , N, top_k]\n", "\n", " # adaptive aggregation\n", " #First, use softmax to get the normalized weight from amplitudes --> 2D [B,top_k]\n", " period_weight = F.softmax(period_weight, dim=1) \n", "\n", " #after two unsqueeze(1),shape -> [B,1,1,top_k],so repeat the weight to fit the shape of res\n", " period_weight = period_weight.unsqueeze(\n", " 1).unsqueeze(1).repeat(1, T, N, 1)\n", " \n", " #add by 
weight the top_k periods' result, getting the result of this TimesBlock\n", " res = torch.sum(res * period_weight, -1)\n", "\n", " # residual connection\n", " res = res + x\n", " return res" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The ```FFT_for_Period``` above is given by:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def FFT_for_Period(x, k=2):\n", " # xf shape [B, T, C], denoting the amplitude of frequency(T) given the datapiece at B,N\n", " xf = torch.fft.rfft(x, dim=1) \n", "\n", " # find period by amplitudes: here we assume that the periodic features are basically constant\n", " # in different batch and channel, so we mean out these two dimensions, getting a list frequency_list with shape[T] \n", " # each element at pos t of frequency_list denotes the overall amplitude at frequency (t)\n", " frequency_list = abs(xf).mean(0).mean(-1) \n", " frequency_list[0] = 0\n", "\n", " #by torch.topk(),we can get the biggest k elements of frequency_list, and its positions(i.e. 
the k-main frequencies in top_list)\n", " _, top_list = torch.topk(frequency_list, k)\n", "\n", " #Returns a new Tensor 'top_list', detached from the current graph.\n", " #The result will never require gradient.Convert to a numpy instance\n", " top_list = top_list.detach().cpu().numpy()\n", " \n", " #period:a list of shape [top_k], recording the periods of mean frequencies respectively\n", " period = x.shape[1] // top_list\n", "\n", " #Here,the 2nd item returned has a shape of [B, top_k],representing the biggest top_k amplitudes \n", " # for each piece of data, with N features being averaged.\n", " return period, abs(xf).mean(-1)[:, top_list] " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To make it clearer, please see the figures below.\n", "\n", "![FFT demonstrator](./fft.png)\n", "\n", "![2D Conv demonstrator](./conv.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For more details, please read our paper \n", "(link: https://openreview.net/pdf?id=ju_Uqw384Oq)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4. TimesNet\n", "\n", "So far we've got `TimesBlock`, which excels at retrieving intra- and inter-period temporal information. We are now able to build a `TimesNet`. `TimesNet` is proficient in multiple tasks, including short- and long-term forecasting, imputation, classification, and anomaly detection.\n", "\n", "In this section, we'll have a detailed overview of how `TimesNet` gains its power in these tasks." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Model(nn.Module):\n", " def __init__(self, configs):\n", " ...\n", " \n", " def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):\n", " ...\n", "\n", " def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n", " ...\n", "\n", " def anomaly_detection(self, x_enc):\n", " ...\n", " \n", " def classification(self, x_enc, x_mark_enc):\n", " ...\n", "\n", " def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n", " ..." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First of all, let's focus on ```__init__(self, configs):```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def __init__(self, configs):\n", " super(Model, self).__init__()\n", " #params init\n", " self.configs = configs\n", " self.task_name = configs.task_name\n", " self.seq_len = configs.seq_len\n", " self.label_len = configs.label_len\n", " self.pred_len = configs.pred_len\n", "\n", " #stack TimesBlock for e_layers times to form the main part of TimesNet, named model\n", " self.model = nn.ModuleList([TimesBlock(configs)\n", " for _ in range(configs.e_layers)])\n", " \n", " #embedding & normalization\n", " # enc_in is the encoder input size, the number of features for a piece of data\n", " # d_model is the dimension of embedding\n", " self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,\n", " configs.dropout)\n", " self.layer = configs.e_layers # num of encoder layers\n", " self.layer_norm = nn.LayerNorm(configs.d_model)\n", "\n", " #define the some layers for different tasks\n", " if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':\n", " self.predict_linear = nn.Linear(\n", " self.seq_len, self.pred_len + self.seq_len)\n", " self.projection = nn.Linear(\n", " configs.d_model, configs.c_out, bias=True)\n", " if self.task_name == 
'imputation' or self.task_name == 'anomaly_detection':\n", " self.projection = nn.Linear(\n", " configs.d_model, configs.c_out, bias=True)\n", " if self.task_name == 'classification':\n", " self.act = F.gelu\n", " self.dropout = nn.Dropout(configs.dropout)\n", " self.projection = nn.Linear(\n", " configs.d_model * configs.seq_len, configs.num_class)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 4.1 Forecast\n", "\n", "The basic idea of forecasting is to lengthen the known sequence to (seq_len+pred_len), which is the total length after forecasting. Then, by passing it through several TimesBlock layers, each followed by layer normalization, some underlying intra- and inter- period information is represented. With this information, we can project it to the output space. Finally, de-normalization (undoing the Non-stationary Transformer normalization applied to the input) gives the final output." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):\n", " # Normalization from Non-stationary Transformer at temporal dimension\n", " means = x_enc.mean(1, keepdim=True).detach() #[B,T]\n", " x_enc = x_enc - means\n", " stdev = torch.sqrt(\n", " torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)\n", " x_enc /= stdev\n", "\n", " # embedding: projecting a number to a C-channel vector\n", " enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] C is d_model\n", " enc_out = self.predict_linear(enc_out.permute(0, 2, 1)).permute(\n", " 0, 2, 1) # align temporal dimension [B,pred_len+seq_len,C]\n", " \n", " # TimesNet: pass through TimesBlock for self.layer times each with layer normalization\n", " for i in range(self.layer):\n", " enc_out = self.layer_norm(self.model[i](enc_out))\n", "\n", " # project back #[B,T,d_model]-->[B,T,c_out]\n", " dec_out = self.projection(enc_out) \n", "\n", " # De-Normalization from Non-stationary Transformer\n", " dec_out = dec_out * \\\n", " (stdev[:, 0, :].unsqueeze(1).repeat(\n", " 1, 
self.pred_len + self.seq_len, 1)) #lengthen the stdev to fit the dec_out\n", " dec_out = dec_out + \\\n", " (means[:, 0, :].unsqueeze(1).repeat(\n", " 1, self.pred_len + self.seq_len, 1)) #lengthen the mean to fit the dec_out\n", " return dec_out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 4.2 Imputation\n", "\n", "Imputation is a task aiming at completing some missing value in the time series, so in some degree it's similar to forecast. We can still use the similar step to cope with it." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n", " # Normalization from Non-stationary Transformer\n", " means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)\n", " means = means.unsqueeze(1).detach()\n", " x_enc = x_enc - means\n", " x_enc = x_enc.masked_fill(mask == 0, 0)\n", " stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) /\n", " torch.sum(mask == 1, dim=1) + 1e-5)\n", " stdev = stdev.unsqueeze(1).detach()\n", " x_enc /= stdev\n", "\n", " # embedding\n", " enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C]\n", " # TimesNet\n", " for i in range(self.layer):\n", " enc_out = self.layer_norm(self.model[i](enc_out))\n", " # project back\n", " dec_out = self.projection(enc_out)\n", "\n", " # De-Normalization from Non-stationary Transformer\n", " dec_out = dec_out * \\\n", " (stdev[:, 0, :].unsqueeze(1).repeat(\n", " 1, self.pred_len + self.seq_len, 1))\n", " dec_out = dec_out + \\\n", " (means[:, 0, :].unsqueeze(1).repeat(\n", " 1, self.pred_len + self.seq_len, 1))\n", " return dec_out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 4.3 Anomaly Detection\n", "\n", "Similar to Imputation." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def anomaly_detection(self, x_enc):\n", " # Normalization from Non-stationary Transformer\n", " means = x_enc.mean(1, keepdim=True).detach()\n", " x_enc = x_enc - means\n", " stdev = torch.sqrt(\n", " torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)\n", " x_enc /= stdev\n", " # embedding\n", " enc_out = self.enc_embedding(x_enc, None) # [B,T,C]\n", " # TimesNet\n", " for i in range(self.layer):\n", " enc_out = self.layer_norm(self.model[i](enc_out))\n", " # project back\n", " dec_out = self.projection(enc_out)\n", " # De-Normalization from Non-stationary Transformer\n", " dec_out = dec_out * \\\n", " (stdev[:, 0, :].unsqueeze(1).repeat(\n", " 1, self.pred_len + self.seq_len, 1))\n", " dec_out = dec_out + \\\n", " (means[:, 0, :].unsqueeze(1).repeat(\n", " 1, self.pred_len + self.seq_len, 1))\n", " return dec_out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 4.4 Classification" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def classification(self, x_enc, x_mark_enc):\n", " # embedding\n", " enc_out = self.enc_embedding(x_enc, None) # [B,T,C]\n", " # TimesNet\n", " for i in range(self.layer):\n", " enc_out = self.layer_norm(self.model[i](enc_out))\n", "\n", " # Output\n", " # the output transformer encoder/decoder embeddings don't include non-linearity\n", " output = self.act(enc_out)\n", " output = self.dropout(output)\n", "\n", " # zero-out padding embeddings:The primary role of x_mark_enc in the code is to \n", " # zero out the embeddings for padding positions in the output tensor through \n", " # element-wise multiplication, helping the model to focus on meaningful data \n", " # while disregarding padding.\n", " output = output * x_mark_enc.unsqueeze(-1)\n", " \n", " # (batch_size, seq_length * d_model)\n", " output = output.reshape(output.shape[0], -1)\n", " output = 
self.projection(output) # (batch_size, num_classes)\n", " return output" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Finally, with all the task-specific methods above in place, we can complete `forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):`. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n", " if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':\n", " dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)\n", " return dec_out[:, -self.pred_len:, :] # [B, L, D] return the predicted part of sequence\n", " if self.task_name == 'imputation':\n", " dec_out = self.imputation(\n", " x_enc, x_mark_enc, x_dec, x_mark_dec, mask)\n", " return dec_out # [B, L, D] return the whole sequence with missing value estimated\n", " if self.task_name == 'anomaly_detection':\n", " dec_out = self.anomaly_detection(x_enc)\n", " return dec_out # [B, L, D] return the sequence that should be correct\n", " if self.task_name == 'classification':\n", " dec_out = self.classification(x_enc, x_mark_enc)\n", " return dec_out # [B, N] return the classification result\n", " return None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5. Training and Settings\n", "\n", "By now we've successfully built up `TimesNet`. The next question is how to train and test this neural network. Training, validation and testing are implemented in the __*exp*__ part, which gathers the code for the different tasks. These experiments are not specific to `TimesNet`; they also work for any other time series representation model. Here, we simply use `TimesNet` as the example.\n", "\n", "`TimesNet` achieves state-of-the-art results on multiple tasks, but here we only introduce its training for the long-term forecasting task, since the backbone of the training process for other tasks is similar to this one. 
Again, the test and validation code can be easily understood once you are aware of how the training process works. So first of all, we are going to focus on the training of `TimesNet` on the long-term forecasting task.\n", "\n", "We will discuss many aspects, including the training process, training loss etc." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5.1 Training for Long-term Forecast Task\n", "\n", "The following code shows the process of training a model for the long-term forecasting task. We'll have a detailed look at it. In brief, the training part can be divided into several parts, including Data Preparation, Creating Save Path, Initialization, Optimizer and Loss Function Selection, Using Mixed Precision Training, Training Loop, Validation and Early Stopping, Learning Rate Adjustment, Loading the Best Model.\n", "\n", "For more details, please see the code below. The `train` process is defined in the experiment __class Exp_Long_Term_Forecast__." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def train(self, setting): #setting is the args for this model training\n", " #get train dataloader\n", " train_data, train_loader = self._get_data(flag='train')\n", " vali_data, vali_loader = self._get_data(flag='val')\n", " test_data, test_loader = self._get_data(flag='test')\n", "\n", " # set path of checkpoint for saving and loading model\n", " path = os.path.join(self.args.checkpoints, setting)\n", " if not os.path.exists(path):\n", " os.makedirs(path)\n", " time_now = time.time()\n", "\n", " train_steps = len(train_loader)\n", "\n", " # EarlyStopping is typically a custom class or function that monitors the performance \n", " # of a model during training, usually by tracking a certain metric (commonly validation \n", " # loss or accuracy).It's a common technique used in deep learning to 
prevent overfitting \n", " # during the training\n", " early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)\n", "\n", " #Optimizer and Loss Function Selection\n", " model_optim = self._select_optimizer()\n", " criterion = self._select_criterion()\n", "\n", " # AMP training is a technique that uses lower-precision data types (e.g., float16) \n", " # for certain computations to accelerate training and reduce memory usage.\n", " if self.args.use_amp: \n", " scaler = torch.cuda.amp.GradScaler()\n", " for epoch in range(self.args.train_epochs):\n", " iter_count = 0\n", " train_loss = []\n", " self.model.train()\n", " epoch_time = time.time()\n", "\n", " #begin training in this epoch\n", " for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):\n", " iter_count += 1\n", " model_optim.zero_grad()\n", " batch_x = batch_x.float().to(self.device) #input features\n", " batch_y = batch_y.float().to(self.device) #target features\n", "\n", " # _mark holds information about time-related features. 
Specifically, it is a \n", " # tensor that encodes temporal information and is associated with the \n", " # input data batch_x.\n", " batch_x_mark = batch_x_mark.float().to(self.device)\n", " batch_y_mark = batch_y_mark.float().to(self.device)\n", " # decoder input(didn't use in TimesNet case)\n", " dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n", " dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n", " # encoder - decoder\n", " if self.args.use_amp: #in the case of TimesNet, use_amp should be False\n", " with torch.cuda.amp.autocast():\n", " # whether to output attention in ecoder,in TimesNet case is no\n", " if self.args.output_attention: \n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", " # model the input\n", " else:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", "\n", " # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, \n", " # S:univariate predict univariate, MS:multivariate predict univariate'\n", " #if multivariate predict univariate',then output should be the last column of the decoder\n", " # output, so f_dim = -1 to only contain the last column, else is all columns\n", " f_dim = -1 if self.args.features == 'MS' else 0 \n", " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", "\n", " # calc loss\n", " loss = criterion(outputs, batch_y)\n", " train_loss.append(loss.item())\n", " else: #similar to when use_amp is True\n", " if self.args.output_attention:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", " else:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", " f_dim = -1 if self.args.features == 'MS' else 0\n", " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", " loss = 
criterion(outputs, batch_y)\n", " train_loss.append(loss.item())\n", "\n", " # When train rounds attain some 100-multiple, print speed, left time, loss. etc feedback\n", " if (i + 1) % 100 == 0:\n", " print(\"\\titers: {0}, epoch: {1} | loss: {2:.7f}\".format(i + 1, epoch + 1, loss.item()))\n", " speed = (time.time() - time_now) / iter_count\n", " left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)\n", " print('\\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))\n", " iter_count = 0\n", " time_now = time.time()\n", "\n", " #BP\n", " if self.args.use_amp:\n", " scaler.scale(loss).backward()\n", " scaler.step(model_optim)\n", " scaler.update()\n", " else:\n", " loss.backward()\n", " model_optim.step()\n", " \n", " #This epoch comes to end, print information\n", " print(\"Epoch: {} cost time: {}\".format(epoch + 1, time.time() - epoch_time))\n", " train_loss = np.average(train_loss)\n", "\n", " #run test and validation on current model\n", " vali_loss = self.vali(vali_data, vali_loader, criterion)\n", " test_loss = self.vali(test_data, test_loader, criterion)\n", "\n", " #print train, test, vali loss information\n", " print(\"Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}\".format(\n", " epoch + 1, train_steps, train_loss, vali_loss, test_loss))\n", " \n", " #Decide whether to trigger Early Stopping. 
if early_stop is true, it means that \n", " #this epoch's training is now at a flat slope, so stop further training for this epoch.\n", " early_stopping(vali_loss, self.model, path)\n", " if early_stopping.early_stop:\n", " print(\"Early stopping\")\n", " break\n", "\n", " #adjust learning keys\n", " adjust_learning_rate(model_optim, epoch + 1, self.args)\n", " best_model_path = path + '/' + 'checkpoint.pth'\n", "\n", " # loading the trained model's state dictionary from a saved checkpoint file \n", " # located at best_model_path.\n", " self.model.load_state_dict(torch.load(best_model_path))\n", " return self.model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you want to learn more, please see it at exp/exp_long_term_forecasting.py" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5.2 Early Stopping Mechanism\n", "\n", "__EarlyStopping__ is typically a custom class or function that monitors the performance of a model during training, usually by tracking a certain metric (commonly validation loss or accuracy). It's a common technique used in deep learning to prevent overfitting during training.\n", "\n", "Let's see the code below (original code is in `tools.py`)." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class EarlyStopping:\n", " def __init__(self, patience=7, verbose=False, delta=0):\n", " self.patience = patience # how many times will you tolerate for loss not being on decrease\n", " self.verbose = verbose # whether to print tip info\n", " self.counter = 0 # now how many times loss not on decrease\n", " self.best_score = None\n", " self.early_stop = False\n", " self.val_loss_min = np.inf\n", " self.delta = delta\n", "\n", " def __call__(self, val_loss, model, path):\n", " score = -val_loss\n", " if self.best_score is None:\n", " self.best_score = score\n", " self.save_checkpoint(val_loss, model, path)\n", "\n", " # meaning: current score is not 'delta' better than best_score, representing that \n", " # further training may not bring remarkable improvement in loss. \n", " elif score < self.best_score + self.delta: \n", " self.counter += 1\n", " print(f'EarlyStopping counter: {self.counter} out of {self.patience}')\n", " # 'No Improvement' times become higher than patience --> Stop Further Training\n", " if self.counter >= self.patience:\n", " self.early_stop = True\n", "\n", " else: #model's loss is still on decrease, save the now best model and go on training\n", " self.best_score = score\n", " self.save_checkpoint(val_loss, model, path)\n", " self.counter = 0\n", "\n", " def save_checkpoint(self, val_loss, model, path):\n", " ### used for saving the current best model\n", " if self.verbose:\n", " print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...')\n", " torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')\n", " self.val_loss_min = val_loss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5.3 Optimizer and Criterion\n", "\n", "The optimizer and criterion are defined in __class Exp_Long_Term_Forecast__ and obtained in the training process via the methods `self._select_optimizer()` and `self._select_criterion()`. Here, for the long-term forecasting task, we simply adopt the Adam optimizer and MSELoss to measure the loss between real data and predicted ones." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def _select_optimizer(self):\n", " model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)\n", " return model_optim\n", "\n", "def _select_criterion(self):\n", " criterion = nn.MSELoss()\n", " return criterion" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5.4 Automatic Mixed Precision(AMP)\n", "\n", "AMP is a technique used in deep learning to improve training speed and reduce memory usage. AMP achieves this by mixing calculations in half-precision (16-bit floating-point) and single-precision (32-bit floating-point).\n", "\n", "Let's have a closer look at this snippet:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#in forward process:\n", "with torch.cuda.amp.autocast():\n", "\n", "...\n", "\n", "#in BP process:\n", "if self.args.use_amp:\n", " scaler.scale(loss).backward()\n", " scaler.step(model_optim)\n", " scaler.update()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "` with torch.cuda.amp.autocast():` : The purpose of using torch.cuda.amp.autocast() is to take advantage of the speed and memory efficiency benefits of mixed-precision training while maintaining numerical stability. 
Some deep learning models can benefit significantly from this technique, especially on modern GPUs with hardware support for half-precision arithmetic. It allows you to perform certain calculations more quickly while still ensuring that critical calculations (e.g., gradient updates) are performed with sufficient precision to avoid loss of accuracy.\n", "\n", "`scaler.scale(loss).backward()`: If AMP is enabled, it uses a scaler object created with torch.cuda.amp.GradScaler() to automatically scale the loss and perform backward propagation. This is a crucial part of AMP, ensuring numerical stability. Before backpropagation, the loss is multiplied by a scale factor so that small gradient values do not underflow to zero when they are represented in half precision.\n", "\n", "`scaler.step(model_optim)`: Next, the scaler calls the step method, which first unscales the gradients and then calls the step of the model's optimizer (model_optim) to update the model's weights and minimize the loss function. If the gradients contain infs or NaNs, the optimizer step is skipped for this iteration.\n", "\n", "`scaler.update()`: Finally, the scaler calls the update method, which updates the scaling factor to ensure correct scaling of the loss for the next iteration. The factor is reduced when a step was skipped because of inf/NaN gradients and increased after a run of successful steps, so the scaling adapts dynamically to different training scenarios." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5.5 Learning Rate Adjustment\n", "\n", "While the optimizer adapts the per-parameter update size internally, we would still like to adjust the base learning rate manually across epochs, as done by the function `adjust_learning_rate(model_optim, epoch + 1, self.args)`, whose code is shown below (original code is in `tools.py`): " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def adjust_learning_rate(optimizer, epoch, args):\n", "\n", " #first type: learning rate decrease with epoch by exponential\n", " if args.lradj == 'type1':\n", " lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}\n", "\n", " #second type: learning rate decrease manually\n", " elif args.lradj == 'type2':\n", " lr_adjust = {\n", " 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,\n", " 10: 5e-7, 15: 1e-7, 20: 5e-8\n", " }\n", "\n", " #1st type: update in each epoch\n", " #2nd type: only update in epochs that are written in Dict lr_adjust\n", " if epoch in lr_adjust.keys():\n", " lr = lr_adjust[epoch]\n", " \n", " # change the learning rate for different parameter groups within the optimizer\n", " for param_group in optimizer.param_groups:\n", " param_group['lr'] = lr\n", " print('Updating learning rate to {}'.format(lr))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6. Validation and Testing\n", "\n", "During training, the model continuously adjusts its weights and parameters to minimize training error. However, this may not reflect the model's performance on unseen data. Validation allows us to periodically assess the model's performance on data that is different from the training data, providing insights into the model's generalization ability.\n", "\n", "By comparing performance on the validation set, we can identify whether the model is overfitting. Overfitting occurs when a model performs well on training data but poorly on unseen data. 
Monitoring performance on the validation set helps detect overfitting early and take measures to prevent it, such as early stopping or adjusting hyperparameters.\n", "\n", "Here, we still take long-term forecasting as an example, similar to train process:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def vali(self, vali_data, vali_loader, criterion):\n", " total_loss = []\n", "\n", " #evaluation mode\n", " self.model.eval()\n", " with torch.no_grad():\n", " for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):\n", " batch_x = batch_x.float().to(self.device)\n", " batch_y = batch_y.float()\n", "\n", " batch_x_mark = batch_x_mark.float().to(self.device)\n", " batch_y_mark = batch_y_mark.float().to(self.device)\n", "\n", " # decoder input\n", " dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n", " dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n", " # encoder - decoder\n", " if self.args.use_amp:\n", " with torch.cuda.amp.autocast():\n", " if self.args.output_attention:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", " else:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", " else:\n", " if self.args.output_attention:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", " else:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", " f_dim = -1 if self.args.features == 'MS' else 0\n", " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", "\n", " pred = outputs.detach().cpu()\n", " true = batch_y.detach().cpu()\n", "\n", " loss = criterion(pred, true)\n", "\n", " total_loss.append(loss)\n", " total_loss = np.average(total_loss)\n", " self.model.train()\n", " return total_loss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ 
"Testing is similar to validation, but it's purpose is to examine how well the model behaves, so it's common to add some visualization with __matplotlib.pyplot__. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "def visual(true, preds=None, name='./pic/test.pdf'):\n", " \"\"\"\n", " Results visualization\n", " \"\"\"\n", " plt.figure()\n", " plt.plot(true, label='GroundTruth', linewidth=2)\n", " if preds is not None:\n", " plt.plot(preds, label='Prediction', linewidth=2)\n", " plt.legend()\n", " plt.savefig(name, bbox_inches='tight')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def test(self, setting, test=0):\n", " test_data, test_loader = self._get_data(flag='test')\n", " if test:\n", " print('loading model')\n", " self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))\n", "\n", " preds = []\n", " trues = []\n", " folder_path = './test_results/' + setting + '/'\n", " if not os.path.exists(folder_path):\n", " os.makedirs(folder_path)\n", "\n", " self.model.eval()\n", " with torch.no_grad():\n", " for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):\n", " batch_x = batch_x.float().to(self.device)\n", " batch_y = batch_y.float().to(self.device)\n", "\n", " batch_x_mark = batch_x_mark.float().to(self.device)\n", " batch_y_mark = batch_y_mark.float().to(self.device)\n", "\n", " # decoder input\n", " dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n", " dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n", " # encoder - decoder\n", " if self.args.use_amp:\n", " with torch.cuda.amp.autocast():\n", " if self.args.output_attention:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", " else:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, 
batch_y_mark)\n", " else:\n", " if self.args.output_attention:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", "\n", " else:\n", " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", "\n", " f_dim = -1 if self.args.features == 'MS' else 0\n", " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", " outputs = outputs.detach().cpu().numpy()\n", " batch_y = batch_y.detach().cpu().numpy()\n", "\n", " #inverse the data if scaled\n", " if test_data.scale and self.args.inverse:\n", " outputs = test_data.inverse_transform(outputs)\n", " batch_y = test_data.inverse_transform(batch_y)\n", "\n", " pred = outputs\n", " true = batch_y\n", "\n", " preds.append(pred)\n", " trues.append(true)\n", "\n", " #visualize one piece of data every 20\n", " if i % 20 == 0:\n", " input = batch_x.detach().cpu().numpy()\n", " #the whole sequence\n", " gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)\n", " pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)\n", " visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))\n", "\n", " preds = np.array(preds)\n", " trues = np.array(trues) # shape[batch_num, batch_size, pred_len, features]\n", " print('test shape:', preds.shape, trues.shape)\n", " preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])\n", " trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])\n", " print('test shape:', preds.shape, trues.shape)\n", "\n", " # result save\n", " folder_path = './results/' + setting + '/'\n", " if not os.path.exists(folder_path):\n", " os.makedirs(folder_path)\n", "\n", " mae, mse, rmse, mape, mspe = metric(preds, trues)\n", " print('mse:{}, mae:{}'.format(mse, mae))\n", " f = open(\"result_long_term_forecast.txt\", 'a')\n", " f.write(setting + \" \\n\")\n", " f.write('mse:{}, mae:{}'.format(mse, mae))\n", " f.write('\\n')\n", " f.write('\\n')\n", " f.close()\n", " \n", " np.save(folder_path 
+ 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))\n", " np.save(folder_path + 'pred.npy', preds)\n", " np.save(folder_path + 'true.npy', trues)\n", "\n", " return\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7. Dataloader and DataProvider\n", "\n", "In the process of training, we simply take the dataloader for granted, by the function `self._get_data(flag='train')`. So how does this line work? Have a look at the definition(in __class Exp_Long_Term_Forecast__):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def _get_data(self, flag):\n", " data_set, data_loader = data_provider(self.args, flag)\n", " return data_set, data_loader" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One step forward, see `data_provider(self.args, flag)`(in `data_factory.py`):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Below are some dataloaders defined in data_loader.py. 
If you want to add your own data, \n", "# go and check data_loader.py to rewrite a dataloader to fit your data.\n", "data_dict = {\n", " 'ETTh1': Dataset_ETT_hour,\n", " 'ETTh2': Dataset_ETT_hour,\n", " 'ETTm1': Dataset_ETT_minute,\n", " 'ETTm2': Dataset_ETT_minute,\n", " 'custom': Dataset_Custom,\n", " 'm4': Dataset_M4,\n", " 'PSM': PSMSegLoader,\n", " 'MSL': MSLSegLoader,\n", " 'SMAP': SMAPSegLoader,\n", " 'SMD': SMDSegLoader,\n", " 'SWAT': SWATSegLoader,\n", " 'UEA': UEAloader\n", "}\n", "\n", "\n", "def data_provider(args, flag):\n", " Data = data_dict[args.data] #data_provider\n", "\n", " # time features encoding, options:[timeF, fixed, learned]\n", " timeenc = 0 if args.embed != 'timeF' else 1\n", "\n", " #test data provider\n", " if flag == 'test':\n", " shuffle_flag = False\n", " drop_last = True\n", " if args.task_name == 'anomaly_detection' or args.task_name == 'classification':\n", " batch_size = args.batch_size\n", "\n", " #Some tasks during the testing phase may require evaluating samples one at a time. \n", " # This could be due to variations in sample sizes in the test data or because the \n", " # evaluation process demands finer-grained results or different processing. 
\n", " else:\n", " batch_size = 1 # bsz=1 for evaluation\n", "\n", " #freq for time features encoding, \n", " # options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly,\n", " # m:monthly], you can also use more detailed freq like 15min or 3h')\n", " freq = args.freq\n", " else:\n", " shuffle_flag = True\n", " drop_last = True\n", " batch_size = args.batch_size # bsz for train and valid\n", " freq = args.freq\n", "\n", " if args.task_name == 'anomaly_detection':\n", " drop_last = False\n", " data_set = Data(\n", " root_path=args.root_path, #root path of the data file\n", " win_size=args.seq_len, #input sequence length\n", " flag=flag,\n", " )\n", " print(flag, len(data_set))\n", " data_loader = DataLoader(\n", " data_set,\n", " batch_size=batch_size,\n", " shuffle=shuffle_flag,\n", " num_workers=args.num_workers,#data loader num workers\n", " drop_last=drop_last)\n", " return data_set, data_loader\n", "\n", " elif args.task_name == 'classification':\n", " drop_last = False\n", " data_set = Data(\n", " root_path=args.root_path,\n", " flag=flag,\n", " )\n", "\n", " data_loader = DataLoader(\n", " data_set,\n", " batch_size=batch_size,\n", " shuffle=shuffle_flag,\n", " num_workers=args.num_workers,\n", " drop_last=drop_last,\n", " collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) \n", " #define some limits to collate pieces of data into batches\n", " )\n", " return data_set, data_loader\n", " else:\n", " if args.data == 'm4':\n", " drop_last = False\n", " data_set = Data(\n", " root_path=args.root_path, #eg. ./data/ETT/\n", " data_path=args.data_path, #eg. 
From the code above, we can see that `data_provider` is responsible for collating the dataset into batches according to the task and the running mode. It passes the parameters to the dataset class (`Data`) to tell it how to turn a data file into usable pieces of data. It then builds the final `data_loader` by passing the constructed dataset and some other parameters to the standard `DataLoader` class. After that, a dataset that fits the needs of the model and an iterable dataloader are available. 
\n", " def __getitem__(self, index):\n", " ...\n", " \n", " def __len__(self):\n", " ...\n", " \n", " def inverse_transform(self, data):\n", " ..." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`__init__()` is the constructor used to initialize various parameters and attributes of the dataset. It takes a series of arguments, including the path to the data file, the dataset's flag (e.g., train, validate, test), dataset size, feature type, target variable, whether to scale the data, time encoding, time frequency, and more. These parameters are used to configure how the dataset is loaded and processed." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def __init__(self, root_path, flag='train', size=None,\n", " features='S', data_path='ETTh1.csv',\n", " target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):\n", " # size [seq_len, label_len, pred_len]\n", " # info\n", " if size == None:\n", " self.seq_len = 24 * 4 * 4\n", " self.label_len = 24 * 4\n", " self.pred_len = 24 * 4\n", " else:\n", " self.seq_len = size[0]\n", " self.label_len = size[1]\n", " self.pred_len = size[2]\n", " # init\n", " assert flag in ['train', 'test', 'val']\n", " type_map = {'train': 0, 'val': 1, 'test': 2}\n", " self.set_type = type_map[flag]\n", " self.features = features\n", " self.target = target\n", " self.scale = scale\n", " self.timeenc = timeenc\n", " self.freq = freq\n", " self.root_path = root_path\n", " self.data_path = data_path\n", " \n", " # After initialization, call __read_data__() to manage the data file.\n", " self.__read_data__()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The actual process of managing data file into usable data pieces happens in `__read_data__()`, see below:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def __read_data__(self):\n", " self.scaler = StandardScaler()\n", "\n", " #get raw data from path\n", " 
#time stamp: df_stamp is a pandas DataFrame and
recording the month, day etc. time stamp\n", " # next we delete the 'date' column and turn 'DataFrame' to a list\n", " data_stamp = df_stamp.drop(['date'], 1).values\n", "\n", " elif self.timeenc == 1: #time feature encoding is timeF\n", " '''\n", " when entering this branch, we choose arg.embed as timeF meaning we want to \n", " encode the temporal info. 'freq' should be the smallest time step, and has \n", " options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')\n", " So you should check the timestep of your data and set 'freq' arg. \n", " After the time_features encoding, each date info format will be encoded into \n", " a list, with each element denoting the relative position of this time point\n", " (e.g. Day of Week, Day of Month, Hour of Day) and each normalized within scope[-0.5, 0.5]\n", " '''\n", " data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)\n", " data_stamp = data_stamp.transpose(1, 0)\n", " \n", " \n", " # data_x and data_y are same copy of a certain part of data\n", " self.data_x = data[border1:border2]\n", " self.data_y = data[border1:border2]\n", " self.data_stamp = data_stamp" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`__read_data__()` splits the dataset into 3 parts, selects the needed columns and manages time stamp info. It gives out the well-managed data array for later use. 
Next, we have to override the remaining methods of __class Dataset__; see `__getitem__(self, index)` and `__len__(self)`:
Here, the bash command may fail because some required packages are missing from the environment. If that's the case, simply follow the error messages and install the missing packages one by one until the script runs successfully.
Then, the model starts training. Each time an epoch finishes, information like the following is printed out:
def calculate_ADF(root_path,data_path):
    """Run the Augmented Dickey-Fuller test on every non-date column of a CSV.

    Args:
        root_path: Directory that contains the data file.
        data_path: File name of the CSV, joined onto ``root_path``.

    Returns:
        ``np.array`` built from the per-column ``adfuller`` results, in the
        order the columns appear in the file.

    Raises:
        ValueError: If the CSV has no ``'date'`` column (from ``list.remove``).
    """
    table = pd.read_csv(os.path.join(root_path, data_path))

    # Every column except the time stamp is treated as a series to test.
    series_names = list(table.columns)
    series_names.remove('date')
    table = table[series_names]

    results = []
    for name in series_names:
        outcome = adfuller(table[name], maxlag=1)
        print(outcome)
        results.append(outcome)
    return np.array(results)
def jitter(x, sigma=0.03):
    """Add zero-mean Gaussian noise with standard deviation ``sigma`` to ``x``.

    Args:
        x: Array to perturb; the noise is drawn with the same shape.
        sigma: Standard deviation of the noise.

    Returns:
        A new array ``x + noise``; ``x`` itself is not modified.
    """
    # https://arxiv.org/pdf/1706.00527.pdf
    noise = np.random.normal(loc=0., scale=sigma, size=x.shape)
    return x + noise
def permutation(x, max_segments=5, seg_mode="equal"):
    """Shuffle contiguous time segments of every series in a batch.

    Each series is cut along its time axis into a random number of segments,
    the segments are put in random order and concatenated again.

    Args:
        x: Array indexed as ``(sample, time step, channel)``.
        max_segments: Exclusive upper bound for the number of segments; the
            count is drawn with ``np.random.randint(1, max_segments)``, so a
            series is split into at most ``max_segments - 1`` pieces.
        seg_mode: ``"random"`` picks the cut positions at random; any other
            value splits into (almost) equally sized segments.

    Returns:
        Array of the same shape and dtype as ``x`` with permuted time steps.
        Series whose drawn segment count is 1 are copied unchanged.
    """
    orig_steps = np.arange(x.shape[1])

    num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))

    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        if num_segs[i] > 1:
            if seg_mode == "random":
                split_points = np.random.choice(x.shape[1]-2, num_segs[i]-1, replace=False)
                split_points.sort()
                splits = np.split(orig_steps, split_points)
            else:
                splits = np.array_split(orig_steps, num_segs[i])
            # Shuffle the segment *order* by index. Passing the list of
            # segments itself to np.random.permutation forces NumPy to build
            # an array from unequal-length pieces, which raises ValueError on
            # NumPy >= 1.24 whenever the segments are ragged.
            order = np.random.permutation(len(splits))
            warp = np.concatenate([splits[k] for k in order]).ravel()
            ret[i] = pat[warp]
        else:
            ret[i] = pat
    return ret
def spawner(x, labels, sigma=0.05, verbose=0):
    """Augment each series by averaging it with a DTW-aligned same-class series.

    For every series a random other series of the same class is chosen. Both
    are cut at a random time step, each half is aligned with DTW, the aligned
    pairs are averaged, the result is resampled to the original length and
    finally jittered.

    Args:
        x: Array indexed as ``(sample, time step, channel)``.
        labels: Class labels; if it has more than one dimension the class is
            taken as ``argmax`` over axis 1.
        sigma: Standard deviation passed to ``jitter`` for the final noise.
        verbose: Values greater than 0 draw the DTW graphs; 0 and -1 stay quiet.

    Returns:
        Array of the same shape as ``x``. Series without a same-class partner
        are passed through (only the final jitter is applied to them).
    """
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6983028/
    # use verbose=-1 to turn off warnings
    # use verbose=1 to print out figures

    import utils.dtw as dtw
    random_points = np.random.randint(low=1, high=x.shape[1]-1, size=x.shape[0])
    window = np.ceil(x.shape[1] / 10.).astype(int)
    orig_steps = np.arange(x.shape[1])
    l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels

    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        # guarantees that the sample itself is not selected
        choices = np.delete(np.arange(x.shape[0]), i)
        # Keep only candidates of the same class. A boolean mask keeps the
        # real sample indices; np.where(...)[0] would return positions inside
        # `choices`, which are off by one for every index after i.
        choices = choices[l[choices] == l[i]]
        if choices.size > 0:
            random_sample = x[np.random.choice(choices)]
            # SPAWNER splits the path into two randomly
            path1 = dtw.dtw(pat[:random_points[i]], random_sample[:random_points[i]], dtw.RETURN_PATH, slope_constraint="symmetric", window=window)
            path2 = dtw.dtw(pat[random_points[i]:], random_sample[random_points[i]:], dtw.RETURN_PATH, slope_constraint="symmetric", window=window)
            # The second path indexes into the tail halves, so shift it back
            # to positions in the full series before joining the two paths.
            combined = np.concatenate((np.vstack(path1), np.vstack(path2) + random_points[i]), axis=1)
            if verbose > 0:
                # This function has no slope_constraint parameter; use the
                # same "symmetric" constraint as the two half alignments.
                dtw_value, cost, DTW_map, path = dtw.dtw(pat, random_sample, return_flag = dtw.RETURN_ALL, slope_constraint="symmetric", window=window)
                dtw.draw_graph1d(cost, DTW_map, path, pat, random_sample)
                dtw.draw_graph1d(cost, DTW_map, combined, pat, random_sample)
            mean = np.mean([pat[combined[0]], random_sample[combined[1]]], axis=0)
            for dim in range(x.shape[2]):
                # The averaged path is longer than the series; resample it
                # back onto the original time steps.
                ret[i,:,dim] = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=mean.shape[0]), mean[:,dim]).T
        else:
            # No other series of this class: keep the pattern as it is.
            ret[i,:] = pat
    return jitter(ret, sigma=sigma)
def random_guided_warp(x, labels, slope_constraint="symmetric", use_window=True, dtw_type="normal", verbose=0):
    """Time-warp each series along the DTW path to a random same-class series.

    Args:
        x: Array indexed as ``(sample, time step, channel)``.
        labels: Class labels; if it has more than one dimension the class is
            taken as ``argmax`` over axis 1.
        slope_constraint: Slope constraint forwarded to DTW,
            ``"symmetric"`` or ``"asymmetric"``.
        use_window: If true, DTW uses a window of ``ceil(length / 10)`` steps;
            otherwise ``window=None`` is passed.
        dtw_type: ``"shape"`` selects ``shape_dtw``; anything else plain ``dtw``.
        verbose: Unused by the code in this function.

    Returns:
        Array of the same shape as ``x``. Series without a same-class partner
        are copied unchanged.
    """
    # use verbose = -1 to turn off warnings
    # slope_constraint is for DTW. "symmetric" or "asymmetric"
    # dtw_type is for shapeDTW or DTW. "normal" or "shape"

    import utils.dtw as dtw

    if use_window:
        window = np.ceil(x.shape[1] / 10.).astype(int)
    else:
        window = None
    orig_steps = np.arange(x.shape[1])
    l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels

    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        # guarantees that the sample itself is not selected
        choices = np.delete(np.arange(x.shape[0]), i)
        # Keep only candidates of the same class. A boolean mask keeps the
        # real sample indices; np.where(...)[0] would return positions inside
        # `choices`, which are off by one for every index after i.
        choices = choices[l[choices] == l[i]]
        if choices.size > 0:
            # pick random intra-class pattern
            random_prototype = x[np.random.choice(choices)]

            if dtw_type == "shape":
                path = dtw.shape_dtw(random_prototype, pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window)
            else:
                path = dtw.dtw(random_prototype, pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window)

            # Time warp: follow the sample side of the path, then resample the
            # (longer) warped series back onto the original time steps.
            warped = pat[path[1]]
            for dim in range(x.shape[2]):
                ret[i,:,dim] = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=warped.shape[0]), warped[:,dim]).T
        else:
            # No other series of this class: keep the pattern as it is.
            ret[i,:] = pat
    return ret
"normal" or "shape" import utils.dtw as dtw if use_window: window = np.ceil(x.shape[1] / 10.).astype(int) else: window = None orig_steps = np.arange(x.shape[1]) l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels positive_batch = np.ceil(batch_size / 2).astype(int) negative_batch = np.floor(batch_size / 2).astype(int) ret = np.zeros_like(x) warp_amount = np.zeros(x.shape[0]) # for i, pat in enumerate(tqdm(x)): for i, pat in enumerate(x): # guarentees that same one isnt selected choices = np.delete(np.arange(x.shape[0]), i) # remove ones of different classes positive = np.where(l[choices] == l[i])[0] negative = np.where(l[choices] != l[i])[0] if positive.size > 0 and negative.size > 0: pos_k = min(positive.size, positive_batch) neg_k = min(negative.size, negative_batch) positive_prototypes = x[np.random.choice(positive, pos_k, replace=False)] negative_prototypes = x[np.random.choice(negative, neg_k, replace=False)] # vector embedding and nearest prototype in one pos_aves = np.zeros((pos_k)) neg_aves = np.zeros((pos_k)) if dtw_type == "shape": for p, pos_prot in enumerate(positive_prototypes): for ps, pos_samp in enumerate(positive_prototypes): if p != ps: pos_aves[p] += (1./(pos_k-1.))*dtw.shape_dtw(pos_prot, pos_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) for ns, neg_samp in enumerate(negative_prototypes): neg_aves[p] += (1./neg_k)*dtw.shape_dtw(pos_prot, neg_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) selected_id = np.argmax(neg_aves - pos_aves) path = dtw.shape_dtw(positive_prototypes[selected_id], pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window) else: for p, pos_prot in enumerate(positive_prototypes): for ps, pos_samp in enumerate(positive_prototypes): if p != ps: pos_aves[p] += (1./(pos_k-1.))*dtw.dtw(pos_prot, pos_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) for ns, neg_samp in enumerate(negative_prototypes): neg_aves[p] += 
def run_augmentation_single(x, y, args):
    """Augment one series or one batch and return it in its original rank.

    Args:
        x: Either a single series with fewer than three dimensions (a leading
            batch axis is added for augmentation and removed again) or a 3-D
            batch ``(batch_size, sequence_length, num_channels)``.
        y: Labels, forwarded to ``augment`` and returned unchanged.
        args: Namespace providing ``seed``, ``augmentation_ratio``,
            ``extra_tag`` and the flags read by ``augment``.

    Returns:
        Tuple ``(x_aug, y_aug, augmentation_tags)``. With
        ``augmentation_ratio <= 0`` the input data is returned as is and the
        tag is ``args.extra_tag``.

    Raises:
        ValueError: If ``x`` has more than three dimensions.
    """
    # print("Augmenting %s"%args.data)
    np.random.seed(args.seed)

    y_aug = y

    if len(x.shape)<3:
        # Augmenting on the entire series: using the input data as "One Big Batch"
        #   Before  -   (sequence_length, num_channels)
        #   After   -   (1, sequence_length, num_channels)
        # Note: the 'sequence_length' here is actually the length of the entire series
        x_input = x[np.newaxis,:]
    elif len(x.shape)==3:
        # Augmenting on the batch series: keep current dimension (batch_size, sequence_length, num_channels)
        x_input = x
    else:
        raise ValueError("Input must be (batch_size, sequence_length, num_channels) dimensional")

    # Start from the batched view so that the squeeze below is valid even when
    # no augmentation round runs (previously the raw 2-D `x` was squeezed,
    # which raised or silently dropped a real axis of length 1).
    x_aug = x_input

    if args.augmentation_ratio > 0:
        augmentation_tags = "%d"%args.augmentation_ratio
        for _ in range(args.augmentation_ratio):
            # Every round augments the original input; the last round wins.
            x_aug, augmentation_tags = augment(x_input, y, args)
            # print("Round %d: %s done"%(n, augmentation_tags))
        if args.extra_tag:
            augmentation_tags += "_"+args.extra_tag
    else:
        augmentation_tags = args.extra_tag

    if(len(x.shape)<3):
        # Reverse to two-dimensional in whole series augmentation scenario
        x_aug = x_aug.squeeze(0)
    return x_aug, y_aug, augmentation_tags
aug.discriminative_guided_warp(x, y) augmentation_tags += "_dgw" if args.discsdtw: x = aug.discriminative_guided_warp_shape(x, y) augmentation_tags += "_dgws" return x, augmentation_tags ================================================ FILE: utils/dtw.py ================================================ __author__ = 'Brian Iwana' import numpy as np import math import sys RETURN_VALUE = 0 RETURN_PATH = 1 RETURN_ALL = -1 # Core DTW def _traceback(DTW, slope_constraint): i, j = np.array(DTW.shape) - 1 p, q = [i-1], [j-1] if slope_constraint == "asymmetric": while (i > 1): tb = np.argmin((DTW[i-1, j], DTW[i-1, j-1], DTW[i-1, j-2])) if (tb == 0): i = i - 1 elif (tb == 1): i = i - 1 j = j - 1 elif (tb == 2): i = i - 1 j = j - 2 p.insert(0, i-1) q.insert(0, j-1) elif slope_constraint == "symmetric": while (i > 1 or j > 1): tb = np.argmin((DTW[i-1, j-1], DTW[i-1, j], DTW[i, j-1])) if (tb == 0): i = i - 1 j = j - 1 elif (tb == 1): i = i - 1 elif (tb == 2): j = j - 1 p.insert(0, i-1) q.insert(0, j-1) else: sys.exit("Unknown slope constraint %s"%slope_constraint) return (np.array(p), np.array(q)) def dtw(prototype, sample, return_flag = RETURN_VALUE, slope_constraint="asymmetric", window=None): """ Computes the DTW of two sequences. :param prototype: np array [0..b] :param sample: np array [0..t] :param extended: bool """ p = prototype.shape[0] assert p != 0, "Prototype empty!" s = sample.shape[0] assert s != 0, "Sample empty!" 
def shape_dtw(prototype, sample, return_flag = RETURN_VALUE, slope_constraint="asymmetric", window=None, descr_ratio=0.05):
    """ Computes the shapeDTW of two sequences.

    shapeDTW: https://www.sciencedirect.com/science/article/pii/S0031320317303710

    Instead of comparing single time steps, each step is represented by the
    raw sub-sequence (descriptor) taken from the edge-padded sequence starting
    at that step, and the local cost is the norm of the difference between
    two descriptors.

    :param prototype: np array [0..b], indexed along axis 0 and padded as a 2-D array
    :param sample: np array [0..t], indexed along axis 0 and padded as a 2-D array
    :param return_flag: RETURN_ALL returns (distance, cost matrix, accumulated
        cost matrix without its padding border, path); RETURN_PATH returns
        only the path; any other value returns only the distance
    :param slope_constraint: "asymmetric" or "symmetric"
    :param window: half-width of the band around the diagonal in which costs
        are evaluated; defaults to the sample length (no restriction)
    :param descr_ratio: descriptor length as a fraction of the sequence
        length; the resulting length is clipped to [5, 100]
    """
    p = prototype.shape[0]
    assert p != 0, "Prototype empty!"
    s = sample.shape[0]
    assert s != 0, "Sample empty!"

    if window is None:
        window = s

    p_feature_len = np.clip(np.round(p * descr_ratio), 5, 100).astype(int)
    s_feature_len = np.clip(np.round(s * descr_ratio), 5, 100).astype(int)

    # padding: repeat the edge values so that a full descriptor can be cut
    # out at every original time step
    p_pad_front = (np.ceil(p_feature_len / 2.)).astype(int)
    p_pad_back = (np.floor(p_feature_len / 2.)).astype(int)
    s_pad_front = (np.ceil(s_feature_len / 2.)).astype(int)
    s_pad_back = (np.floor(s_feature_len / 2.)).astype(int)

    prototype_pad = np.pad(prototype, ((p_pad_front, p_pad_back), (0, 0)), mode="edge")
    sample_pad = np.pad(sample, ((s_pad_front, s_pad_back), (0, 0)), mode="edge")

    cost = np.full((p, s), np.inf)
    for i in range(p):
        # The band is inclusive on both sides (j = i-window .. i+window): the
        # accumulation step reads cost[i, i+window], so that cell must be
        # filled here as well or it silently stays at infinity.
        for j in range(max(0, i-window), min(s, i+window+1)):
            # NOTE(review): the subtraction needs both descriptors to have
            # compatible lengths, i.e. presumably p == s -- confirm with callers.
            cost[i, j] = np.linalg.norm(sample_pad[j:j+s_feature_len] - prototype_pad[i:i+p_feature_len])

    DTW = _cummulative_matrix(cost, slope_constraint=slope_constraint, window=window)

    if return_flag == RETURN_ALL:
        return DTW[-1,-1], cost, DTW[1:,1:], _traceback(DTW, slope_constraint)
    elif return_flag == RETURN_PATH:
        return _traceback(DTW, slope_constraint)
    else:
        return DTW[-1,-1]
def draw_graph1d(cost, DTW, path, prototype, sample):
    """Show a diagnostic figure for the DTW alignment of two 1-D patterns.

    Panels: local cost matrix with the path, accumulated cost matrix with the
    path, the prototype, the matched-point connections, and the sample. Only
    column 0 of ``prototype`` and ``sample`` is drawn.

    :param cost: local cost matrix, indexed [prototype step, sample step]
    :param DTW: accumulated cost matrix drawn in the second panel; the path is
        shifted by one in that panel
    :param path: pair of index arrays (prototype indices, sample indices)
    :param prototype: 2-D array, time along axis 0
    :param sample: 2-D array, time along axis 0
    """
    import matplotlib.pyplot as plt
    plt.figure(figsize=(12, 8))
    # plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05, hspace=.01)

    p_steps = np.arange(prototype.shape[0])
    s_steps = np.arange(sample.shape[0])

    #cost
    plt.subplot(2, 3, 1)
    plt.imshow(cost.T, cmap=plt.cm.gray, interpolation='none', origin='lower')
    plt.plot(path[0], path[1], 'y')
    plt.xlim((-0.5, cost.shape[0]-0.5))
    # The matrix is drawn transposed, so the vertical axis runs over the
    # second (sample) dimension.
    plt.ylim((-0.5, cost.shape[1]-0.5))

    #dtw
    plt.subplot(2, 3, 2)
    plt.imshow(DTW.T, cmap=plt.cm.gray, interpolation='none', origin='lower')
    plt.plot(path[0]+1, path[1]+1, 'y')
    plt.xlim((-0.5, DTW.shape[0]-0.5))
    plt.ylim((-0.5, DTW.shape[1]-0.5))

    #prototype
    plt.subplot(2, 3, 4)
    plt.plot(p_steps, prototype[:,0], 'b-o')

    #connection
    plt.subplot(2, 3, 5)
    for i in range(0,path[0].shape[0]):
        plt.plot([path[0][i], path[1][i]],[prototype[path[0][i],0], sample[path[1][i],0]], 'y-')
    # Each pattern is drawn against its own time axis; pairing them the other
    # way round fails as soon as the two lengths differ.
    plt.plot(s_steps, sample[:,0], 'g-o')
    plt.plot(p_steps, prototype[:,0], 'b-o')

    #sample
    plt.subplot(2, 3, 6)
    plt.plot(s_steps, sample[:,0], 'g-o')

    plt.tight_layout()
    plt.show()
def accelerated_dtw(x, y, dist, warp=1):
    """
    Dynamic Time Warping of two sequences with a vectorised local-cost step.

    All pairwise distances are computed in one call to scipy's ``cdist``
    instead of evaluating the distance element by element.

    :param array x: N1*M array (a 1-D array is reshaped to N1*1)
    :param array y: N2*M array (a 1-D array is reshaped to N2*1)
    :param string or func dist: metric forwarded unchanged to ``cdist``
    :param int warp: how many shifts are computed.
    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the wrap path.
    """
    assert len(x)
    assert len(y)
    if ndim(x) == 1:
        x = x.reshape(-1, 1)
    if ndim(y) == 1:
        y = y.reshape(-1, 1)
    n_rows, n_cols = len(x), len(y)

    # Accumulator with a one-cell border: the border is infinite except for
    # the origin, which is the only valid starting point.
    padded = full((n_rows + 1, n_cols + 1), inf)
    padded[0, 0] = 0
    padded[1:, 1:] = cdist(x, y, dist)
    acc = padded[1:, 1:]  # view into the padded matrix
    local_cost = acc.copy()

    shifts = range(1, warp + 1)
    for i in range(n_rows):
        for j in range(n_cols):
            best = padded[i, j]
            for k in shifts:
                best = min(best,
                           padded[min(i + k, n_rows), j],
                           padded[i, min(j + k, n_cols)])
            acc[i, j] += best

    if n_rows == 1:
        path = zeros(n_cols), range(n_cols)
    elif n_cols == 1:
        path = range(n_rows), zeros(n_rows)
    else:
        path = _traceback(padded)
    return acc[-1, -1], local_cost, acc, path
def divide_no_nan(a, b):
    """
    a/b where the resulted NaN or Inf (of either sign) are replaced by 0.

    The quotient is a new object; ``a`` and ``b`` are not modified.
    """
    result = a / b
    result[result != result] = .0   # NaN is the only value not equal to itself
    result[result == np.inf] = .0
    # A negative numerator over zero (or a positive one over -0.0) gives -inf,
    # which must be cleared as well.
    result[result == -np.inf] = .0
    return result
def group_values(values, groups, group_name):
    """Select the rows that belong to one group and strip their NaN entries.

    :param values: array of per-series values, one row / element per series
    :param groups: array of group labels aligned with ``values``
    :param group_name: label of the group to extract
    :return: a regular array when all stripped rows have the same length (or
        nothing was selected); otherwise a 1-D ``object`` array holding one
        array per series.
    """
    rows = [v[~np.isnan(v)] for v in values[groups == group_name]]
    if len({len(row) for row in rows}) <= 1:
        return np.array(rows)
    # Rows of different length cannot form a rectangular array, and recent
    # NumPy versions refuse to build a ragged one implicitly, so the object
    # array is filled element by element.
    ragged = np.empty(len(rows), dtype=object)
    for idx, row in enumerate(rows):
        ragged[idx] = row
    return ragged
def mape(forecast, target):
    """Element-wise absolute percentage error (in percent) of ``forecast`` against ``target``."""
    scale = np.abs(target)
    # Where the target is zero the error is divided by 1.0 instead, which
    # avoids a division by zero.
    zero_target = scale == 0.0
    scale[zero_target] = 1.0
    abs_error = np.abs(forecast - target)
    return 100 * abs_error / scale
class TriangularCausalMask():
    """Boolean mask of shape [B, 1, L, L] that is True strictly above the diagonal."""

    def __init__(self, B, L, device="cpu"):
        with torch.no_grad():
            all_true = torch.ones([B, 1, L, L], dtype=torch.bool)
            # diagonal=1 keeps only the entries strictly above the main diagonal
            upper = torch.triu(all_true, diagonal=1)
            self._mask = upper.to(device)

    @property
    def mask(self):
        return self._mask
def CORR(pred, true):
    """Pearson correlation between ``pred`` and ``true``, averaged over the last axis.

    Means and sums are taken along axis 0, so one coefficient is computed per
    position of the remaining axes before the final ``mean(-1)``.
    """
    true_dev = true - true.mean(0)
    pred_dev = pred - pred.mean(0)
    u = (true_dev * pred_dev).sum(0)
    # The denominator is the product of the two sums of squares, not the sum
    # of the element-wise product of the squares.
    d = np.sqrt((true_dev ** 2).sum(0) * (pred_dev ** 2).sum(0))
    return (u / d).mean(-1)
{"Checkpoints:":<20}{args.checkpoints:<20}') print() if args.task_name in ['long_term_forecast', 'short_term_forecast']: print("\033[1m" + "Forecasting Task" + "\033[0m") print(f' {"Seq Len:":<20}{args.seq_len:<20}{"Label Len:":<20}{args.label_len:<20}') print(f' {"Pred Len:":<20}{args.pred_len:<20}{"Seasonal Patterns:":<20}{args.seasonal_patterns:<20}') print(f' {"Inverse:":<20}{args.inverse:<20}') print() if args.task_name == 'imputation': print("\033[1m" + "Imputation Task" + "\033[0m") print(f' {"Mask Rate:":<20}{args.mask_rate:<20}') print() if args.task_name == 'anomaly_detection': print("\033[1m" + "Anomaly Detection Task" + "\033[0m") print(f' {"Anomaly Ratio:":<20}{args.anomaly_ratio:<20}') print() print("\033[1m" + "Model Parameters" + "\033[0m") print(f' {"Top k:":<20}{args.top_k:<20}{"Num Kernels:":<20}{args.num_kernels:<20}') print(f' {"Enc In:":<20}{args.enc_in:<20}{"Dec In:":<20}{args.dec_in:<20}') print(f' {"C Out:":<20}{args.c_out:<20}{"d model:":<20}{args.d_model:<20}') print(f' {"n heads:":<20}{args.n_heads:<20}{"e layers:":<20}{args.e_layers:<20}') print(f' {"d layers:":<20}{args.d_layers:<20}{"d FF:":<20}{args.d_ff:<20}') print(f' {"Moving Avg:":<20}{args.moving_avg:<20}{"Factor:":<20}{args.factor:<20}') print(f' {"Distil:":<20}{args.distil:<20}{"Dropout:":<20}{args.dropout:<20}') print(f' {"Embed:":<20}{args.embed:<20}{"Activation:":<20}{args.activation:<20}') print() print("\033[1m" + "Run Parameters" + "\033[0m") print(f' {"Num Workers:":<20}{args.num_workers:<20}{"Itr:":<20}{args.itr:<20}') print(f' {"Train Epochs:":<20}{args.train_epochs:<20}{"Batch Size:":<20}{args.batch_size:<20}') print(f' {"Patience:":<20}{args.patience:<20}{"Learning Rate:":<20}{args.learning_rate:<20}') print(f' {"Des:":<20}{args.des:<20}{"Loss:":<20}{args.loss:<20}') print(f' {"Lradj:":<20}{args.lradj:<20}{"Use Amp:":<20}{args.use_amp:<20}') print() print("\033[1m" + "GPU" + "\033[0m") print(f' {"Use GPU:":<20}{args.use_gpu:<20}{"GPU:":<20}{args.gpu:<20}') print(f' 
{"Use Multi GPU:":<20}{args.use_multi_gpu:<20}{"Devices:":<20}{args.devices:<20}') print() print("\033[1m" + "De-stationary Projector Params" + "\033[0m") p_hidden_dims_str = ', '.join(map(str, args.p_hidden_dims)) print(f' {"P Hidden Dims:":<20}{p_hidden_dims_str:<20}{"P Hidden Layers:":<20}{args.p_hidden_layers:<20}') print() ================================================ FILE: utils/timefeatures.py ================================================ # From: gluonts/src/gluonts/time_feature/_base.py # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). # You may not use this file except in compliance with the License. # A copy of the License is located at # # http://www.apache.org/licenses/LICENSE-2.0 # # or in the "license" file accompanying this file. This file is distributed # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        seconds = index.second
        return seconds / 59.0 - 0.5
def adjust_learning_rate(optimizer, epoch, args):
    """Set the learning rate of every parameter group for the given epoch.

    Schedules, selected by ``args.lradj``:

    * ``'type1'``  - ``learning_rate * 0.5 ** (epoch - 1)``
    * ``'type2'``  - fixed values at epochs 2, 4, 6, 8, 10, 15 and 20; other
      epochs leave the optimizer untouched
    * ``'type3'``  - ``learning_rate`` for epochs below 3, afterwards
      ``learning_rate * 0.9 ** (epoch - 3)``
    * ``'cosine'`` - ``learning_rate / 2 * (1 + cos(pi * epoch / train_epochs))``

    :param optimizer: object exposing ``param_groups``, a sequence of dicts
        whose ``'lr'`` entry is overwritten
    :param epoch: epoch number used by the schedule
    :param args: namespace providing ``lradj``, ``learning_rate`` and, for the
        cosine schedule, ``train_epochs``
    :raises ValueError: if ``args.lradj`` names no known schedule
    """
    # lr = args.learning_rate * (0.2 ** (epoch // 2))
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    elif args.lradj == 'type3':
        lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
    elif args.lradj == "cosine":
        lr_adjust = {epoch: args.learning_rate /2 * (1 + math.cos(epoch / args.train_epochs * math.pi))}
    else:
        # Fail with a clear message instead of hitting an unbound local below.
        raise ValueError(
            "Unknown lradj {!r}; expected one of 'type1', 'type2', 'type3', 'cosine'".format(args.lradj))

    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))
def cal_accuracy(y_pred, y_true):
    """Return the mean of the element-wise comparison ``y_pred == y_true``."""
    hits = y_pred == y_true
    return np.mean(hits)