Repository: thuml/Time-Series-Library
Branch: main
Commit: 7c2820986dcd
Files: 363
Total size: 1.2 MB
Directory structure:
gitextract_frlvujza/
├── .gitignore
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── README.md
├── README_zh.md
├── data_provider/
│ ├── __init__.py
│ ├── data_factory.py
│ ├── data_loader.py
│ ├── m4.py
│ └── uea.py
├── docker-compose.yml
├── exp/
│ ├── __init__.py
│ ├── exp_anomaly_detection.py
│ ├── exp_basic.py
│ ├── exp_classification.py
│ ├── exp_imputation.py
│ ├── exp_long_term_forecasting.py
│ ├── exp_short_term_forecasting.py
│ └── exp_zero_shot_forecasting.py
├── layers/
│ ├── AutoCorrelation.py
│ ├── Autoformer_EncDec.py
│ ├── Conv_Blocks.py
│ ├── Crossformer_EncDec.py
│ ├── DWT_Decomposition.py
│ ├── ETSformer_EncDec.py
│ ├── Embed.py
│ ├── FourierCorrelation.py
│ ├── MSGBlock.py
│ ├── MambaBlock.py
│ ├── MultiWaveletCorrelation.py
│ ├── Pyraformer_EncDec.py
│ ├── SelfAttention_Family.py
│ ├── StandardNorm.py
│ ├── TimeFilter_layers.py
│ ├── Transformer_EncDec.py
│ └── __init__.py
├── models/
│ ├── Autoformer.py
│ ├── Chronos.py
│ ├── Chronos2.py
│ ├── Crossformer.py
│ ├── DLinear.py
│ ├── ETSformer.py
│ ├── FEDformer.py
│ ├── FiLM.py
│ ├── FreTS.py
│ ├── Informer.py
│ ├── KANAD.py
│ ├── Koopa.py
│ ├── LightTS.py
│ ├── MICN.py
│ ├── MSGNet.py
│ ├── Mamba.py
│ ├── MambaSimple.py
│ ├── MambaSingleLayer.py
│ ├── Moirai.py
│ ├── MultiPatchFormer.py
│ ├── Nonstationary_Transformer.py
│ ├── PAttn.py
│ ├── PatchTST.py
│ ├── Pyraformer.py
│ ├── Reformer.py
│ ├── SCINet.py
│ ├── SegRNN.py
│ ├── Sundial.py
│ ├── TSMixer.py
│ ├── TemporalFusionTransformer.py
│ ├── TiDE.py
│ ├── TiRex.py
│ ├── TimeFilter.py
│ ├── TimeMixer.py
│ ├── TimeMoE.py
│ ├── TimeXer.py
│ ├── TimesFM.py
│ ├── TimesNet.py
│ ├── Transformer.py
│ ├── WPMixer.py
│ ├── __init__.py
│ └── iTransformer.py
├── requirements.txt
├── run.py
├── scripts/
│ ├── anomaly_detection/
│ │ ├── MSL/
│ │ │ ├── Autoformer.sh
│ │ │ ├── Crossformer.sh
│ │ │ ├── DLinear.sh
│ │ │ ├── ETSformer.sh
│ │ │ ├── FEDformer.sh
│ │ │ ├── FiLM.sh
│ │ │ ├── Informer.sh
│ │ │ ├── KANAD.sh
│ │ │ ├── LightTS.sh
│ │ │ ├── MICN.sh
│ │ │ ├── Pyraformer.sh
│ │ │ ├── Reformer.sh
│ │ │ ├── TimesNet.sh
│ │ │ ├── Transformer.sh
│ │ │ └── iTransformer.sh
│ │ ├── PSM/
│ │ │ ├── Autoformer.sh
│ │ │ ├── DLinear.sh
│ │ │ ├── KANAD.sh
│ │ │ ├── TimesNet.sh
│ │ │ └── Transformer.sh
│ │ ├── SMAP/
│ │ │ ├── Autoformer.sh
│ │ │ ├── KANAD.sh
│ │ │ ├── TimesNet.sh
│ │ │ └── Transformer.sh
│ │ ├── SMD/
│ │ │ ├── Autoformer.sh
│ │ │ ├── KANAD.sh
│ │ │ ├── TimesNet.sh
│ │ │ └── Transformer.sh
│ │ └── SWAT/
│ │ ├── Autoformer.sh
│ │ ├── KANAD.sh
│ │ ├── TimesNet.sh
│ │ └── Transformer.sh
│ ├── classification/
│ │ ├── Autoformer.sh
│ │ ├── Crossformer.sh
│ │ ├── DLinear.sh
│ │ ├── ETSformer.sh
│ │ ├── FEDformer.sh
│ │ ├── FiLM.sh
│ │ ├── Informer.sh
│ │ ├── LightTS.sh
│ │ ├── MICN.sh
│ │ ├── MambaSL.out
│ │ ├── MambaSL.sh
│ │ ├── PatchTST.sh
│ │ ├── Pyraformer.sh
│ │ ├── Reformer.sh
│ │ ├── TimesNet.sh
│ │ ├── Transformer.sh
│ │ └── iTransformer.sh
│ ├── exogenous_forecast/
│ │ ├── ECL/
│ │ │ └── TimeXer.sh
│ │ ├── EPF/
│ │ │ └── TimeXer.sh
│ │ ├── ETTh1/
│ │ │ └── TimeXer.sh
│ │ ├── ETTh2/
│ │ │ └── TimeXer.sh
│ │ ├── ETTm1/
│ │ │ └── TimeXer.sh
│ │ ├── ETTm2/
│ │ │ └── TimeXer.sh
│ │ ├── Traffic/
│ │ │ └── TimeXer.sh
│ │ └── Weather/
│ │ └── TimeXer.sh
│ ├── imputation/
│ │ ├── ECL_script/
│ │ │ ├── Autoformer.sh
│ │ │ ├── DLinear.sh
│ │ │ ├── ETSformer.sh
│ │ │ ├── FEDformer.sh
│ │ │ ├── Informer.sh
│ │ │ ├── LightTS.sh
│ │ │ ├── Pyraformer.sh
│ │ │ ├── Reformer.sh
│ │ │ ├── TimesNet.sh
│ │ │ ├── Transformer.sh
│ │ │ └── iTransformer.sh
│ │ ├── ETT_script/
│ │ │ ├── Autoformer_ETTh1.sh
│ │ │ ├── Autoformer_ETTh2.sh
│ │ │ ├── Autoformer_ETTm1.sh
│ │ │ ├── Autoformer_ETTm2.sh
│ │ │ ├── Crossformer_ETTh1.sh
│ │ │ ├── DLinear_ETTh1.sh
│ │ │ ├── FiLM_ETTh1.sh
│ │ │ ├── MICN_ETTh1.sh
│ │ │ ├── Nonstationary_Transformer_ETTh1.sh
│ │ │ ├── TiDE_ETTh1.sh
│ │ │ ├── TimesNet_ETTh1.sh
│ │ │ ├── TimesNet_ETTh2.sh
│ │ │ ├── TimesNet_ETTm1.sh
│ │ │ ├── TimesNet_ETTm2.sh
│ │ │ ├── Transformer_ETTh1.sh
│ │ │ ├── Transformer_ETTh2.sh
│ │ │ ├── Transformer_ETTm1.sh
│ │ │ ├── Transformer_ETTm2.sh
│ │ │ └── iTransformer_ETTh2.sh
│ │ └── Weather_script/
│ │ ├── Autoformer.sh
│ │ ├── TimesNet.sh
│ │ └── Transformer.sh
│ ├── long_term_forecast/
│ │ ├── AugmentSample/
│ │ │ ├── Classification/
│ │ │ │ └── PatchTST.sh
│ │ │ ├── Forecasting/
│ │ │ │ └── PatchTST.sh
│ │ │ └── ReadMe.md
│ │ ├── ECL_script/
│ │ │ ├── Autoformer.sh
│ │ │ ├── Crossformer.sh
│ │ │ ├── DLinear.sh
│ │ │ ├── ETSformer.sh
│ │ │ ├── FEDformer.sh
│ │ │ ├── FiLM.sh
│ │ │ ├── Informer.sh
│ │ │ ├── Koopa.sh
│ │ │ ├── LightTS.sh
│ │ │ ├── MICN.sh
│ │ │ ├── Mamba.sh
│ │ │ ├── MultiPatchFormer.sh
│ │ │ ├── Nonstationary_Transformer.sh
│ │ │ ├── PatchTST.sh
│ │ │ ├── Pyraformer.sh
│ │ │ ├── Reformer.sh
│ │ │ ├── SegRNN.sh
│ │ │ ├── TSMixer.sh
│ │ │ ├── TimeMixer.sh
│ │ │ ├── TimeXer.sh
│ │ │ ├── TimesNet.sh
│ │ │ ├── Transformer.sh
│ │ │ ├── WPMixer.sh
│ │ │ └── iTransformer.sh
│ │ ├── ETT_script/
│ │ │ ├── Autoformer_ETTh1.sh
│ │ │ ├── Autoformer_ETTh2.sh
│ │ │ ├── Autoformer_ETTm1.sh
│ │ │ ├── Autoformer_ETTm2.sh
│ │ │ ├── Crossformer_ETTh1.sh
│ │ │ ├── Crossformer_ETTh2.sh
│ │ │ ├── Crossformer_ETTm1.sh
│ │ │ ├── Crossformer_ETTm2.sh
│ │ │ ├── DLinear_ETTh1.sh
│ │ │ ├── ETSformer_ETTh1.sh
│ │ │ ├── FEDformer_ETTh1.sh
│ │ │ ├── FiLM_ETTh1.sh
│ │ │ ├── FiLM_ETTh2.sh
│ │ │ ├── FiLM_ETTm1.sh
│ │ │ ├── FiLM_ETTm2.sh
│ │ │ ├── Informer_ETTh1.sh
│ │ │ ├── Koopa_ETTh1.sh
│ │ │ ├── Koopa_ETTh2.sh
│ │ │ ├── Koopa_ETTm1.sh
│ │ │ ├── Koopa_ETTm2.sh
│ │ │ ├── LTSM.sh
│ │ │ ├── LightTS_ETTh1.sh
│ │ │ ├── MICN_ETTh1.sh
│ │ │ ├── MICN_ETTh2.sh
│ │ │ ├── MICN_ETTm1.sh
│ │ │ ├── MICN_ETTm2.sh
│ │ │ ├── MambaSimple_ETTh1.sh
│ │ │ ├── Mamba_ETT_all.sh
│ │ │ ├── Mamba_ETTh1.sh
│ │ │ ├── Mamba_ETTh2.sh
│ │ │ ├── Mamba_ETTm1.sh
│ │ │ ├── Mamba_ETTm2.sh
│ │ │ ├── MultiPatchFormer_ETTh1.sh
│ │ │ ├── MultiPatchFormer_ETTm1.sh
│ │ │ ├── Nonstationary_Transformer_ETTh1.sh
│ │ │ ├── Nonstationary_Transformer_ETTh2.sh
│ │ │ ├── Nonstationary_Transformer_ETTm1.sh
│ │ │ ├── Nonstationary_Transformer_ETTm2.sh
│ │ │ ├── PAttn_ETTh1.sh
│ │ │ ├── PatchTST_ETTh1.sh
│ │ │ ├── PatchTST_ETTh2.sh
│ │ │ ├── PatchTST_ETTm1.sh
│ │ │ ├── PatchTST_ETTm2.sh
│ │ │ ├── Pyraformer_ETTh1.sh
│ │ │ ├── Pyraformer_ETTh2.sh
│ │ │ ├── Pyraformer_ETTm1.sh
│ │ │ ├── Pyraformer_ETTm2.sh
│ │ │ ├── Reformer_ETTh1.sh
│ │ │ ├── SegRNN_ETTh1.sh
│ │ │ ├── SegRNN_ETTh2.sh
│ │ │ ├── SegRNN_ETTm1.sh
│ │ │ ├── SegRNN_ETTm2.sh
│ │ │ ├── TSMixer_ETTh1.sh
│ │ │ ├── TSMixer_ETTh2.sh
│ │ │ ├── TSMixer_ETTm1.sh
│ │ │ ├── TSMixer_ETTm2.sh
│ │ │ ├── TiDE_ETTh1.sh
│ │ │ ├── TimeMixer_ETTh1.sh
│ │ │ ├── TimeMixer_ETTh2.sh
│ │ │ ├── TimeMixer_ETTm1.sh
│ │ │ ├── TimeMixer_ETTm2.sh
│ │ │ ├── TimeXer_ETTh1.sh
│ │ │ ├── TimeXer_ETTh2.sh
│ │ │ ├── TimeXer_ETTm1.sh
│ │ │ ├── TimeXer_ETTm2.sh
│ │ │ ├── TimesNet_ETTh1.sh
│ │ │ ├── TimesNet_ETTh2.sh
│ │ │ ├── TimesNet_ETTm1.sh
│ │ │ ├── TimesNet_ETTm2.sh
│ │ │ ├── Transformer_ETTh1.sh
│ │ │ ├── Transformer_ETTh2.sh
│ │ │ ├── Transformer_ETTm1.sh
│ │ │ ├── Transformer_ETTm2.sh
│ │ │ ├── WPMixer_ETTh1.sh
│ │ │ ├── WPMixer_ETTh2.sh
│ │ │ ├── WPMixer_ETTm1.sh
│ │ │ ├── WPMixer_ETTm2.sh
│ │ │ └── iTransformer_ETTh2.sh
│ │ ├── Exchange_script/
│ │ │ ├── Autoformer.sh
│ │ │ ├── Crossformer.sh
│ │ │ ├── FiLM.sh
│ │ │ ├── Koopa.sh
│ │ │ ├── MICN.sh
│ │ │ ├── Mamba.sh
│ │ │ ├── Nonstationary_Transformer.sh
│ │ │ ├── PatchTST.sh
│ │ │ ├── Pyraformer.sh
│ │ │ ├── TimesNet.sh
│ │ │ └── Transformer.sh
│ │ ├── ILI_script/
│ │ │ ├── Autoformer.sh
│ │ │ ├── Crossformer.sh
│ │ │ ├── FiLM.sh
│ │ │ ├── Koopa.sh
│ │ │ ├── MICN.sh
│ │ │ ├── Nonstationary_Transformer.sh
│ │ │ ├── PatchTST.sh
│ │ │ ├── TimesNet.sh
│ │ │ └── Transformer.sh
│ │ ├── Mamba_all.sh
│ │ ├── Traffic_script/
│ │ │ ├── Autoformer.sh
│ │ │ ├── Crossformer.sh
│ │ │ ├── FiLM.sh
│ │ │ ├── Koopa.sh
│ │ │ ├── MICN.sh
│ │ │ ├── Mamba.sh
│ │ │ ├── MultiPatchFormer.sh
│ │ │ ├── Nonstationary_Transformer.sh
│ │ │ ├── PatchTST.sh
│ │ │ ├── Pyraformer.sh
│ │ │ ├── SegRNN.sh
│ │ │ ├── TSMixer.sh
│ │ │ ├── TimeMixer.sh
│ │ │ ├── TimeXer.sh
│ │ │ ├── TimesNet.sh
│ │ │ ├── Transformer.sh
│ │ │ ├── WPMixer.sh
│ │ │ └── iTransformer.sh
│ │ └── Weather_script/
│ │ ├── Autoformer.sh
│ │ ├── Crossformer.sh
│ │ ├── FiLM.sh
│ │ ├── MICN.sh
│ │ ├── Mamba.sh
│ │ ├── MultiPatchFormer.sh
│ │ ├── Nonstationary_Transformer.sh
│ │ ├── PatchTST.sh
│ │ ├── Pyraformer.sh
│ │ ├── SegRNN.sh
│ │ ├── TSMixer.sh
│ │ ├── TimeMixer.sh
│ │ ├── TimeXer.sh
│ │ ├── TimesNet.sh
│ │ ├── Transformer.sh
│ │ ├── WPMixer.sh
│ │ └── iTransformer.sh
│ └── short_term_forecast/
│ ├── Autoformer_M4.sh
│ ├── Crossformer_M4.sh
│ ├── DLinear_M4.sh
│ ├── ETSformer_M4.sh
│ ├── FEDformer_M4.sh
│ ├── FiLM_M4.sh
│ ├── Informer_M4.sh
│ ├── LightTS_M4.sh
│ ├── MICN_M4.sh
│ ├── Mamba_M4.sh
│ ├── Nonstationary_Transformer_M4.sh
│ ├── Pyraformer_M4.sh
│ ├── Reformer_M4.sh
│ ├── TSMixer_M4.sh
│ ├── TimeMixer_M4.sh
│ ├── TimesNet_M4.sh
│ ├── Transformer_M4.sh
│ └── iTransformer_M4.sh
├── tutorial/
│ └── TimesNet_tutorial.ipynb
└── utils/
├── ADFtest.py
├── __init__.py
├── augmentation.py
├── dtw.py
├── dtw_metric.py
├── losses.py
├── m4_summary.py
├── masking.py
├── metrics.py
├── print_args.py
├── timefeatures.py
└── tools.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
/scripts/long_term_forecast/Traffic_script/PatchTST1.sh
/backups/
/result.xlsx
/~$result.xlsx
/Time-Series-Library.zip
/temp.sh
.idea
/tv_result.xlsx
/test.py
/m4_results/
/test_results/
/PatchTST_results.xlsx
/seq_len_long_term_forecast/
/progress.xlsx
/scripts/short_term_forecast/PatchTST_M4.sh
/run_tv.py
/scripts/long_term_forecast/ETT_tv_script/
/dataset/
/data/
data_factory_all.py
data_loader_all.py
/scripts/short_term_forecast/tv_script/
/exp/exp_short_term_forecasting_tv.py
/exp/exp_long_term_forecasting_tv.py
/timesnetv2.xlsx
/scripts/anomaly_detection/tmp/
/scripts/imputation/tmp/
/utils/self_tools.py
/scripts/exp_scripts/
checkpoints/
results/
result_long_term_forecast.txt
result_anomaly_detection.txt
scripts/augmentation/
run_anylearn.py
environment.txt
================================================
FILE: CONTRIBUTING.md
================================================
## Instructions for Contributing to TSlib
Sincere thanks to all the researchers who want to use or contribute to TSlib.
Since our team may not have enough time to fix all the bugs and catch up with the latest models, your contribution is essential to this project.
### (1) Fix Bug
You can directly propose a pull request and add detailed descriptions to the comment, such as [this pull request](https://github.com/thuml/Time-Series-Library/pull/498).
### (2) Add a new time series model
Thanks to creative researchers, a great many TS models have been proposed, which has advanced this community significantly. If you want to add your model to TSlib, here are some instructions:
- Propose an issue to describe your model and give a link to your paper and official code. We will discuss whether your model is suitable for this library, such as [this issue](https://github.com/thuml/Time-Series-Library/issues/346).
- Propose a pull request in a similar style as TSlib, which means adding an additional file to ./models and providing corresponding scripts for reproduction, such as [this pull request](https://github.com/thuml/Time-Series-Library/pull/446).
Note: Given that a lot of TS models have been proposed, we may not have enough time to judge which model would be a remarkable supplement to the current library. Thus, we have decided to add ONLY models from officially published papers to our library. Peer review can be a reliable criterion.
Thanks again for your valuable contributions.
================================================
FILE: Dockerfile
================================================
# syntax=docker/dockerfile:1.4
FROM pytorch/pytorch:2.5.1-cuda12.1-cudnn9-devel AS tslib
WORKDIR /workspace
ARG http_proxy
ARG https_proxy
ENV http_proxy=${http_proxy}
ENV https_proxy=${https_proxy}
ENV PYTHONPATH=/workspace/Time-Series-Library:$PYTHONPATH
COPY requirements.txt .
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt
# mamba-ssm (cxx11abiFALSE) (Time-Series-Library/models/Mamba.py)
RUN --mount=type=cache,target=/root/.cache/pip \
pip install https://github.com/state-spaces/mamba/releases/download/v2.2.6.post3/mamba_ssm-2.2.6.post3+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
# uni2ts (--no-deps)(Time-Series-Library/models/Moirai.py)
RUN --mount=type=cache,target=/root/.cache/pip \
pip install uni2ts --no-deps
COPY . .
CMD ["bash"]
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2021 THUML @ Tsinghua University
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Time Series Library (TSLib)
TSLib is an open-source library for deep learning researchers, especially for deep time series analysis.
> **中文文档**:[README_zh.md](./README_zh.md)
We provide a neat code base to evaluate advanced deep time series models or develop your own, which covers five mainstream tasks: **long- and short-term forecasting, imputation, anomaly detection, and classification.**
:triangular_flag_on_post:**News** (2025.12) Many thanks to the great work from [ailuntz](https://github.com/thuml/Time-Series-Library/pull/805), which provides updated requirements and Docker deployment, as well as well-organized documentation. This is quite meaningful to this project and to beginners.
:triangular_flag_on_post:**News** (2025.11) Considering the rapid development of Large Time Series Models (LTSMs), we have newly added a [[zero-shot forecasting]](https://github.com/thuml/Time-Series-Library/blob/main/exp/exp_zero_shot_forecasting.py) feature in TSLib. You can try [this script](https://github.com/thuml/Time-Series-Library/blob/main/scripts/long_term_forecast/ETT_script/LTSM.sh) to evaluate LTSMs.
:triangular_flag_on_post:**News** (2025.10) Given the recent confusion among researchers regarding minor improvements on standard benchmarks, we propose the [[Accuracy Law]](https://arxiv.org/abs/2510.02729) to characterize the objectives of deep time series forecasting tasks, which can be used to identify saturated datasets.
:triangular_flag_on_post:**News** (2024.10) We have included [[TimeXer]](https://arxiv.org/abs/2402.19072), which defined a practical forecasting paradigm: Forecasting with Exogenous Variables. Considering both practicability and computation efficiency, we believe the new forecasting paradigm defined in TimeXer can be the "right" task for future research.
:triangular_flag_on_post:**News** (2024.10) Our lab has open-sourced [[OpenLTM]](https://github.com/thuml/OpenLTM), which provides a distinct pretrain-finetuning paradigm compared to TSLib. If you are interested in Large Time Series Models, you may find this repository helpful.
:triangular_flag_on_post:**News** (2024.07) We wrote a comprehensive survey of [[Deep Time Series Models]](https://arxiv.org/abs/2407.13278) with a rigorous benchmark based on TSLib. In this paper, we summarized the design principles of current time series models supported by insightful experiments, hoping to be helpful to future research.
:triangular_flag_on_post:**News** (2024.04) Many thanks for the great work from [frecklebars](https://github.com/thuml/Time-Series-Library/pull/378). The famous sequential model [Mamba](https://arxiv.org/abs/2312.00752) has been included in our library. See [this file](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py), where you need to install `mamba_ssm` with pip first.
:triangular_flag_on_post:**News** (2024.03) Given the inconsistent look-back length of various papers, we split the long-term forecasting in the leaderboard into two categories: Look-Back-96 and Look-Back-Searching. We recommend researchers read [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2), which includes both look-back length settings in experiments for scientific rigor.
:triangular_flag_on_post:**News** (2023.10) We added an implementation of [iTransformer](https://arxiv.org/abs/2310.06625), which is the state-of-the-art model for long-term forecasting. The official code and complete scripts of iTransformer can be found [here](https://github.com/thuml/iTransformer).
:triangular_flag_on_post:**News** (2023.09) We added a detailed [tutorial](https://github.com/thuml/Time-Series-Library/blob/main/tutorial/TimesNet_tutorial.ipynb) for [TimesNet](https://openreview.net/pdf?id=ju_Uqw384Oq) and this library, which is quite friendly to beginners of deep time series analysis.
:triangular_flag_on_post:**News** (2023.02) We released TSlib as a comprehensive benchmark and code base for time series models, which is extended from our previous GitHub repository [Autoformer](https://github.com/thuml/Autoformer).
## Leaderboard for Time Series Analysis
Till March 2024, the top three models for five different tasks are:
| Model<br>Ranking | Long-term<br>Forecasting<br>Look-Back-96 | Long-term<br>Forecasting<br>Look-Back-Searching | Short-term<br>Forecasting | Imputation | Classification | Anomaly<br>Detection |
| ---------------- | ----------------------------------------------------- | ----------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------------------------------------------------- |
| 🥇 1st | [TimeXer](https://arxiv.org/abs/2402.19072) | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) |
| 🥈 2nd | [iTransformer](https://arxiv.org/abs/2310.06625) | [PatchTST](https://github.com/yuqinie98/PatchTST) | [Non-stationary<br>Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary<br>Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary<br>Transformer](https://github.com/thuml/Nonstationary_Transformers) | [FEDformer](https://github.com/MAZiqing/FEDformer) |
| 🥉 3rd | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [DLinear](https://arxiv.org/pdf/2205.13504.pdf) | [FEDformer](https://github.com/MAZiqing/FEDformer) | [Autoformer](https://github.com/thuml/Autoformer) | [Informer](https://github.com/zhouhaoyi/Informer2020) | [Autoformer](https://github.com/thuml/Autoformer) |
**Note: We will keep updating this leaderboard.** If you have proposed advanced and awesome models, you can send us your paper/code link or raise a pull request. We will add them to this repo and update the leaderboard as soon as possible.
**Compared models of this leaderboard.** ☑ means that their code has already been included in this repo.
- [x] **TimeXer** - TimeXer: Empowering Transformers for Time Series Forecasting with Exogenous Variables [[NeurIPS 2024]](https://arxiv.org/abs/2402.19072) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeXer.py)
- [x] **TimeMixer** - TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting [[ICLR 2024]](https://openreview.net/pdf?id=7oLshfEIC2) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMixer.py).
- [x] **TSMixer** - TSMixer: An All-MLP Architecture for Time Series Forecasting [[arXiv 2023]](https://arxiv.org/pdf/2303.06053.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TSMixer.py)
- [x] **iTransformer** - iTransformer: Inverted Transformers Are Effective for Time Series Forecasting [[ICLR 2024]](https://arxiv.org/abs/2310.06625) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/iTransformer.py).
- [x] **PatchTST** - A Time Series is Worth 64 Words: Long-term Forecasting with Transformers [[ICLR 2023]](https://openreview.net/pdf?id=Jbdc0vTOcol) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/PatchTST.py).
- [x] **TimesNet** - TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis [[ICLR 2023]](https://openreview.net/pdf?id=ju_Uqw384Oq) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesNet.py).
- [x] **DLinear** - Are Transformers Effective for Time Series Forecasting? [[AAAI 2023]](https://arxiv.org/pdf/2205.13504.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/DLinear.py).
- [x] **LightTS** - Less Is More: Fast Multivariate Time Series Forecasting with Light Sampling-oriented MLP Structures [[arXiv 2022]](https://arxiv.org/abs/2207.01186) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/LightTS.py).
- [x] **ETSformer** - ETSformer: Exponential Smoothing Transformers for Time-series Forecasting [[arXiv 2022]](https://arxiv.org/abs/2202.01381) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/ETSformer.py).
- [x] **Non-stationary Transformer** - Non-stationary Transformers: Exploring the Stationarity in Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/pdf?id=ucNDIDRNjjv) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Nonstationary_Transformer.py).
- [x] **FEDformer** - FEDformer: Frequency Enhanced Decomposed Transformer for Long-term Series Forecasting [[ICML 2022]](https://proceedings.mlr.press/v162/zhou22g.html) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FEDformer.py).
- [x] **Pyraformer** - Pyraformer: Low-complexity Pyramidal Attention for Long-range Time Series Modeling and Forecasting [[ICLR 2022]](https://openreview.net/pdf?id=0EXmFzUn5I) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Pyraformer.py).
- [x] **Autoformer** - Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting [[NeurIPS 2021]](https://openreview.net/pdf?id=I55UqU-M11y) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Autoformer.py).
- [x] **Informer** - Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting [[AAAI 2021]](https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Informer.py).
- [x] **Reformer** - Reformer: The Efficient Transformer [[ICLR 2020]](https://openreview.net/forum?id=rkgNKkHtvB) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Reformer.py).
- [x] **Transformer** - Attention is All You Need [[NeurIPS 2017]](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Transformer.py).
See our latest paper [[TimesNet]](https://arxiv.org/abs/2210.02186) for the comprehensive benchmark. We will release a real-time updated online version soon.
**Newly added baselines.** We will add them to the leaderboard after a comprehensive evaluation.
- [x] **MambaSL** - MambaSL: Exploring Single-Layer Mamba for Time Series Classification [[ICLR 2026]](https://openreview.net/forum?id=YDl4vqQqGP) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MambaSingleLayer.py)
- [x] **TimeFilter** - TimeFilter: Patch-Specific Spatial-Temporal Graph Filtration for Time Series Forecasting [[ICML 2025]](https://arxiv.org/abs/2501.13041) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeFilter.py)
- [x] **KAN-AD** - KAN-AD: Time Series Anomaly Detection with Kolmogorov-Arnold Networks [[ICML 2025]](https://arxiv.org/abs/2411.00278) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/KANAD.py)
- [x] **MultiPatchFormer** - A multiscale model for multivariate time series forecasting [[Scientific Reports 2025]](https://www.nature.com/articles/s41598-024-82417-4) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MultiPatchFormer.py)
- [x] **WPMixer** - WPMixer: Efficient Multi-Resolution Mixing for Long-Term Time Series Forecasting [[AAAI 2025]](https://arxiv.org/abs/2412.17176) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/WPMixer.py)
- [x] **MSGNet** - MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting [[AAAI 2024]](https://dl.acm.org/doi/10.1609/aaai.v38i10.28991) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MSGNet.py)
- [x] **PAttn** - Are Language Models Actually Useful for Time Series Forecasting? [[NeurIPS 2024]](https://arxiv.org/pdf/2406.16964) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/PAttn.py)
- [x] **Mamba** - Mamba: Linear-Time Sequence Modeling with Selective State Spaces [[arXiv 2023]](https://arxiv.org/abs/2312.00752) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py)
- [x] **SegRNN** - SegRNN: Segment Recurrent Neural Network for Long-Term Time Series Forecasting [[arXiv 2023]](https://arxiv.org/abs/2308.11200) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/SegRNN.py).
- [x] **Koopa** - Koopa: Learning Non-stationary Time Series Dynamics with Koopman Predictors [[NeurIPS 2023]](https://arxiv.org/pdf/2305.18803.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Koopa.py).
- [x] **FreTS** - Frequency-domain MLPs are More Effective Learners in Time Series Forecasting [[NeurIPS 2023]](https://arxiv.org/pdf/2311.06184.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FreTS.py).
- [x] **MICN** - MICN: Multi-scale Local and Global Context Modeling for Long-term Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=zt53IDUR1U)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MICN.py).
- [x] **Crossformer** - Crossformer: Transformer Utilizing Cross-Dimension Dependency for Multivariate Time Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=vSVLM2j9eie)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Crossformer.py).
- [x] **TiDE** - Long-term Forecasting with TiDE: Time-series Dense Encoder [[arXiv 2023]](https://arxiv.org/pdf/2304.08424.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiDE.py).
- [x] **SCINet** - SCINet: Time Series Modeling and Forecasting with Sample Convolution and Interaction [[NeurIPS 2022]](https://openreview.net/pdf?id=AyajSjTAzmg)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/SCINet.py).
- [x] **FiLM** - FiLM: Frequency improved Legendre Memory Model for Long-term Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/forum?id=zTQdHSQUQWc)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FiLM.py).
- [x] **TFT** - Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting [[arXiv 2019]](https://arxiv.org/abs/1912.09363)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TemporalFusionTransformer.py).
**Newly added Large Time Series Models.** This library also supports the zero-shot evaluation of the following LTSMs.
- [x] **Chronos2** - Chronos-2: From Univariate to Universal Forecasting [[arXiv 2025]](https://arxiv.org/abs/2510.15821) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos2.py)
- [x] **TiRex** - TiRex: Zero-Shot Forecasting Across Long and Short Horizons with Enhanced In-Context Learning [[NeurIPS 2025]](https://arxiv.org/pdf/2505.23719) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiRex.py)
- [x] **Sundial** - Sundial: A Family of Highly Capable Time Series Foundation Models [[ICML 2025]](https://arxiv.org/pdf/2502.00816) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Sundial.py)
- [x] **Time-MoE** - Time-MoE: Billion-Scale Time Series Foundation Models with Mixture of Experts [[ICLR 2025]](https://arxiv.org/pdf/2409.16040) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMoE.py)
- [x] **Toto** - Toto: Time Series Optimized Transformer for Observability [[arXiv 2024]](https://arxiv.org/pdf/2407.07874)
- [x] **Chronos** - Chronos: Learning the Language of Time Series [[TMLR 2024]](https://arxiv.org/pdf/2403.07815) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos.py)
- [x] **Moirai** - Unified Training of Universal Time Series Forecasting Transformers [[ICML 2024]](https://arxiv.org/pdf/2402.02592) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Moirai.py)
- [x] **TimesFM** - A decoder-only foundation model for time-series forecasting [[ICML 2024]](https://arxiv.org/abs/2310.10688) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesFM.py)
## Getting Started
### Prepare Data
You can obtain the well-preprocessed datasets from [[Google Drive]](https://drive.google.com/drive/folders/13Cg1KYOlzM5C7K8gK8NfC-F3EYxkM3D2?usp=sharing), [[Baidu Drive]](https://pan.baidu.com/s/1r3KhGd0Q9PJIUZdfEYoymg?pwd=i9iy) or [[Hugging Face]](https://huggingface.co/datasets/thuml/Time-Series-Library). Then place the downloaded data in the folder `./dataset`.
### Installation
1. Clone this repository.
```bash
git clone https://github.com/thuml/Time-Series-Library.git
cd Time-Series-Library
```
2. Create a new Conda environment.
```bash
conda create -n tslib python=3.11
conda activate tslib
```
3. Install Core Dependencies
> ⚠️ **CUDA Compatibility Notice**
> The torch prebuilt package is **CUDA-version specific**. (See https://pytorch.org/get-started/previous-versions/)
> Please make sure to install the package that matches your local CUDA version (e.g., `cu118` or `cu121`).
> Recommended: torch==2.5.1
```bash
pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
pip install -r requirements.txt
```
4. Install Dependencies for Mamba Model (Required for Time-Series-Library/models/Mamba.py)
> ⚠️ **Linux only**
> ⚠️ **CUDA Compatibility Notice**
> The prebuilt Mamba wheel is **CUDA-version specific**.
> Please make sure to install the wheel that matches your local CUDA version
> (e.g., `cu11` or `cu12`). Installing a mismatched version may result in
> runtime errors or import failures.
Example for **CUDA 12**:
```bash
pip install https://github.com/state-spaces/mamba/releases/download/v2.2.6.post3/mamba_ssm-2.2.6.post3+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
```
5. Install Dependencies for Moirai Model (Required for Time-Series-Library/models/Moirai.py)
```bash
pip install uni2ts --no-deps
```
### Docker Deployment
```bash
# Build and start the Docker container in detached mode
docker compose -f 'Time-Series-Library/docker-compose.yml' up -d --build
# Download / place the dataset into a newly created folder ./dataset at the repository root
mkdir -p dataset # create the dataset directory
# Copy the local dataset into the container at /workspace/dataset
docker cp ./dataset tslib:/workspace/dataset
# Enter the running container to continue training / evaluation
docker exec -it tslib bash
# Switch to the workspace directory inside the container
cd /workspace
# Run zero-shot forecasting with the pre-trained Moirai model.
# A shell comment cannot follow a line-continuation backslash, so the
# arguments are described here instead of inline:
#   --task_name    task type: zero-shot forecasting
#   --is_training  0 = inference only (no training)
#   --root_path    root directory of the dataset
#   --data_path    dataset file name
#   --model_id     experiment/model identifier
#   --model        model name (TimesFM / Moirai)
#   --data         dataset name
#   --features     M = multivariate forecasting
#   --seq_len      input sequence length
#   --pred_len     prediction horizon
#   --enc_in       number of input variables
#   --des          experiment description
#   --itr          number of runs
python -u run.py \
  --task_name zero_shot_forecast \
  --is_training 0 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTh1.csv \
  --model_id ETTh1_512_96 \
  --model Moirai \
  --data ETTh1 \
  --features M \
  --seq_len 512 \
  --pred_len 96 \
  --enc_in 7 \
  --des 'Exp' \
  --itr 1
```
### Quick Test
Quick test for all 5 tasks (1 epoch each):
```bash
# Run quick tests for all 5 tasks
export CUDA_VISIBLE_DEVICES=0
# 1. Long-term forecasting
python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_long --model DLinear --data ETTh1 --features M --seq_len 96 --pred_len 96 --enc_in 7 --dec_in 7 --c_out 7 --train_epochs 1 --num_workers 2
# 2. Short-term forecasting (using ETT dataset with shorter prediction length)
python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_short --model TimesNet --data ETTh1 --features M --seq_len 24 --label_len 12 --pred_len 24 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 5 --train_epochs 1 --num_workers 2
# 3. Imputation
python -u run.py --task_name imputation --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_imp --model TimesNet --data ETTh1 --features M --seq_len 96 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 3 --train_epochs 1 --num_workers 2 --label_len 0 --pred_len 0 --mask_rate 0.125 --learning_rate 0.001
# 4. Anomaly detection
python -u run.py --task_name anomaly_detection --is_training 1 --root_path ./dataset/PSM --model_id test_ad --model TimesNet --data PSM --features M --seq_len 100 --pred_len 0 --d_model 64 --d_ff 64 --e_layers 2 --enc_in 25 --c_out 25 --anomaly_ratio 1.0 --top_k 3 --train_epochs 1 --batch_size 128 --num_workers 2
# 5. Classification
python -u run.py --task_name classification --is_training 1 --root_path ./dataset/Heartbeat/ --model_id Heartbeat --model TimesNet --data UEA --e_layers 2 --d_layers 1 --factor 3 --d_model 64 --d_ff 128 --top_k 3 --train_epochs 1 --batch_size 16 --learning_rate 0.001 --num_workers 0
```
### Train and Evaluate
We provide the experiment scripts for all benchmarks under the folder `./scripts/`. You can reproduce the experiment results with the following examples:
> ⚠️ Some scripts have `CUDA_VISIBLE_DEVICES` set by default. Please modify or remove this setting according to your actual GPU configuration, otherwise it may prevent GPU usage.
```bash
# long-term forecast
bash ./scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh
# short-term forecast
bash ./scripts/short_term_forecast/TimesNet_M4.sh
# imputation
bash ./scripts/imputation/ETT_script/TimesNet_ETTh1.sh
# anomaly detection
bash ./scripts/anomaly_detection/PSM/TimesNet.sh
# classification
bash ./scripts/classification/TimesNet.sh
```
### Develop Your Own Model
- Add the model file to the folder `./models`. You can use `./models/Transformer.py` as a reference.
- Create the corresponding scripts under the folder `./scripts`.
### Note:
(1) About classification: Since we include all five tasks in a unified code base, the accuracy of each subtask may fluctuate but the average performance can be reproduced (even a bit better). We have provided the reproduced checkpoints [here](https://github.com/thuml/Time-Series-Library/issues/494).
(2) About anomaly detection: Some discussion about the adjustment strategy in anomaly detection can be found [here](https://github.com/thuml/Anomaly-Transformer/issues/14). The key point is that the adjustment strategy corresponds to an event-level metric.
### Inspect the project structure:
```
Time-Series-Library/
├── README.md # Official README with tasks, leaderboard, usage
├── requirements.txt # pip dependency list for quick environment setup
├── LICENSE / CONTRIBUTING.md # Upstream license and contribution guide
├── run.py # Unified entry that parses args and dispatches tasks
├── exp/ # Task pipelines wrapping train/val/test
│ ├── exp_basic.py # Experiment base class, registers models, builds flows
│ ├── exp_long_term_forecasting.py # Long-term forecasting logic
│ ├── exp_short_term_forecasting.py # Short-term forecasting logic
│ ├── exp_imputation.py # Missing-value imputation
│ ├── exp_anomaly_detection.py # Anomaly detection
│ ├── exp_classification.py # Classification
│ └── exp_zero_shot_forecasting.py # LTSM zero-shot evaluation
├── data_provider/ # Dataset loaders and splits
│ ├── data_factory.py # Chooses the proper DataLoader per task
│ ├── data_loader.py # Generic TS reader with sliding-window logic
│ ├── uea.py / m4.py # Parsers for UEA, M4 and other formats
│ └── __init__.py # Exposes factory interfaces upward
├── models/ # All model implementations
│ ├── TimesNet.py, TimeMixer.py # Main forecasting models
│ ├── Chronos2.py, TiRex.py # LTSM zero-shot models
│ └── __init__.py # Enables name-based instantiation inside exp
├── layers/ # Reusable attention / conv / embedding blocks
│ ├── Transformer_EncDec.py # Transformer stacks
│ ├── AutoCorrelation.py # Auto-correlation operator
│ ├── MultiWaveletCorrelation.py# Frequency-domain unit
│ └── Embed.py etc. # Shared primitives
├── utils/ # Utility toolbox
│ ├── metrics.py # MSE / MAE / DTW and other metrics
│ ├── tools.py # General helpers such as EarlyStopping
│ ├── augmentation.py # Augmentations for classification / detection
│ ├── print_args.py # Unified argument printer
│ └── masking.py / losses.py # Task-specific helpers
├── scripts/ # Bash recipes for reproducible experiments
│ ├── long_term_forecast/ # Long-term forecasting per dataset/model
│ ├── short_term_forecast/ # M4 and other short-term scripts
│ ├── imputation/ # Imputation scripts
│ ├── anomaly_detection/ # SMD / SMAP / SWAT detection scripts
│ ├── classification/ # UEA classification scripts
│ └── exogenous_forecast/ # TimeXer exogenous forecasting flow
├── tutorial/ # TimesNet tutorial notebook and figures
└── pic/ # README figures (dataset overview, etc.)
```
### Understand the project architecture:
- **E2E flow**: configure experiments via `scripts/*.sh` → run `python run.py ...` → `run.py` parses arguments and selects the proper `Exp_*` via `task_name` → the experiment builds datasets through `data_provider`, instantiates networks from `models`, and drives train/val/test with utilities in `utils` → metrics and checkpoints are written to `./checkpoints`.
- **Experiment layer (`exp/`)**: `Exp_Basic` registers models and devices; subclasses implement `_get_data`, `train`, and `test` to encapsulate task-specific differences so the same model can be reused.
- **Model & layer layer (`models/` + `layers/`)**: model files define architectures, while reusable attention/conv/frequency components live in `layers/` to minimize duplication.
- **Data layer (`data_provider/`)**: `data_factory` returns the correct `Dataset/DataLoader`; `data_loader` handles windowing, masking, and sampling, with arguments controlling window length, missing ratio, anomaly ratio, etc.
- **Script layer (`scripts/`)**: bash scripts capture paper configurations (dataset, window, model, GPU) for reproducibility and serve as templates for custom runs.
- **Utility layer (`utils/`)**: `metrics` centralizes evaluation, `tools` bundles essentials like `EarlyStopping` and `adjust_learning_rate`, while `augmentation`/`masking` cover task-specific preprocessing.
- **Learning path**: recommended reading order is `scripts -> run.py -> exp/exp_basic.py -> corresponding Exp subclass -> data_provider -> models`, using `tutorial/TimesNet_tutorial.ipynb` as a guided walkthrough before diving deeper.
## Citation
If you find this repo useful, please cite our paper.
```
@inproceedings{wu2023timesnet,
title={TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis},
author={Haixu Wu and Tengge Hu and Yong Liu and Hang Zhou and Jianmin Wang and Mingsheng Long},
booktitle={International Conference on Learning Representations},
year={2023},
}
@article{wang2024tssurvey,
title={Deep Time Series Models: A Comprehensive Survey and Benchmark},
author={Yuxuan Wang and Haixu Wu and Jiaxiang Dong and Yong Liu and Mingsheng Long and Jianmin Wang},
journal={arXiv preprint arXiv:2407.13278},
year={2024},
}
```
## Contact
If you have any questions or suggestions, feel free to contact our maintenance team:
Current:
- Haixu Wu (Ph.D., wuhaixu98@gmail.com)
- Yuxuan Wang (Ph.D. student, wangyuxu22@mails.tsinghua.edu.cn)
- Yong Liu (Ph.D. student, liuyong21@mails.tsinghua.edu.cn)
- Ailuntz (Student from Open-source Community, ailuntz@icloud.com)
Previous:
- Huikun Weng (Undergraduate, wenghk22@mails.tsinghua.edu.cn)
- Tengge Hu (Master student, htg21@mails.tsinghua.edu.cn)
- Haoran Zhang (Master student, z-hr20@mails.tsinghua.edu.cn)
- Jiawei Guo (Undergraduate, guo-jw21@mails.tsinghua.edu.cn)
Or describe it in Issues.
## Acknowledgement
This library is constructed based on the following repos:
- Forecasting: https://github.com/thuml/Autoformer.
- Anomaly Detection: https://github.com/thuml/Anomaly-Transformer.
- Classification: https://github.com/thuml/Flowformer.
All the experiment datasets are public, and we obtain them from the following links:
- Long-term Forecasting and Imputation: https://github.com/thuml/Autoformer.
- Short-term Forecasting: https://github.com/ServiceNow/N-BEATS.
- Anomaly Detection: https://github.com/thuml/Anomaly-Transformer.
- Classification: https://www.timeseriesclassification.com/.
## All Thanks To Our Contributors
================================================
FILE: README_zh.md
================================================
# 时间序列库(TSLib)
TSLib 是一个面向深度学习研究者的开源库,特别适用于深度时间序列分析。
> **English README**:[README.md](./README.md)
我们提供了一个整洁的代码库,用于评测先进的深度时间序列模型或开发自定义模型,覆盖 **长短期预测、插补、异常检测和分类** 等五大主流任务。
:triangular_flag_on_post:**最新动态**(2025.12)非常感谢 [ailuntz](https://github.com/thuml/Time-Series-Library/pull/805) 的杰出贡献,提供了更新的依赖要求和 Docker 部署,以及完善的文档。这对本项目和初学者都很有意义。
:triangular_flag_on_post:**最新动态**(2025.11)鉴于大型时间序列模型(LTSM)的快速发展,我们在 TSLib 中新增了[[零样本预测]](https://github.com/thuml/Time-Series-Library/blob/main/exp/exp_zero_shot_forecasting.py)功能,可参考 [此脚本](https://github.com/thuml/Time-Series-Library/blob/main/scripts/long_term_forecast/ETT_script/LTSM.sh) 评测 LTSM。
:triangular_flag_on_post:**最新动态**(2025.10)针对近期研究者在标准基准上追求微小提升而产生的困惑,我们提出了[[精度定律]](https://arxiv.org/abs/2510.02729),以刻画深度时间序列预测任务的目标,并可据此识别已饱和的数据集。
:triangular_flag_on_post:**最新动态**(2024.10)我们已纳入 [[TimeXer]](https://arxiv.org/abs/2402.19072),其定义了一个实用的预测范式:带外生变量的预测。考虑到实用性与计算效率,我们认为 TimeXer 所定义的新范式将成为未来研究的“正确”任务。
:triangular_flag_on_post:**最新动态**(2024.10)实验室已开源 [[OpenLTM]](https://github.com/thuml/OpenLTM),提供了有别于 TSLib 的预训练 - 微调范式。如果您对大型时间序列模型感兴趣,该仓库值得参考。
:triangular_flag_on_post:**最新动态**(2024.07)我们撰写了关于[[深度时间序列模型]](https://arxiv.org/abs/2407.13278)的综述,并基于 TSLib 构建了严谨的基准。论文总结了当前时间序列模型的设计原则,并通过深入实验验证,期望对未来研究有所帮助。
:triangular_flag_on_post:**最新动态**(2024.04)感谢 [frecklebars](https://github.com/thuml/Time-Series-Library/pull/378) 的贡献,著名的序列模型 [Mamba](https://arxiv.org/abs/2312.00752) 已加入本库。参见[该文件](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py),需要先用 pip 安装 `mamba_ssm`。
:triangular_flag_on_post:**最新动态**(2024.03)鉴于各论文使用的回溯窗口长度不一致,我们将排行榜中的长期预测拆分为 Look-Back-96 与 Look-Back-Searching 两类。建议阅读 [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2),其实验同时包含两种窗口设置,更具科学性。
:triangular_flag_on_post:**最新动态**(2023.10)我们添加了 [iTransformer](https://arxiv.org/abs/2310.06625) 的实现,这是长期预测领域的最新 SOTA。官方代码与完整脚本参见 [此处](https://github.com/thuml/iTransformer)。
:triangular_flag_on_post:**最新动态**(2023.09)我们为 [TimesNet](https://openreview.net/pdf?id=ju_Uqw384Oq) 及本库添加了详细[教程](https://github.com/thuml/Time-Series-Library/blob/main/tutorial/TimesNet_tutorial.ipynb),对时间序列初学者十分友好。
:triangular_flag_on_post:**最新动态**(2023.02)我们发布了 TSlib,作为一个面向时间序列模型的综合基准与代码库,扩展自此前的 [Autoformer](https://github.com/thuml/Autoformer) 仓库。
## 时间序列分析排行榜
截至 2024 年 3 月,各任务排行榜前三名如下:
| 模型<br>排名 | 长期预测<br>Look-Back-96 | 长期预测<br>Look-Back-Searching | 短期预测 | 插补 | 分类 | 异常检测 |
| ------------ | ------------------------ | -------------------------------- | -------- | ---- | ---- | -------- |
| 🥇 第一名 | [TimeXer](https://arxiv.org/abs/2402.19072) | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) |
| 🥈 第二名 | [iTransformer](https://arxiv.org/abs/2310.06625) | [PatchTST](https://github.com/yuqinie98/PatchTST) | [Non-stationary<br>Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary<br>Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary<br>Transformer](https://github.com/thuml/Nonstationary_Transformers) | [FEDformer](https://github.com/MAZiqing/FEDformer) |
| 🥉 第三名 | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [DLinear](https://arxiv.org/pdf/2205.13504.pdf) | [FEDformer](https://github.com/MAZiqing/FEDformer) | [Autoformer](https://github.com/thuml/Autoformer) | [Informer](https://github.com/zhouhaoyi/Informer2020) | [Autoformer](https://github.com/thuml/Autoformer) |
**说明:排行榜会持续更新。** 如果您提出了先进的模型,可通过发送论文或代码链接、或提交 PR 与我们联系,我们会尽快将其加入仓库并更新排行榜。
**排行榜中的对比模型**(☑ 表示代码已收录)。
- [x] **TimeXer** - TimeXer: Empowering Transformers for Time Series Forecasting with Exogenous Variables [[NeurIPS 2024]](https://arxiv.org/abs/2402.19072) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeXer.py)
- [x] **TimeMixer** - TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting [[ICLR 2024]](https://openreview.net/pdf?id=7oLshfEIC2) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMixer.py)
- [x] **TSMixer** - TSMixer: An All-MLP Architecture for Time Series Forecasting [[arXiv 2023]](https://arxiv.org/pdf/2303.06053.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TSMixer.py)
- [x] **iTransformer** - iTransformer: Inverted Transformers Are Effective for Time Series Forecasting [[ICLR 2024]](https://arxiv.org/abs/2310.06625) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/iTransformer.py)
- [x] **PatchTST** - A Time Series is Worth 64 Words: Long-term Forecasting with Transformers [[ICLR 2023]](https://openreview.net/pdf?id=Jbdc0vTOcol) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/PatchTST.py)
- [x] **TimesNet** - TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis [[ICLR 2023]](https://openreview.net/pdf?id=ju_Uqw384Oq) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesNet.py)
- [x] **DLinear** - Are Transformers Effective for Time Series Forecasting? [[AAAI 2023]](https://arxiv.org/pdf/2205.13504.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/DLinear.py)
- [x] **LightTS** - Less Is More: Fast Multivariate Time Series Forecasting with Light Sampling-oriented MLP Structures [[arXiv 2022]](https://arxiv.org/abs/2207.01186) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/LightTS.py)
- [x] **ETSformer** - ETSformer: Exponential Smoothing Transformers for Time-series Forecasting [[arXiv 2022]](https://arxiv.org/abs/2202.01381) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/ETSformer.py)
- [x] **Non-stationary Transformer** - Non-stationary Transformers: Exploring the Stationarity in Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/pdf?id=ucNDIDRNjjv) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Nonstationary_Transformer.py)
- [x] **FEDformer** - FEDformer: Frequency Enhanced Decomposed Transformer for Long-term Series Forecasting [[ICML 2022]](https://proceedings.mlr.press/v162/zhou22g.html) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/FEDformer.py)
- [x] **Pyraformer** - Pyraformer: Low-complexity Pyramidal Attention for Long-range Time Series Modeling and Forecasting [[ICLR 2022]](https://openreview.net/pdf?id=0EXmFzUn5I) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Pyraformer.py)
- [x] **Autoformer** - Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting [[NeurIPS 2021]](https://openreview.net/pdf?id=I55UqU-M11y) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Autoformer.py)
- [x] **Informer** - Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting [[AAAI 2021]](https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Informer.py)
- [x] **Reformer** - Reformer: The Efficient Transformer [[ICLR 2020]](https://openreview.net/forum?id=rkgNKkHtvB) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Reformer.py)
- [x] **Transformer** - Attention is All You Need [[NeurIPS 2017]](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Transformer.py)
更多详情可参考我们关于 [[TimesNet]](https://arxiv.org/abs/2210.02186) 的最新论文,实时在线版本即将发布。
**新增基线模型**(综合评测后将加入排行榜)。
- [x] **MambaSL** - MambaSL: Exploring Single-Layer Mamba for Time Series Classification [[ICLR 2026]](https://openreview.net/forum?id=YDl4vqQqGP) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/MambaSingleLayer.py)
- [x] **TimeFilter** - TimeFilter: Patch-Specific Spatial-Temporal Graph Filtration for Time Series Forecasting [[ICML 2025]](https://arxiv.org/abs/2501.13041) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeFilter.py)
- [x] **KAN-AD** - KAN-AD: Time Series Anomaly Detection with Kolmogorov-Arnold Networks [[ICML 2025]](https://arxiv.org/abs/2411.00278) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/KANAD.py)
- [x] **MultiPatchFormer** - A multiscale model for multivariate time series forecasting [[Scientific Reports 2025]](https://www.nature.com/articles/s41598-024-82417-4) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/MultiPatchFormer.py)
- [x] **WPMixer** - WPMixer: Efficient Multi-Resolution Mixing for Long-Term Time Series Forecasting [[AAAI 2025]](https://arxiv.org/abs/2412.17176) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/WPMixer.py)
- [x] **MSGNet** - MSGNet: Learning Multi-Scale Inter-Series Correlations for Multivariate Time Series Forecasting [[AAAI 2024]](https://dl.acm.org/doi/10.1609/aaai.v38i10.28991) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/MSGNet.py)
- [x] **PAttn** - Are Language Models Actually Useful for Time Series Forecasting? [[NeurIPS 2024]](https://arxiv.org/pdf/2406.16964) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/PAttn.py)
- [x] **Mamba** - Mamba: Linear-Time Sequence Modeling with Selective State Spaces [[arXiv 2023]](https://arxiv.org/abs/2312.00752) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py)
- [x] **SegRNN** - SegRNN: Segment Recurrent Neural Network for Long-Term Time Series Forecasting [[arXiv 2023]](https://arxiv.org/abs/2308.11200.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/SegRNN.py)
- [x] **Koopa** - Koopa: Learning Non-stationary Time Series Dynamics with Koopman Predictors [[NeurIPS 2023]](https://arxiv.org/pdf/2305.18803.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Koopa.py)
- [x] **FreTS** - Frequency-domain MLPs are More Effective Learners in Time Series Forecasting [[NeurIPS 2023]](https://arxiv.org/pdf/2311.06184.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/FreTS.py)
- [x] **MICN** - MICN: Multi-scale Local and Global Context Modeling for Long-term Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=zt53IDUR1U) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/MICN.py)
- [x] **Crossformer** - Crossformer: Transformer Utilizing Cross-Dimension Dependency for Multivariate Time Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=vSVLM2j9eie) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Crossformer.py)
- [x] **TiDE** - Long-term Forecasting with TiDE: Time-series Dense Encoder [[arXiv 2023]](https://arxiv.org/pdf/2304.08424.pdf) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiDE.py)
- [x] **SCINet** - SCINet: Time Series Modeling and Forecasting with Sample Convolution and Interaction [[NeurIPS 2022]](https://openreview.net/pdf?id=AyajSjTAzmg) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/SCINet.py)
- [x] **FiLM** - FiLM: Frequency improved Legendre Memory Model for Long-term Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/forum?id=zTQdHSQUQWc) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/FiLM.py)
- [x] **TFT** - Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting [[arXiv 2019]](https://arxiv.org/abs/1912.09363) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TemporalFusionTransformer.py)
**新增大型时间序列模型**。本库同样支持以下 LTSM 的零样本评测:
- [x] **Chronos2** - Chronos-2: From Univariate to Universal Forecasting [[arXiv 2025]](https://arxiv.org/abs/2510.15821) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos2.py)
- [x] **TiRex** - TiRex: Zero-Shot Forecasting Across Long and Short Horizons with Enhanced In-Context Learning [[NeurIPS 2025]](https://arxiv.org/pdf/2505.23719) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiRex.py)
- [x] **Sundial** - Sundial: A Family of Highly Capable Time Series Foundation Models [[ICML 2025]](https://arxiv.org/pdf/2502.00816) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Sundial.py)
- [x] **Time-MoE** - Time-MoE: Billion-Scale Time Series Foundation Models with Mixture of Experts [[ICLR 2025]](https://arxiv.org/pdf/2409.16040) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMoE.py)
- [x] **Toto** - Toto: Time Series Optimized Transformer for Observability [[arXiv 2024]](https://arxiv.org/pdf/2407.07874)
- [x] **Chronos** - Chronos: Learning the Language of Time Series [[TMLR 2024]](https://arxiv.org/pdf/2403.07815) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/Chronos.py)
- [x] **Moirai** - Unified Training of Universal Time Series Forecasting Transformers [[ICML 2024]](https://arxiv.org/pdf/2402.02592)
- [x] **TimesFM** - TimesFM: A decoder-only foundation model for time-series forecasting [[ICML 2024]](https://arxiv.org/abs/2310.10688) [[代码]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesFM.py)
## 快速开始
### 准备数据
可从 [[Google Drive]](https://drive.google.com/drive/folders/13Cg1KYOlzM5C7K8gK8NfC-F3EYxkM3D2?usp=sharing)、[[Baidu Drive]](https://pan.baidu.com/s/1r3KhGd0Q9PJIUZdfEYoymg?pwd=i9iy) 或 [[Hugging Face]](https://huggingface.co/datasets/thuml/Time-Series-Library) 下载预处理数据,并置于 `./dataset` 目录。
### 安装
1. 克隆本仓库
```bash
git clone https://github.com/thuml/Time-Series-Library.git
cd Time-Series-Library
```
2. 创建新的 Conda 环境
```bash
conda create -n tslib python=3.11
conda activate tslib
```
3. 安装核心依赖
> ⚠️ **CUDA 兼容性提示**
> torch 预编译包与 **CUDA 版本强相关**。(查看 https://pytorch.org/get-started/previous-versions/ )
> 请确保安装与本地 CUDA 版本匹配的 torch 包(如 `cu118` 或 `cu121`)。
> 推荐torch==2.5.1
```bash
pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
pip install -r requirements.txt
```
4. 安装 Mamba 模型依赖(models/Mamba.py 需要)
> ⚠️ **仅支持 Linux**
> ⚠️ **CUDA 兼容性提示**
> Mamba 预编译包与 **CUDA 版本强相关**。
> 请确保安装与本地 CUDA 版本匹配的包(如 `cu11` 或 `cu12`)。
> 版本不匹配可能导致运行时错误或导入失败。
**CUDA 12** 示例:
```bash
pip install https://github.com/state-spaces/mamba/releases/download/v2.2.6.post3/mamba_ssm-2.2.6.post3+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
```
5. 安装 Moirai 模型依赖(models/Moirai.py 需要)
```bash
pip install uni2ts --no-deps
```
### Docker 部署
```bash
# 构建并以后台模式启动容器
docker compose -f 'Time-Series-Library/docker-compose.yml' up -d --build
# 在仓库根目录创建 ./dataset 并下载/放置数据集
mkdir -p dataset
# 将本地数据集复制到容器内 /workspace/dataset
docker cp ./dataset tslib:/workspace/dataset
# 进入运行中的容器
docker exec -it tslib bash
# 切换到容器内的工作目录
cd /workspace
# 使用预训练 Moirai 模型进行零样本预测
# 参数说明:
#   --task_name    任务类型:零样本预测
#   --is_training  0 = 仅推理
#   --root_path    数据集根路径
#   --data_path    数据文件名
#   --model_id     实验/模型标识
#   --model        模型名称(TimesFM / Moirai)
#   --data         数据集名称
#   --features     M = 多变量预测
#   --seq_len      输入序列长度
#   --pred_len     预测步长
#   --enc_in       输入变量数
#   --des          实验描述
#   --itr          运行次数
# (bash 中续行符 "\" 之后不能再跟注释,因此将说明集中列在此处。)
python -u run.py \
  --task_name zero_shot_forecast \
  --is_training 0 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTh1.csv \
  --model_id ETTh1_512_96 \
  --model Moirai \
  --data ETTh1 \
  --features M \
  --seq_len 512 \
  --pred_len 96 \
  --enc_in 7 \
  --des 'Exp' \
  --itr 1
```
### 快速测试
5个任务快速测试(每个任务1个epoch):
```bash
# 执行所有5个任务的快速测试
export CUDA_VISIBLE_DEVICES=0
# 1. 长期预测
python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_long --model DLinear --data ETTh1 --features M --seq_len 96 --pred_len 96 --enc_in 7 --dec_in 7 --c_out 7 --train_epochs 1 --num_workers 2
# 2. 短期预测(使用ETT数据集,较短预测长度)
python -u run.py --task_name long_term_forecast --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_short --model TimesNet --data ETTh1 --features M --seq_len 24 --label_len 12 --pred_len 24 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 5 --train_epochs 1 --num_workers 2
# 3. 插补
python -u run.py --task_name imputation --is_training 1 --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id test_imp --model TimesNet --data ETTh1 --features M --seq_len 96 --e_layers 2 --d_layers 1 --d_model 16 --d_ff 32 --enc_in 7 --dec_in 7 --c_out 7 --top_k 3 --train_epochs 1 --num_workers 2 --label_len 0 --pred_len 0 --mask_rate 0.125 --learning_rate 0.001
# 4. 异常检测
python -u run.py --task_name anomaly_detection --is_training 1 --root_path ./dataset/PSM --model_id test_ad --model TimesNet --data PSM --features M --seq_len 100 --pred_len 0 --d_model 64 --d_ff 64 --e_layers 2 --enc_in 25 --c_out 25 --anomaly_ratio 1.0 --top_k 3 --train_epochs 1 --batch_size 128 --num_workers 2
# 5. 分类
python -u run.py --task_name classification --is_training 1 --root_path ./dataset/Heartbeat/ --model_id Heartbeat --model TimesNet --data UEA --e_layers 2 --d_layers 1 --factor 3 --d_model 64 --d_ff 128 --top_k 3 --train_epochs 1 --batch_size 16 --learning_rate 0.001 --num_workers 0
```
### 训练与评测
`./scripts/` 目录下提供了全部基准的实验脚本,可参考下列示例复现实验:
> ⚠️ 部分脚本中默认设置了 `CUDA_VISIBLE_DEVICES`,请根据实际 GPU 配置修改或删除该设置,否则可能导致无法使用 GPU。
```bash
# 长期预测
bash ./scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh
# 短期预测
bash ./scripts/short_term_forecast/TimesNet_M4.sh
# 插补
bash ./scripts/imputation/ETT_script/TimesNet_ETTh1.sh
# 异常检测
bash ./scripts/anomaly_detection/PSM/TimesNet.sh
# 分类
bash ./scripts/classification/TimesNet.sh
```
### 开发自定义模型
- 将模型文件放入 `./models`,可参考 `./models/Transformer.py`。
- 在 `./scripts` 下创建对应的运行脚本。
### 注意事项:
(1) 关于分类:由于我们在统一代码库中涵盖五大任务,各子任务的精度可能略有波动,但平均性能可复现(甚至略高)。复现用 checkpoint 可在 [此处](https://github.com/thuml/Time-Series-Library/issues/494) 下载。
(2) 关于异常检测:有关异常检测调整策略的讨论见[这里](https://github.com/thuml/Anomaly-Transformer/issues/14),核心是该调整策略对应事件级指标。
### 查看项目文件结构:
```
Time-Series-Library/
├── README.md # 官方README,包含任务、榜单、使用方法
├── requirements.txt # pip依赖列表,直接pip install复现环境
├── LICENSE / CONTRIBUTING.md # 原项目许可与贡献指南
├── run.py # 单入口脚本,解析参数并调度各任务
├── exp/ # 各任务实验管线,封装训练/验证/测试
│ ├── exp_basic.py # 实验基类,注册所有模型,统一构建流程
│ ├── exp_long_term_forecasting.py # 长期预测实验逻辑
│ ├── exp_short_term_forecasting.py # 短期预测实验逻辑
│ ├── exp_imputation.py # 缺失值填充实验
│ ├── exp_anomaly_detection.py # 异常检测实验
│ ├── exp_classification.py # 分类实验
│ └── exp_zero_shot_forecasting.py # LTSM零样本预测评估
├── data_provider/ # 数据入口,负责数据集载入与切分
│ ├── data_factory.py # 根据任务选择对应DataLoader
│ ├── data_loader.py # 通用时序数据读取与滑窗逻辑
│ ├── uea.py / m4.py # UEA、M4等特定数据格式处理
│ └── __init__.py # 暴露上层可用的数据工厂接口
├── models/ # 所有模型实现,文件名即模型名
│ ├── TimesNet.py、TimeMixer.py 等 # 主流预测模型
│ ├── Chronos2.py、TiRex.py # LTSM零样本模型
│ └── __init__.py # 统一导出供实验模块按名称实例化
├── layers/ # 复用层/块,如注意力、卷积、嵌入
│ ├── Transformer_EncDec.py # Transformer编解码堆栈
│ ├── AutoCorrelation.py # 自相关算子
│ ├── MultiWaveletCorrelation.py# 频域单元
│ └── Embed.py 等 # 各模型共享基元
├── utils/ # 工具集合
│ ├── metrics.py # MSE/MAE/DTW等评估指标
│ ├── tools.py # 训练通用工具,比如EarlyStopping
│ ├── augmentation.py # 分类/检测任务增强策略
│ ├── print_args.py # 统一打印参数
│ └── masking.py / losses.py # 任务相关辅助函数
├── scripts/ # 复现实验的bash脚本
│ ├── long_term_forecast/ # 按数据集/模型划分的长期预测脚本
│ ├── short_term_forecast/ # M4等短期预测脚本
│ ├── imputation/ # 多数据集缺失填充脚本
│ ├── anomaly_detection/ # SMD/SMAP/SWAT等检测脚本
│ ├── classification/ # UEA分类脚本
│ └── exogenous_forecast/ # TimeXer外生变量预测流程
├── tutorial/ # 官方TimesNet教学notebook与插图
└── pic/ # README插图(数据集分布等)
```
### 理解项目架构:
- **整体流程**:通过 `scripts/*.sh` 设定实验参数 → 调用 `python run.py ...` → `run.py` 解析参数并根据 `task_name` 选择对应 `Exp_*` 类 → `Exp_*` 内部利用 `data_provider` 构造数据加载器、`models` 实例化网络、`utils` 中的工具完成训练/验证/测试 → 结果与模型参数写入 `./checkpoints`。
- **实验层(exp/)**:`Exp_Basic` 负责注册模型与设备,子类实现 `_get_data/train/test`,将不同任务的差异隔离,方便模型在多任务间复用。
- **模型与层(models/ + layers/)**:模型文件集中定义各网络结构,公用的注意力、卷积、频域块等沉淀在 `layers/`,减少重复实现。
- **数据层(data_provider/)**:`data_factory` 按任务返回 Dataset/DataLoader,`data_loader` 封装序列裁剪、滑动窗口、掩码策略,不同任务通过参数控制窗口长度、缺失率、异常比例。
- **脚本层(scripts/)**:提供与论文一致的复现实验脚本,涵盖各种数据集/模型/GPU 配置,便于批量跑榜,也可作为自定义实验的起点。
- **辅助层(utils/)**:`metrics` 统一评估指标,`tools` 中的 `EarlyStopping`、`adjust_learning_rate` 等负责训练调度;`augmentation`/`masking` 等用于任务特定的数据增强或预处理。
- **学习建议**:阅读顺序推荐 `scripts -> run.py -> exp/exp_basic.py -> 对应 Exp 子类 -> data_provider -> models`,并结合 `tutorial/TimesNet_tutorial.ipynb` 快速熟悉整体调用链,再按需深入模型或层级实现。
## 引用
如果本仓库对您有帮助,请引用以下论文:
```
@inproceedings{wu2023timesnet,
title={TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis},
author={Haixu Wu and Tengge Hu and Yong Liu and Hang Zhou and Jianmin Wang and Mingsheng Long},
booktitle={International Conference on Learning Representations},
year={2023},
}
@article{wang2024tssurvey,
title={Deep Time Series Models: A Comprehensive Survey and Benchmark},
author={Yuxuan Wang and Haixu Wu and Jiaxiang Dong and Yong Liu and Mingsheng Long and Jianmin Wang},
journal={arXiv preprint arXiv:2407.13278},
year={2024},
}
```
## 联系方式
如有问题或建议,欢迎联系维护团队:
现任:
- Haixu Wu(博士,wuhaixu98@gmail.com)
- Yuxuan Wang(博士生,wangyuxu22@mails.tsinghua.edu.cn)
- Yong Liu(博士生,liuyong21@mails.tsinghua.edu.cn)
- Ailuntz(开源社区学生,ailuntz@icloud.com)
往届:
- Huikun Weng(本科生,wenghk22@mails.tsinghua.edu.cn)
- Tengge Hu(硕士,htg21@mails.tsinghua.edu.cn)
- Haoran Zhang(硕士,z-hr20@mails.tsinghua.edu.cn)
- Jiawei Guo(本科生,guo-jw21@mails.tsinghua.edu.cn)
也欢迎在 Issues 中反馈。
## 致谢
本库参考了以下仓库:
- 预测:https://github.com/thuml/Autoformer
- 异常检测:https://github.com/thuml/Anomaly-Transformer
- 分类:https://github.com/thuml/Flowformer
实验所用数据集均为公开数据,来源如下:
- 长期预测与插补:https://github.com/thuml/Autoformer
- 短期预测:https://github.com/ServiceNow/N-BEATS
- 异常检测:https://github.com/thuml/Anomaly-Transformer
- 分类:https://www.timeseriesclassification.com/
## 感谢所有贡献者
================================================
FILE: data_provider/__init__.py
================================================
================================================
FILE: data_provider/data_factory.py
================================================
from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \
MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader
from data_provider.uea import collate_fn
from torch.utils.data import DataLoader
# Registry used by `data_provider`: maps the `args.data` string to the dataset
# class that gets instantiated. Key order is the same as in the original literal.
data_dict = dict(
    # ETTh1/ETTh2 share one class, as do ETTm1/ETTm2.
    ETTh1=Dataset_ETT_hour,
    ETTh2=Dataset_ETT_hour,
    ETTm1=Dataset_ETT_minute,
    ETTm2=Dataset_ETT_minute,
    custom=Dataset_Custom,
    m4=Dataset_M4,
    # *SegLoader classes -- presumably the ones paired with the
    # 'anomaly_detection' task (which passes `win_size`); confirm in data_loader.py.
    PSM=PSMSegLoader,
    MSL=MSLSegLoader,
    SMAP=SMAPSegLoader,
    SMD=SMDSegLoader,
    SWAT=SWATSegLoader,
    UEA=UEAloader,
)
def data_provider(args, flag):
    """Build the dataset and the DataLoader for one split of one task.

    The dataset class is looked up in ``data_dict`` by ``args.data`` and is
    constructed with a task-specific set of keyword arguments selected by
    ``args.task_name`` ('anomaly_detection', 'classification', or anything
    else, which is treated as forecasting/imputation).

    Args:
        args: experiment namespace; the attributes read here are ``data``,
            ``embed``, ``batch_size``, ``freq``, ``task_name``, ``root_path``,
            ``seq_len``, ``num_workers`` and, for the forecasting branch,
            ``data_path``, ``label_len``, ``pred_len``, ``features``,
            ``target`` and ``seasonal_patterns``.
        flag: split name forwarded to the dataset. Shuffling is disabled only
            for 'test' / 'TEST'.

    Returns:
        A ``(data_set, data_loader)`` tuple.

    Raises:
        KeyError: if ``args.data`` is not a key of ``data_dict``.
    """
    # Function-scope import so the block does not depend on a file-level import.
    from functools import partial

    Data = data_dict[args.data]
    timeenc = 0 if args.embed != 'timeF' else 1

    shuffle_flag = flag not in ('test', 'TEST')
    # Every branch keeps the final (possibly short) batch.
    drop_last = False
    batch_size = args.batch_size
    freq = args.freq

    if args.task_name == 'anomaly_detection':
        data_set = Data(
            args=args,
            root_path=args.root_path,
            win_size=args.seq_len,
            flag=flag,
        )
        print(flag, len(data_set))
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last)
        return data_set, data_loader
    elif args.task_name == 'classification':
        data_set = Data(
            args=args,
            root_path=args.root_path,
            flag=flag,
        )
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last,
            # partial() instead of a lambda: a lambda cannot be pickled, which
            # DataLoader needs when worker processes are started with "spawn".
            collate_fn=partial(collate_fn, max_len=args.seq_len)
        )
        return data_set, data_loader
    else:
        data_set = Data(
            args=args,
            root_path=args.root_path,
            data_path=args.data_path,
            flag=flag,
            size=[args.seq_len, args.label_len, args.pred_len],
            features=args.features,
            target=args.target,
            timeenc=timeenc,
            freq=freq,
            seasonal_patterns=args.seasonal_patterns
        )
        print(flag, len(data_set))
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last)
        return data_set, data_loader
================================================
FILE: data_provider/data_loader.py
================================================
import os
import numpy as np
import pandas as pd
import glob
import re
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from utils.timefeatures import time_features
from data_provider.m4 import M4Dataset, M4Meta
from data_provider.uea import subsample, interpolate_missing, Normalizer
from sktime.datasets import load_from_tsfile_to_dataframe
import warnings
from utils.augmentation import run_augmentation_single
from datasets import load_dataset
from huggingface_hub import hf_hub_download
warnings.filterwarnings('ignore')
HUGGINGFACE_REPO = "thuml/Time-Series-Library"
class Dataset_ETT_hour(Dataset):
    """Sliding-window forecasting dataset over an hourly ETT table.

    The table is read from ``root_path/data_path`` when that file exists;
    otherwise the config named after the file stem is loaded from the
    Hugging Face repo ``HUGGINGFACE_REPO``. Rows are split at fixed borders:
    the first 12 * 30 * 24 rows are train, the next 4 * 30 * 24 rows are val
    and the following 4 * 30 * 24 rows are test. The val/test slices start
    ``seq_len`` rows before their border so the first window has full history.
    """

    def __init__(self, args, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            args: experiment namespace; ``augmentation_ratio`` is read here and
                the whole object is forwarded to ``run_augmentation_single``.
            root_path: directory holding the CSV file.
            flag: one of 'train', 'val', 'test'.
            size: ``[seq_len, label_len, pred_len]``; defaults to
                ``[384, 96, 96]`` when None.
            features: 'M' / 'MS' use every non-date column, 'S' only ``target``.
            data_path: CSV file name inside ``root_path``.
            target: name of the target column.
            scale: standardize with statistics fitted on the train rows.
            timeenc: 0 for integer calendar fields, 1 for ``time_features``.
            freq: frequency string forwarded to ``time_features``.
            seasonal_patterns: accepted for a uniform constructor signature;
                not used by this class.
        """
        # size [seq_len, label_len, pred_len]
        self.args = args
        # info
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the table, scale it and slice out this split's rows."""
        self.scaler = StandardScaler()
        local_fp = os.path.join(self.root_path, self.data_path)
        cfg_name = os.path.splitext(os.path.basename(self.data_path))[0]

        if os.path.exists(local_fp):
            df_raw = pd.read_csv(local_fp)
        else:
            ds = load_dataset(HUGGINGFACE_REPO, name=cfg_name)
            df_raw = ds["train"].to_pandas()

        train_end = 12 * 30 * 24
        val_end = train_end + 4 * 30 * 24
        test_end = train_end + 8 * 30 * 24
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                f"Unsupported features mode: {self.features!r} (expected 'M', 'MS' or 'S')")

        if self.scale:
            # Fit on the train rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        data_stamp = self._build_time_marks(df_raw[['date']][border1:border2])

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

        if self.set_type == 0 and self.args.augmentation_ratio > 0:
            self.data_x, self.data_y, _ = run_augmentation_single(self.data_x, self.data_y, self.args)

        self.data_stamp = data_stamp

    def _build_time_marks(self, df_stamp):
        """Encode the 'date' column of ``df_stamp`` as per-row time features.

        With ``timeenc == 0`` the result is an int64 array with the columns
        month, day, weekday, hour. With ``timeenc == 1`` the result is the
        transposed output of ``time_features``.

        Raises:
            ValueError: if ``timeenc`` is neither 0 nor 1.
        """
        dates = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            # The .dt accessor replaces Series.apply(f, 1) and drop(labels, 1),
            # whose extra positional arguments are rejected by pandas >= 2.0.
            fields = [dates.dt.month, dates.dt.day, dates.dt.weekday, dates.dt.hour]
            return np.stack([f.to_numpy() for f in fields], axis=1).astype(np.int64)
        if self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            return data_stamp.transpose(1, 0)
        raise ValueError(f"Unsupported timeenc: {self.timeenc!r} (expected 0 or 1)")

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        starts ``label_len`` rows before the end of ``seq_x`` and spans
        ``label_len + pred_len`` rows.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit entirely inside this split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the standardization applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_ETT_minute(Dataset):
    """Sliding-window forecasting dataset over a 15-minute ETT table.

    The table is read from ``root_path/data_path`` when that file exists;
    otherwise the config named after the file stem is loaded from the
    Hugging Face repo ``HUGGINGFACE_REPO``. Rows are split at fixed borders:
    the first 12 * 30 * 24 * 4 rows are train, the next 4 * 30 * 24 * 4 rows
    are val and the following 4 * 30 * 24 * 4 rows are test. The val/test
    slices start ``seq_len`` rows before their border.
    """

    def __init__(self, args, root_path, flag='train', size=None,
                 features='S', data_path='ETTm1.csv',
                 target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            args: experiment namespace; ``augmentation_ratio`` is read here and
                the whole object is forwarded to ``run_augmentation_single``.
            root_path: directory holding the CSV file.
            flag: one of 'train', 'val', 'test'.
            size: ``[seq_len, label_len, pred_len]``; defaults to
                ``[384, 96, 96]`` when None.
            features: 'M' / 'MS' use every non-date column, 'S' only ``target``.
            data_path: CSV file name inside ``root_path``.
            target: name of the target column.
            scale: standardize with statistics fitted on the train rows.
            timeenc: 0 for integer calendar fields, 1 for ``time_features``.
            freq: frequency string forwarded to ``time_features``.
            seasonal_patterns: accepted for a uniform constructor signature;
                not used by this class.
        """
        # size [seq_len, label_len, pred_len]
        self.args = args
        # info
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the table, scale it and slice out this split's rows."""
        self.scaler = StandardScaler()
        local_fp = os.path.join(self.root_path, self.data_path)
        cfg_name = os.path.splitext(os.path.basename(self.data_path))[0]

        if os.path.exists(local_fp):
            df_raw = pd.read_csv(local_fp)
        else:
            ds = load_dataset(HUGGINGFACE_REPO, name=cfg_name)
            df_raw = ds["train"].to_pandas()

        train_end = 12 * 30 * 24 * 4
        val_end = train_end + 4 * 30 * 24 * 4
        test_end = train_end + 8 * 30 * 24 * 4
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                f"Unsupported features mode: {self.features!r} (expected 'M', 'MS' or 'S')")

        if self.scale:
            # Fit on the train rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        data_stamp = self._build_time_marks(df_raw[['date']][border1:border2])

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

        if self.set_type == 0 and self.args.augmentation_ratio > 0:
            self.data_x, self.data_y, _ = run_augmentation_single(self.data_x, self.data_y, self.args)

        self.data_stamp = data_stamp

    def _build_time_marks(self, df_stamp):
        """Encode the 'date' column of ``df_stamp`` as per-row time features.

        With ``timeenc == 0`` the result is an int64 array with the columns
        month, day, weekday, hour and quarter-hour index (minute // 15).
        With ``timeenc == 1`` the result is the transposed output of
        ``time_features``.

        Raises:
            ValueError: if ``timeenc`` is neither 0 nor 1.
        """
        dates = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            # The .dt accessor replaces Series.apply(f, 1) and drop(labels, 1),
            # whose extra positional arguments are rejected by pandas >= 2.0.
            fields = [dates.dt.month, dates.dt.day, dates.dt.weekday,
                      dates.dt.hour, dates.dt.minute // 15]
            return np.stack([f.to_numpy() for f in fields], axis=1).astype(np.int64)
        if self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            return data_stamp.transpose(1, 0)
        raise ValueError(f"Unsupported timeenc: {self.timeenc!r} (expected 0 or 1)")

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        starts ``label_len`` rows before the end of ``seq_x`` and spans
        ``label_len + pred_len`` rows.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit entirely inside this split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the standardization applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_Custom(Dataset):
    """Sliding-window forecasting dataset over an arbitrary table with a 'date' column.

    The table is read from ``root_path/data_path`` when that file exists;
    otherwise the config named after the file stem is loaded from the
    Hugging Face repo ``HUGGINGFACE_REPO``. Rows are split chronologically
    into 70% train, 20% test and the remainder as val. The val/test slices
    start ``seq_len`` rows before their border.
    """

    def __init__(self, args, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        """Store the configuration and load the requested split.

        Args:
            args: experiment namespace; ``augmentation_ratio`` is read here and
                the whole object is forwarded to ``run_augmentation_single``.
            root_path: directory holding the CSV file.
            flag: one of 'train', 'val', 'test'.
            size: ``[seq_len, label_len, pred_len]``; defaults to
                ``[384, 96, 96]`` when None.
            features: 'M' / 'MS' use every non-date column, 'S' only ``target``.
            data_path: CSV file name inside ``root_path``.
            target: name of the target column; it is moved to the last column.
            scale: standardize with statistics fitted on the train rows.
            timeenc: 0 for integer calendar fields, 1 for ``time_features``.
            freq: frequency string forwarded to ``time_features``.
            seasonal_patterns: accepted for a uniform constructor signature;
                not used by this class.
        """
        # size [seq_len, label_len, pred_len]
        self.args = args
        # info
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the table, reorder its columns, scale it and slice out this split."""
        self.scaler = StandardScaler()
        local_fp = os.path.join(self.root_path, self.data_path)
        cfg_name = os.path.splitext(os.path.basename(self.data_path))[0]

        if os.path.exists(local_fp):
            df_raw = pd.read_csv(local_fp)
        else:
            ds = load_dataset(HUGGINGFACE_REPO, name=cfg_name)
            split_name = "train" if "train" in ds else list(ds.keys())[0]
            df_raw = ds[split_name].to_pandas()

        # Reorder to: ['date', ...(other features), target feature]
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                f"Unsupported features mode: {self.features!r} (expected 'M', 'MS' or 'S')")

        if self.scale:
            # Fit on the train rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        data_stamp = self._build_time_marks(df_raw[['date']][border1:border2])

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

        if self.set_type == 0 and self.args.augmentation_ratio > 0:
            self.data_x, self.data_y, _ = run_augmentation_single(self.data_x, self.data_y, self.args)

        self.data_stamp = data_stamp

    def _build_time_marks(self, df_stamp):
        """Encode the 'date' column of ``df_stamp`` as per-row time features.

        With ``timeenc == 0`` the result is an int64 array with the columns
        month, day, weekday, hour. With ``timeenc == 1`` the result is the
        transposed output of ``time_features``.

        Raises:
            ValueError: if ``timeenc`` is neither 0 nor 1.
        """
        dates = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            # The .dt accessor replaces Series.apply(f, 1) and drop(labels, 1),
            # whose extra positional arguments are rejected by pandas >= 2.0.
            fields = [dates.dt.month, dates.dt.day, dates.dt.weekday, dates.dt.hour]
            return np.stack([f.to_numpy() for f in fields], axis=1).astype(np.int64)
        if self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            return data_stamp.transpose(1, 0)
        raise ValueError(f"Unsupported timeenc: {self.timeenc!r} (expected 0 or 1)")

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        starts ``label_len`` rows before the end of ``seq_x`` and spans
        ``label_len + pred_len`` rows.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows that fit entirely inside this split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the standardization applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_M4(Dataset):
    """Random-window sampler over the M4 series of one seasonal pattern.

    Each item is drawn from one series: a cut point is sampled at random near
    the end of the series, the ``seq_len`` values before it form the input
    window and the values around/after it form the output window. Windows
    shorter than their buffer are zero-padded and described by a 0/1 mask.
    """

    def __init__(self, args, root_path, flag='pred', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=False, inverse=False, timeenc=0, freq='15min',
                 seasonal_patterns='Yearly'):
        """Store the configuration and load the series.

        Args:
            args: accepted for a uniform constructor signature; not stored.
            root_path: forwarded to ``M4Dataset.load`` as ``dataset_file``.
            flag: 'train' loads the training values, anything else the test values.
            size: ``[seq_len, label_len, pred_len]``; must be provided.
            features, target, scale, inverse, timeenc: stored but not used by
                the methods of this class.
            data_path, freq: accepted for signature compatibility; not used.
            seasonal_patterns: M4 group to keep; also selects the history
                size from ``M4Meta.history_size``.
        """
        # size [seq_len, label_len, pred_len]
        # init
        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.root_path = root_path

        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]

        self.seasonal_patterns = seasonal_patterns
        self.history_size = M4Meta.history_size[seasonal_patterns]
        # How far back from the end of a series a cut point may be sampled.
        self.window_sampling_limit = int(self.history_size * self.pred_len)
        self.flag = flag

        self.__read_data__()

    @staticmethod
    def _strip_nans(series_collection):
        """Return a list holding each series with its NaN entries removed.

        A plain list is built on purpose: the cleaned series usually differ
        in length, and wrapping them in ``np.array(...)`` raises ValueError
        for ragged input on NumPy >= 1.24.
        """
        return [v[~np.isnan(v)] for v in series_collection]

    def __read_data__(self):
        """Load the M4 values and keep the series of the selected pattern."""
        # M4Dataset.initialize()
        dataset = M4Dataset.load(training=(self.flag == 'train'), dataset_file=self.root_path)
        group_mask = dataset.groups == self.seasonal_patterns  # split different frequencies
        self.ids = np.array([i for i in dataset.ids[group_mask]])
        self.timeseries = self._strip_nans(dataset.values[group_mask])

    def __getitem__(self, index):
        """Sample one ``(insample, outsample, insample_mask, outsample_mask)`` tuple.

        All four arrays have a trailing axis of size 1. ``insample`` is
        right-aligned in a buffer of ``seq_len`` rows, ``outsample`` is
        left-aligned in a buffer of ``label_len + pred_len`` rows.
        """
        insample = np.zeros((self.seq_len, 1))
        insample_mask = np.zeros((self.seq_len, 1))
        outsample = np.zeros((self.pred_len + self.label_len, 1))
        outsample_mask = np.zeros((self.pred_len + self.label_len, 1))  # m4 dataset

        sampled_timeseries = self.timeseries[index]
        cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit),
                                      high=len(sampled_timeseries),
                                      size=1)[0]

        insample_window = sampled_timeseries[max(0, cut_point - self.seq_len):cut_point]
        insample[-len(insample_window):, 0] = insample_window
        insample_mask[-len(insample_window):, 0] = 1.0
        outsample_window = sampled_timeseries[
                           max(0, cut_point - self.label_len):min(len(sampled_timeseries), cut_point + self.pred_len)]
        outsample[:len(outsample_window), 0] = outsample_window
        outsample_mask[:len(outsample_window), 0] = 1.0
        return insample, outsample, insample_mask, outsample_mask

    def __len__(self):
        """Number of series kept for the selected pattern."""
        return len(self.timeseries)

    def inverse_transform(self, data):
        # NOTE(review): no method of this class assigns ``self.scaler``, so as
        # written this raises AttributeError - confirm whether it is ever called.
        return self.scaler.inverse_transform(data)

    def last_insample_window(self):
        """
        The last window of insample size of all timeseries.
        This function does not support batching and does not reshuffle timeseries.

        :return: Last insample window of all timeseries. Shape "timeseries, insample size"
        """
        insample = np.zeros((len(self.timeseries), self.seq_len))
        insample_mask = np.zeros((len(self.timeseries), self.seq_len))
        for i, ts in enumerate(self.timeseries):
            ts_last_window = ts[-self.seq_len:]
            # Right-align; series shorter than seq_len keep leading zeros.
            insample[i, -len(ts_last_window):] = ts_last_window
            insample_mask[i, -len(ts_last_window):] = 1.0
        return insample, insample_mask
class PSMSegLoader(Dataset):
    """Windowed anomaly-detection dataset built from the PSM CSV files.

    Local ``train.csv`` / ``test.csv`` / ``test_label.csv`` under
    ``root_path`` are used when all three exist; otherwise the data is loaded
    from the Hugging Face repo. The first column of every table is dropped,
    NaNs are replaced, and both splits are standardized with statistics
    fitted on the training data. Validation is the last 20% of the train rows.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_path = os.path.join(root_path, "train.csv")
        test_path = os.path.join(root_path, "test.csv")
        label_path = os.path.join(root_path, "test_label.csv")
        local_files = [train_path, test_path, label_path]

        have_local_copy = all(os.path.exists(fp) for fp in local_files)
        if have_local_copy:
            train_df = pd.read_csv(train_path)
            test_df = pd.read_csv(test_path)
            test_label_df = pd.read_csv(label_path)
        else:
            ds_data = load_dataset(HUGGINGFACE_REPO, name="PSM-data")
            ds_label = load_dataset(HUGGINGFACE_REPO, name="PSM-label")
            train_df = ds_data["train"].to_pandas()
            test_df = ds_data["test"].to_pandas()
            first_split = next(iter(ds_label))
            test_label_df = ds_label[first_split].to_pandas()

        # Column 0 is dropped from every table; NaNs become zeros.
        train_values = np.nan_to_num(train_df.values[:, 1:])
        self.scaler.fit(train_values)
        train_values = self.scaler.transform(train_values)

        test_values = np.nan_to_num(test_df.values[:, 1:])
        self.test = self.scaler.transform(test_values)
        self.train = train_values
        self.val = self.train[int(len(self.train) * 0.8):]
        self.test_labels = test_label_df.values[:, 1:]
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows for the active flag.

        'train', 'val' and 'test' advance by ``step``; any other flag walks
        the test data in non-overlapping windows of ``win_size``.
        """
        if self.flag == "train":
            rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            rows, stride = self.test.shape[0], self.step
        else:
            rows, stride = self.test.shape[0], self.win_size
        return (rows - self.win_size) // stride + 1

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        For 'train' and 'val' the labels are always the first ``win_size``
        test labels; only the test-based flags return aligned labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            window = self.train[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'val':
            window = self.val[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'test':
            window = self.test[start:stop]
            labels = self.test_labels[start:stop]
        else:
            # Non-overlapping windows: position depends on win_size, not step.
            begin = start // self.step * self.win_size
            end = begin + self.win_size
            window = self.test[begin:end]
            labels = self.test_labels[begin:end]
        return np.float32(window), np.float32(labels)
class MSLSegLoader(Dataset):
    """Windowed anomaly-detection dataset built from the MSL ``.npy`` files.

    Local ``MSL_train.npy`` / ``MSL_test.npy`` / ``MSL_test_label.npy`` under
    ``root_path`` are used when all three exist; otherwise they are fetched
    from the Hugging Face dataset repo. Both splits are standardized with
    statistics fitted on the training data. Validation is the last 20% of
    the train rows.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_path = os.path.join(root_path, "MSL_train.npy")
        test_path = os.path.join(root_path, "MSL_test.npy")
        label_path = os.path.join(root_path, "MSL_test_label.npy")

        have_local_copy = all(os.path.exists(fp) for fp in [train_path, test_path, label_path])
        if not have_local_copy:
            # Replace the local paths with the downloaded locations.
            train_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="MSL/MSL_train.npy", repo_type="dataset")
            test_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="MSL/MSL_test.npy", repo_type="dataset")
            label_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="MSL/MSL_test_label.npy", repo_type="dataset")

        train_raw = np.load(train_path)
        test_raw = np.load(test_path)
        label_raw = np.load(label_path)

        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)
        self.test = self.scaler.transform(test_raw)
        self.test_labels = label_raw
        self.val = self.train[int(len(self.train) * 0.8):]
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows for the active flag.

        'train', 'val' and 'test' advance by ``step``; any other flag walks
        the test data in non-overlapping windows of ``win_size``.
        """
        if self.flag == "train":
            rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            rows, stride = self.test.shape[0], self.step
        else:
            rows, stride = self.test.shape[0], self.win_size
        return (rows - self.win_size) // stride + 1

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        For 'train' and 'val' the labels are always the first ``win_size``
        test labels; only the test-based flags return aligned labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            window = self.train[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'val':
            window = self.val[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'test':
            window = self.test[start:stop]
            labels = self.test_labels[start:stop]
        else:
            # Non-overlapping windows: position depends on win_size, not step.
            begin = start // self.step * self.win_size
            end = begin + self.win_size
            window = self.test[begin:end]
            labels = self.test_labels[begin:end]
        return np.float32(window), np.float32(labels)
class SMAPSegLoader(Dataset):
    """Windowed anomaly-detection dataset built from the SMAP ``.npy`` files.

    Local ``SMAP_train.npy`` / ``SMAP_test.npy`` / ``SMAP_test_label.npy``
    under ``root_path`` are used when all three exist; otherwise they are
    fetched from the Hugging Face dataset repo. Both splits are standardized
    with statistics fitted on the training data. Validation is the last 20%
    of the train rows.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_path = os.path.join(root_path, "SMAP_train.npy")
        test_path = os.path.join(root_path, "SMAP_test.npy")
        label_path = os.path.join(root_path, "SMAP_test_label.npy")

        have_local_copy = all(os.path.exists(fp) for fp in [train_path, test_path, label_path])
        if not have_local_copy:
            # Replace the local paths with the downloaded locations.
            train_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMAP/SMAP_train.npy", repo_type="dataset")
            test_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMAP/SMAP_test.npy", repo_type="dataset")
            label_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMAP/SMAP_test_label.npy", repo_type="dataset")

        train_raw = np.load(train_path)
        test_raw = np.load(test_path)
        label_raw = np.load(label_path)

        # Standardize with train statistics.
        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)
        self.test = self.scaler.transform(test_raw)
        self.test_labels = label_raw
        self.val = self.train[int(len(self.train) * 0.8):]
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows for the active flag.

        'train', 'val' and 'test' advance by ``step``; any other flag walks
        the test data in non-overlapping windows of ``win_size``.
        """
        if self.flag == "train":
            rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            rows, stride = self.test.shape[0], self.step
        else:
            rows, stride = self.test.shape[0], self.win_size
        return (rows - self.win_size) // stride + 1

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        For 'train' and 'val' the labels are always the first ``win_size``
        test labels; only the test-based flags return aligned labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            window = self.train[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'val':
            window = self.val[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'test':
            window = self.test[start:stop]
            labels = self.test_labels[start:stop]
        else:
            # Non-overlapping windows: position depends on win_size, not step.
            begin = start // self.step * self.win_size
            end = begin + self.win_size
            window = self.test[begin:end]
            labels = self.test_labels[begin:end]
        return np.float32(window), np.float32(labels)
class SMDSegLoader(Dataset):
    """Windowed anomaly-detection dataset built from the SMD ``.npy`` files.

    Local ``SMD_train.npy`` / ``SMD_test.npy`` / ``SMD_test_label.npy`` under
    ``root_path`` are used when all three exist; otherwise they are fetched
    from the Hugging Face dataset repo. Both splits are standardized with
    statistics fitted on the training data. Validation is the last 20% of
    the train rows. The default ``step`` here is 100.
    """

    def __init__(self, args, root_path, win_size, step=100, flag="train"):
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_path = os.path.join(root_path, "SMD_train.npy")
        test_path = os.path.join(root_path, "SMD_test.npy")
        label_path = os.path.join(root_path, "SMD_test_label.npy")

        have_local_copy = all(os.path.exists(fp) for fp in [train_path, test_path, label_path])
        if not have_local_copy:
            # Replace the local paths with the downloaded locations.
            train_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMD/SMD_train.npy", repo_type="dataset")
            test_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMD/SMD_test.npy", repo_type="dataset")
            label_path = hf_hub_download(repo_id=HUGGINGFACE_REPO, filename="SMD/SMD_test_label.npy", repo_type="dataset")

        train_raw = np.load(train_path)
        test_raw = np.load(test_path)
        label_raw = np.load(label_path)

        self.scaler.fit(train_raw)
        self.train = self.scaler.transform(train_raw)
        self.test = self.scaler.transform(test_raw)
        self.val = self.train[int(len(self.train) * 0.8):]
        self.test_labels = label_raw
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows for the active flag.

        'train', 'val' and 'test' advance by ``step``; any other flag walks
        the test data in non-overlapping windows of ``win_size``.
        """
        if self.flag == "train":
            rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            rows, stride = self.test.shape[0], self.step
        else:
            rows, stride = self.test.shape[0], self.win_size
        return (rows - self.win_size) // stride + 1

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        For 'train' and 'val' the labels are always the first ``win_size``
        test labels; only the test-based flags return aligned labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            window = self.train[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'val':
            window = self.val[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'test':
            window = self.test[start:stop]
            labels = self.test_labels[start:stop]
        else:
            # Non-overlapping windows: position depends on win_size, not step.
            begin = start // self.step * self.win_size
            end = begin + self.win_size
            window = self.test[begin:end]
            labels = self.test_labels[begin:end]
        return np.float32(window), np.float32(labels)
class SWATSegLoader(Dataset):
    """Windowed anomaly-detection dataset built from the SWaT CSV files.

    Local ``swat_train2.csv`` / ``swat2.csv`` under ``root_path`` are used
    when both exist; otherwise the "SWaT" config is loaded from the Hugging
    Face repo. The last column of the test table is taken as the label and
    the last column is dropped from both tables before they are standardized
    with statistics fitted on the training data. Validation is the last 20%
    of the train rows.
    """

    def __init__(self, args, root_path, win_size, step=1, flag="train"):
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train2_path = os.path.join(root_path, "swat_train2.csv")
        test_path = os.path.join(root_path, "swat2.csv")

        have_local_copy = all(os.path.exists(fp) for fp in [train2_path, test_path])
        if have_local_copy:
            train_frame = pd.read_csv(train2_path)
            test_frame = pd.read_csv(test_path)
        else:
            ds = load_dataset(HUGGINGFACE_REPO, name="SWaT")
            train_frame = ds["train"].to_pandas()
            test_frame = ds["test"].to_pandas()

        labels = test_frame.values[:, -1:]
        train_values = train_frame.values[:, :-1]
        test_values = test_frame.values[:, :-1]

        self.scaler.fit(train_values)
        self.train = self.scaler.transform(train_values)
        self.test = self.scaler.transform(test_values)
        self.val = self.train[int(len(self.train) * 0.8):]
        self.test_labels = labels
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """Number of windows for the active flag.

        'train', 'val' and 'test' advance by ``step``; any other flag walks
        the test data in non-overlapping windows of ``win_size``.
        """
        if self.flag == "train":
            rows, stride = self.train.shape[0], self.step
        elif self.flag == 'val':
            rows, stride = self.val.shape[0], self.step
        elif self.flag == 'test':
            rows, stride = self.test.shape[0], self.step
        else:
            rows, stride = self.test.shape[0], self.win_size
        return (rows - self.win_size) // stride + 1

    def __getitem__(self, index):
        """Return ``(window, labels)`` as float32 arrays.

        For 'train' and 'val' the labels are always the first ``win_size``
        test labels; only the test-based flags return aligned labels.
        """
        start = index * self.step
        stop = start + self.win_size
        if self.flag == "train":
            window = self.train[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'val':
            window = self.val[start:stop]
            labels = self.test_labels[0:self.win_size]
        elif self.flag == 'test':
            window = self.test[start:stop]
            labels = self.test_labels[start:stop]
        else:
            # Non-overlapping windows: position depends on win_size, not step.
            begin = start // self.step * self.win_size
            end = begin + self.win_size
            window = self.test[begin:end]
            labels = self.test_labels[begin:end]
        return np.float32(window), np.float32(labels)
class UEAloader(Dataset):
    """
    Dataset class for datasets included in:
        Time Series Classification Archive (www.timeseriesclassification.com)

    One `.ts` file is loaded per instance: `<args.model_id>_TRAIN.ts` when the
    flag contains "train" (case-insensitive), otherwise `<args.model_id>_TEST.ts`.
    The file is taken from `root_path` if present there, else downloaded from
    the Hugging Face dataset repo.

    Argument:
        limit_size: optional cap on the number of samples; a value > 1 is used as
            an absolute count, a value <= 1 as a proportion of all samples.
    Attributes:
        all_df: (num_samples * seq_len, num_columns) dataframe indexed by integer indices, with multiple rows corresponding to the same index (sample).
            Each row is a time step; Each column contains either metadata (e.g. timestamp) or a feature.
        feature_df: (num_samples * seq_len, feat_dim) dataframe; here it is `all_df` (all columns)
            after being passed through `Normalizer().normalize`
        feature_names: names of columns contained in `feature_df` (same as feature_df.columns)
        all_IDs: (num_samples,) series of IDs contained in `all_df`/`feature_df` (same as all_df.index.unique() )
        labels_df: (num_samples, num_labels) pd.DataFrame of label(s) for each sample
        max_seq_len: maximum sequence (time series) length found in the loaded file (set in `load_single`).
        class_names: the label categories found in the loaded file (set in `load_single`).
    """

    def __init__(self, args, root_path, file_list=None, limit_size=None, flag=None):
        self.args = args
        self.root_path = root_path
        self.flag = flag
        self.all_df, self.labels_df = self.load_all(root_path, file_list=file_list, flag=flag)
        self.all_IDs = self.all_df.index.unique()  # all sample IDs (integer indices 0 ... num_samples-1)

        if limit_size is not None:
            if limit_size > 1:
                limit_size = int(limit_size)
            else:  # interpret as proportion if in (0, 1]
                limit_size = int(limit_size * len(self.all_IDs))
            self.all_IDs = self.all_IDs[:limit_size]
            self.all_df = self.all_df.loc[self.all_IDs]

        # use all features
        self.feature_names = self.all_df.columns
        self.feature_df = self.all_df

        # pre_process
        normalizer = Normalizer()
        self.feature_df = normalizer.normalize(self.feature_df)
        print(len(self.all_IDs))

    def _resolve_ts_path(self, root_path, dataset_name, flag):
        """Return the path of the `.ts` file for `dataset_name` and `flag`.

        A file in `root_path` is preferred; if it does not exist the file is
        fetched with `hf_hub_download` and that returned path is used.
        """
        # Any flag containing "train" selects the TRAIN split; everything else is TEST.
        split = "TRAIN" if "train" in str(flag).lower() else "TEST"
        fname = f"{dataset_name}_{split}.ts"
        local = os.path.join(root_path, fname)
        if os.path.exists(local):
            return local
        return hf_hub_download(HUGGINGFACE_REPO, filename=f"{dataset_name}/{fname}", repo_type="dataset")

    def load_all(self, root_path, file_list=None, flag=None):
        """
        Loads the single `.ts` file selected by `self.args.model_id` and `flag` into a dataframe.
        Args:
            root_path: directory searched first for the `.ts` file
            file_list: accepted for interface compatibility; not used by this implementation.
            flag: split selector; None is treated as "train".
        Returns:
            all_df: dataframe with all time steps of all samples of the selected file
            labels_df: dataframe containing label(s) for each sample
        """
        # Select paths for training and evaluation
        dataset_name = self.args.model_id
        ts_path = self._resolve_ts_path(root_path, dataset_name, flag or "train")
        all_df, labels_df = self.load_single(ts_path)
        return all_df, labels_df

    def load_single(self, filepath):
        """Parse one `.ts` file into a long-format feature dataframe and a label dataframe.

        Also sets `self.class_names` and `self.max_seq_len` as side effects.
        """
        df, labels = load_from_tsfile_to_dataframe(filepath, return_separate_X_and_y=True,
                                                   replace_missing_vals_with='NaN')
        # Encode the labels as integer category codes.
        labels = pd.Series(labels, dtype="category")
        self.class_names = labels.cat.categories
        labels_df = pd.DataFrame(labels.cat.codes,
                                 dtype=np.int8)  # int8-32 gives an error when using nn.CrossEntropyLoss

        lengths = df.applymap(
            lambda x: len(x)).values  # (num_samples, num_dimensions) array containing the length of each series

        horiz_diffs = np.abs(lengths - np.expand_dims(lengths[:, 0], -1))

        if np.sum(horiz_diffs) > 0:  # if any row (sample) has varying length across dimensions
            df = df.applymap(subsample)

        # Recompute lengths, since subsample may have changed them.
        lengths = df.applymap(lambda x: len(x)).values
        vert_diffs = np.abs(lengths - np.expand_dims(lengths[0, :], 0))
        if np.sum(vert_diffs) > 0:  # if any column (dimension) has varying length across samples
            self.max_seq_len = int(np.max(lengths[:, 0]))
        else:
            self.max_seq_len = lengths[0, 0]

        # First create a (seq_len, feat_dim) dataframe for each sample, indexed by a single integer ("ID" of the sample)
        # Then concatenate into a (num_samples * seq_len, feat_dim) dataframe, with multiple rows corresponding to the
        # sample index (i.e. the same scheme as all datasets in this project)

        df = pd.concat((pd.DataFrame({col: df.loc[row, col] for col in df.columns}).reset_index(drop=True).set_index(
            pd.Series(lengths[row, 0] * [row])) for row in range(df.shape[0])), axis=0)

        # Replace NaN values
        grp = df.groupby(by=df.index)
        df = grp.transform(interpolate_missing)

        return df, labels_df

    def instance_norm(self, case):
        """Normalize one sample tensor, but only when `root_path` contains 'EthanolConcentration'.

        Any other dataset is returned unchanged.
        """
        if self.root_path.count('EthanolConcentration') > 0:  # special process for numerical stability
            mean = case.mean(0, keepdim=True)
            case = case - mean
            # NOTE(review): the mean is taken over dim 0 but the variance over dim 1 - confirm this is intended.
            stdev = torch.sqrt(torch.var(case, dim=1, keepdim=True, unbiased=False) + 1e-5)
            case /= stdev
            return case
        else:
            return case

    def __getitem__(self, ind):
        """Return `(features, labels)` tensors for the sample at position `ind`."""
        batch_x = self.feature_df.loc[self.all_IDs[ind]].values
        labels = self.labels_df.loc[self.all_IDs[ind]].values
        # Augmentation only runs when the flag is exactly "TRAIN" (upper case).
        if self.flag == "TRAIN" and self.args.augmentation_ratio > 0:
            num_samples = len(self.all_IDs)
            num_columns = self.feature_df.shape[1]
            # presumably every sample has the same number of rows; verify for variable-length datasets
            seq_len = int(self.feature_df.shape[0] / num_samples)
            batch_x = batch_x.reshape((1, seq_len, num_columns))
            batch_x, labels, augmentation_tags = run_augmentation_single(batch_x, labels, self.args)

            batch_x = batch_x.reshape((1 * seq_len, num_columns))

        return self.instance_norm(torch.from_numpy(batch_x)), \
               torch.from_numpy(labels)

    def __len__(self):
        """Number of samples (after any `limit_size` truncation)."""
        return len(self.all_IDs)
================================================
FILE: data_provider/m4.py
================================================
# This source code is provided for the purposes of scientific reproducibility
# under the following limited license from Element AI Inc. The code is an
# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
# expansion analysis for interpretable time series forecasting,
# https://arxiv.org/abs/1905.10437). The copyright to the source code is
# licensed under the Creative Commons - Attribution-NonCommercial 4.0
# International license (CC BY-NC 4.0):
# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
# for the benefit of third parties or internally in production) requires an
# explicit license. The subject-matter of the N-BEATS model and associated
# materials are the property of Element AI Inc. and may be subject to patent
# protection. No license to patents is granted hereunder (whether express or
# implied). Copyright © 2020 Element AI Inc. All rights reserved.
"""
M4 Dataset
"""
import logging
import os
from collections import OrderedDict
from dataclasses import dataclass
from glob import glob
import numpy as np
import pandas as pd
import patoolib
from tqdm import tqdm
import logging
import os
import pathlib
import sys
from urllib import request
from huggingface_hub import hf_hub_download
# Hugging Face dataset repository that hosts the cached M4 files.
HUGGINGFACE_REPO = "thuml/Time-Series-Library"


def _ensure_m4_triplet(root_dir="./dataset/m4", repo_id=HUGGINGFACE_REPO):
    """
    Make sure ``M4-info.csv``, ``training.npz`` and ``test.npz`` exist in ``root_dir``.

    Files that are already present are left alone; missing ones are fetched
    from the Hugging Face dataset repository ``repo_id``.

    :param root_dir: Directory that must contain the three files. It is
        created when missing.
    :param repo_id: Hugging Face dataset repository to download from.
    """
    import shutil  # only needed on the download path

    root_dir = os.path.abspath(root_dir)
    os.makedirs(root_dir, exist_ok=True)

    files = {
        "M4-info.csv": "m4/M4-info.csv",
        "training.npz": "m4/training.npz",
        "test.npz": "m4/test.npz",
    }

    for name, remote in files.items():
        dst = os.path.join(root_dir, name)
        if os.path.exists(dst):
            continue
        path = hf_hub_download(
            repo_id=repo_id,
            filename=remote,
            repo_type="dataset",
            local_dir="./dataset",
            local_dir_use_symlinks=False
        )
        # The download is placed relative to local_dir ("./dataset"), which
        # only coincides with dst when root_dir is ./dataset/m4. For any other
        # root_dir, copy the file to where the caller expects it.
        if not os.path.exists(dst):
            shutil.copyfile(path, dst)
def url_file_name(url: str) -> str:
    """
    Return the last path component of a URL.

    :param url: URL to take the file name from.
    :return: Text after the final '/', or an empty string for an empty URL.
    """
    if len(url) == 0:
        return ''
    return url.rsplit('/', 1)[-1]
def download(url: str, file_path: str) -> None:
    """
    Download a file to the given path.

    Nothing is fetched when ``file_path`` already exists as a file; in both
    cases the file size is reported through ``logging.info``. Missing parent
    directories of ``file_path`` are created.

    :param url: URL to download
    :param file_path: Where to download the content.
    """

    def progress(count, block_size, total_size):
        # urlretrieve reports total_size <= 0 when the size is unknown or the
        # payload is empty; a percentage cannot be computed then.
        transferred = count * block_size
        if total_size > 0:
            # The last block is usually partial, so cap the figure at 100%.
            progress_pct = min(float(transferred) / float(total_size) * 100.0, 100.0)
            sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct))
        else:
            sys.stdout.write('\rDownloading {} to {} {} bytes'.format(url, file_path, transferred))
        sys.stdout.flush()

    if not os.path.isfile(file_path):
        opener = request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        # NOTE: installs the opener process-wide; urlretrieve uses the global opener.
        request.install_opener(opener)
        pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
        f, _ = request.urlretrieve(url, file_path, progress)
        sys.stdout.write('\n')
        sys.stdout.flush()
        file_info = os.stat(f)
        logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.')
    else:
        file_info = os.stat(file_path)
        logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.')
@dataclass()
class M4Dataset:
    """Metadata columns of ``M4-info.csv`` together with the cached series values."""
    ids: np.ndarray  # M4id column
    groups: np.ndarray  # SP column
    frequencies: np.ndarray  # Frequency column
    horizons: np.ndarray  # Horizon column
    values: np.ndarray  # content of training.npz or test.npz

    @staticmethod
    def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset':
        """
        Load cached dataset, downloading the cache files first when they are missing.

        :param training: Load training part if training is True, test part otherwise.
        :param dataset_file: Directory holding ``M4-info.csv``, ``training.npz`` and ``test.npz``.
        :return: Populated ``M4Dataset``.
        """
        _ensure_m4_triplet(dataset_file, repo_id=HUGGINGFACE_REPO)

        cache_name = 'training.npz' if training else 'test.npz'
        info = pd.read_csv(os.path.join(dataset_file, 'M4-info.csv'))
        metadata = {
            'ids': info.M4id.values,
            'groups': info.SP.values,
            'frequencies': info.Frequency.values,
            'horizons': info.Horizon.values,
        }
        # NOTE(review): allow_pickle=True executes pickled content; only use with trusted cache files.
        return M4Dataset(values=np.load(os.path.join(dataset_file, cache_name), allow_pickle=True),
                         **metadata)
@dataclass()
class M4Meta:
    """Static lookup tables for the six M4 seasonal patterns."""
    seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']
    horizons = [6, 8, 18, 13, 14, 48]
    frequencies = [1, 4, 12, 1, 1, 24]
    # different predict length
    horizons_map = dict(zip(seasonal_patterns, horizons))
    frequency_map = dict(zip(seasonal_patterns, frequencies))
    # from interpretable.gin
    history_size = dict(zip(seasonal_patterns, [1.5, 1.5, 1.5, 10, 10, 10]))
def load_m4_info(info_file: str = './dataset/m4/M4-info.csv') -> pd.DataFrame:
    """
    Load M4Info file.

    :param info_file: Path to ``M4-info.csv``. Defaults to
        ``./dataset/m4/M4-info.csv``.
    :return: Pandas DataFrame of M4Info.
    :raises FileNotFoundError: If ``info_file`` does not exist.
    """
    return pd.read_csv(info_file)
================================================
FILE: data_provider/uea.py
================================================
import os
import numpy as np
import pandas as pd
import torch
def collate_fn(data, max_len=None):
    """Build mini-batch tensors from a list of (X, y) tuples, zero-padding X to a common length.

    Args:
        data: len(batch_size) list of tuples (X, y).
            - X: torch tensor of shape (seq_length, feat_dim); variable seq_length.
            - y: torch tensor of shape (num_labels,) : class indices or numerical targets
                (for classification or regression, respectively). num_labels > 1 for multi-task models
        max_len: global fixed sequence length. Used for architectures requiring fixed length input,
            where the batch length cannot vary dynamically. Longer sequences are clipped, shorter are padded with 0s.
            When None, the longest sequence of the batch is used.

    Returns:
        X: (batch_size, padded_length, feat_dim) torch tensor of zero-padded features
        targets: (batch_size, num_labels) torch tensor of stacked labels
        padding_masks: (batch_size, padded_length) boolean tensor, 1 means keep vector at this position, 0 means padding
    """
    batch_size = len(data)
    features, labels = zip(*data)

    # Stack and pad features (convert 2D to 3D tensors, i.e. add batch dimension)
    lengths = [X.shape[0] for X in features]  # original sequence length for each time series
    if max_len is None:
        max_len = max(lengths)

    X = torch.zeros(batch_size, max_len, features[0].shape[-1])  # (batch_size, padded_length, feat_dim)
    for i, (sample, length) in enumerate(zip(features, lengths)):
        end = min(length, max_len)
        X[i, :end, :] = sample[:end, :]

    targets = torch.stack(labels, dim=0)  # (batch_size, num_labels)

    # Lengths are kept as int64: int16 cannot represent sequences longer than 32767 steps.
    padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.long),
                                 max_len=max_len)  # (batch_size, padded_length) boolean tensor, "1" means keep

    return X, targets, padding_masks
def padding_mask(lengths, max_len=None):
    """
    Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths,
    where 1 means keep element at this position (time step)

    Args:
        lengths: 1D tensor with the length of every sequence in the batch.
        max_len: number of mask columns; when None or 0 the largest value in `lengths` is used.

    Returns:
        (batch_size, max_len) boolean tensor on the device of `lengths`.
    """
    batch_size = lengths.numel()
    if not max_len:
        # Fall back to the longest sequence; an empty batch yields a mask with zero columns.
        max_len = int(lengths.max()) if batch_size > 0 else 0
    # Positions stay int64 so that a narrow `lengths` dtype (e.g. int16) cannot wrap the index around.
    positions = torch.arange(0, max_len, device=lengths.device)
    return positions.repeat(batch_size, 1).lt(lengths.unsqueeze(1))
class Normalizer(object):
    """
    Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization.
    """

    def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None):
        """
        Args:
            norm_type: choose from:
                "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps)
                "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. across only its own rows)
            mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values
        """
        self.norm_type = norm_type
        self.mean = mean
        self.std = std
        self.min_val = min_val
        self.max_val = max_val

    def normalize(self, df):
        """
        Args:
            df: input dataframe
        Returns:
            df: normalized dataframe
        Raises:
            NameError: if `norm_type` is not one of the supported methods.
        """
        eps = np.finfo(float).eps  # keeps every division finite when a feature is constant

        if self.norm_type == "standardization":
            # Each statistic is filled in independently, so passing only one of them works.
            if self.mean is None:
                self.mean = df.mean()
            if self.std is None:
                self.std = df.std()
            return (df - self.mean) / (self.std + eps)

        elif self.norm_type == "minmax":
            if self.max_val is None:
                self.max_val = df.max()
            if self.min_val is None:
                self.min_val = df.min()
            return (df - self.min_val) / (self.max_val - self.min_val + eps)

        elif self.norm_type == "per_sample_std":
            # Rows sharing an index value form one sample.
            grouped = df.groupby(by=df.index)
            return (df - grouped.transform('mean')) / (grouped.transform('std') + eps)

        elif self.norm_type == "per_sample_minmax":
            grouped = df.groupby(by=df.index)
            min_vals = grouped.transform('min')
            return (df - min_vals) / (grouped.transform('max') - min_vals + eps)

        else:
            raise (NameError(f'Normalize method "{self.norm_type}" not implemented'))
def interpolate_missing(y):
    """
    Fill the NaN entries of pd.Series `y` by linear interpolation (in both directions).
    A series without NaN values is returned as is.
    """
    has_gaps = y.isna().any()
    if not has_gaps:
        return y
    return y.interpolate(method='linear', limit_direction='both')
def subsample(y, limit=256, factor=2):
    """
    Keep every `factor`-th element of a Series that is longer than `limit` and renumber its index;
    shorter Series are returned unchanged.
    """
    if len(y) <= limit:
        return y
    thinned = y[::factor]
    return thinned.reset_index(drop=True)
================================================
FILE: docker-compose.yml
================================================
services:
dev_tslib:
image: tslib
build:
context: .
target: tslib
# args:
# http_proxy: "http://192.168.8.135:7897" # optional; change this to your proxy address
# https_proxy: "http://192.168.8.135:7897" # optional; change this to your proxy address
container_name: tslib
shm_size: 8gb
tty: true
restart: always
environment:
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
# - http_proxy=http://192.168.8.135:7897 # optional; change this to your proxy address
# - https_proxy=http://192.168.8.135:7897 # optional; change this to your proxy address
# ports:
# - "8888:8888"
# - "6006:6006"
volumes:
- workspace_data:/workspace
working_dir: /workspace
volumes:
workspace_data:
================================================
FILE: exp/__init__.py
================================================
================================================
FILE: exp/exp_anomaly_detection.py
================================================
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, adjustment
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
warnings.filterwarnings('ignore')
class Exp_Anomaly_Detection(Exp_Basic):
    """
    Anomaly-detection experiment: the model is trained to reconstruct its
    input, and test points whose reconstruction error exceeds a percentile
    threshold are flagged as anomalies.
    """

    def __init__(self, args):
        super(Exp_Anomaly_Detection, self).__init__(args)

    def _build_model(self):
        """Instantiate ``args.model`` in float32; wrap it in DataParallel for multi-GPU runs."""
        model = self.model_dict[self.args.model](self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` produced by ``data_provider`` for the split ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """Adam over all model parameters with ``args.learning_rate``."""
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        """Mean-squared reconstruction error."""
        criterion = nn.MSELoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion):
        """
        Average reconstruction loss over ``vali_loader``.

        The model is switched to eval mode for the pass and back to train
        mode afterwards. ``vali_data`` is not used.
        """
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, _) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)

                outputs = self.model(batch_x, None, None, None)

                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, :, f_dim:]
                pred = outputs.detach()
                true = batch_x.detach()

                loss = criterion(pred, true)
                total_loss.append(loss.item())
        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss

    def train(self, setting):
        """
        Train with early stopping on the validation loss and reload the best checkpoint.

        :param setting: run identifier; checkpoints go to ``args.checkpoints/<setting>``.
        :return: the model with the best checkpoint loaded.
        """
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)

                outputs = self.model(batch_x, None, None, None)

                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, :, f_dim:]
                loss = criterion(outputs, batch_x)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1, self.args)

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def _reconstruction_scores(self, loader, keep_labels=False):
        """
        Score every time step of ``loader`` by its reconstruction error.

        :param loader: iterable yielding ``(batch_x, batch_y)``.
        :param keep_labels: also collect the ``batch_y`` batches when True.
        :return: ``(scores, labels)``; ``scores`` is a flat numpy array of the
            squared error averaged over the last dimension, ``labels`` the
            list of collected label batches (empty unless requested).
        """
        scores = []
        labels = []
        # Pure inference: no autograd graph is needed for either split.
        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(loader):
                batch_x = batch_x.float().to(self.device)
                # reconstruction
                outputs = self.model(batch_x, None, None, None)
                # criterion
                score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1)
                scores.append(score.detach().cpu().numpy())
                if keep_labels:
                    labels.append(batch_y)
        return np.concatenate(scores, axis=0).reshape(-1), labels

    def test(self, setting, test=0):
        """
        Evaluate anomaly detection on the test split.

        The threshold is the ``100 - args.anomaly_ratio`` percentile of the
        reconstruction errors of the train and test splits combined.
        Accuracy, precision, recall and F-score are printed and appended to
        ``result_anomaly_detection.txt``.

        :param setting: run identifier used to locate the checkpoint.
        :param test: when truthy, load ``args.checkpoints/<setting>/checkpoint.pth`` first.
        """
        test_data, test_loader = self._get_data(flag='test')
        train_data, train_loader = self._get_data(flag='train')
        if test:
            print('loading model')
            # Same location train() writes to, instead of a hard-coded ./checkpoints/.
            checkpoint = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
            self.model.load_state_dict(torch.load(checkpoint, map_location=self.device))

        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        self.anomaly_criterion = nn.MSELoss(reduction='none')

        # (1) statistics on the train set
        train_energy, _ = self._reconstruction_scores(train_loader)

        # (2) find the threshold
        test_energy, test_labels = self._reconstruction_scores(test_loader, keep_labels=True)
        combined_energy = np.concatenate([train_energy, test_energy], axis=0)
        threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio)
        print("Threshold :", threshold)

        # (3) evaluation on the test set
        pred = (test_energy > threshold).astype(int)
        test_labels = np.concatenate(test_labels, axis=0).reshape(-1)
        test_labels = np.array(test_labels)
        gt = test_labels.astype(int)

        print("pred:   ", pred.shape)
        print("gt:     ", gt.shape)

        # (4) detection adjustment
        gt, pred = adjustment(gt, pred)

        pred = np.array(pred)
        gt = np.array(gt)
        print("pred: ", pred.shape)
        print("gt:   ", gt.shape)

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary')
        summary = "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format(
            accuracy, precision,
            recall, f_score)
        print(summary)

        with open("result_anomaly_detection.txt", 'a') as f:
            f.write(setting + "  \n")
            f.write(summary)
            f.write('\n')
            f.write('\n')
        return
================================================
FILE: exp/exp_basic.py
================================================
import os
import torch
import importlib
import pkgutil
# Just put your model files under models/ folder
# e.g., models/Transformer.py, models/LSTM.py, etc.
# All models will be automatically detected and can be used by specifying their names.
class Exp_Basic(object):
    """
    Shared scaffolding for experiments: discovers the model modules, selects
    the compute device and builds the model through the subclass hook
    ``_build_model``.
    """

    def __init__(self, args):
        self.args = args

        # Module name -> import path for every file under models/; the
        # classes themselves are imported on first lookup.
        self.model_dict = LazyModelDict(self._scan_models_directory())

        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _scan_models_directory(self):
        """
        Map each ``.py`` file in the ``models`` folder (except ``__init__.py``)
        to its import path, e.g. ``{'Transformer': 'models.Transformer'}``.
        Returns an empty dict when the folder does not exist.
        """
        models_dir = 'models'
        if not os.path.exists(models_dir):
            return {}

        return {
            filename[:-3]: f"{models_dir}.{filename[:-3]}"
            for filename in os.listdir(models_dir)
            if filename.endswith('.py') and filename != '__init__.py'
        }

    def _build_model(self):
        """Hook for subclasses: must return the model to train."""
        raise NotImplementedError

    def _acquire_device(self):
        """Pick cuda, mps or cpu from ``args.use_gpu`` / ``args.gpu_type`` and report the choice."""
        gpu_type = self.args.gpu_type if self.args.use_gpu else None

        if gpu_type == 'cuda':
            if self.args.use_multi_gpu:
                visible_devices = self.args.devices
            else:
                visible_devices = str(self.args.gpu)
            os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        elif gpu_type == 'mps':
            device = torch.device('mps')
            print('Use GPU: mps')
        else:
            device = torch.device('cpu')
            print('Use CPU')
        return device

    def _get_data(self):
        """Placeholder; subclasses provide the data-loading logic."""
        pass

    def vali(self):
        """Placeholder; subclasses provide the validation logic."""
        pass

    def train(self):
        """Placeholder; subclasses provide the training logic."""
        pass

    def test(self):
        """Placeholder; subclasses provide the evaluation logic."""
        pass
class LazyModelDict(dict):
    """
    Dictionary that imports a model class the first time its key is
    subscripted and caches it for later lookups.
    """

    def __init__(self, model_map):
        # model_map: model name -> dotted import path of its module
        self.model_map = model_map
        super().__init__()

    def __getitem__(self, key):
        if key not in self:
            self[key] = self._import_model_class(key)
        return super().__getitem__(key)

    def _import_model_class(self, key):
        """Import the module registered for ``key`` and return its ``Model`` class (or the class named ``key``)."""
        if key not in self.model_map:
            raise NotImplementedError(f"Model [{key}] not found in 'models' directory.")

        module_path = self.model_map[key]
        try:
            print(f"🚀 Lazy Loading: {key} ...")
            module = importlib.import_module(module_path)
        except ImportError:
            print(f"❌ Error: Failed to import model [{key}]. Dependencies missing?")
            raise

        # 'Model' takes precedence over a class named after the key
        for candidate in ('Model', key):
            if hasattr(module, candidate):
                return getattr(module, candidate)

        raise AttributeError(f"Module {module_path} has no class 'Model' or '{key}'")
================================================
FILE: exp/exp_classification.py
================================================
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
import pdb
warnings.filterwarnings('ignore')
class Exp_Classification(Exp_Basic):
    """
    Time-series classification experiment: trains with cross-entropy and
    reports accuracy.
    """

    def __init__(self, args):
        super(Exp_Classification, self).__init__(args)

    def _build_model(self):
        """
        Derive the input-dependent arguments (sequence length, number of
        input channels, number of classes) from the data, then build the model.
        """
        # model input depends on data
        train_data, train_loader = self._get_data(flag='TRAIN')
        test_data, test_loader = self._get_data(flag='TEST')
        self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len)
        self.args.pred_len = 0
        self.args.enc_in = train_data.feature_df.shape[1]
        self.args.num_class = len(train_data.class_names)
        # model init
        model = self.model_dict[self.args.model](self.args).float()
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` produced by ``data_provider`` for the split ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """RAdam over all model parameters with ``args.learning_rate``."""
        # model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        """Cross-entropy over the class logits."""
        criterion = nn.CrossEntropyLoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion):
        """
        Evaluate on ``vali_loader``.

        :return: ``(mean loss, accuracy)``. The model is put back into train
            mode before returning. ``vali_data`` is not used.
        """
        total_loss = []
        preds = []
        trues = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, label, padding_mask) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)
                padding_mask = padding_mask.float().to(self.device)
                label = label.to(self.device)

                outputs = self.model(batch_x, padding_mask, None, None)

                pred = outputs.detach()
                # squeeze(-1) as in train(): a bare squeeze() would also drop
                # the batch dimension when the batch holds a single sample.
                loss = criterion(pred, label.long().squeeze(-1))
                total_loss.append(loss.item())

                preds.append(outputs.detach())
                trues.append(label)

        total_loss = np.average(total_loss)

        preds = torch.cat(preds, 0)
        trues = torch.cat(trues, 0)
        # (total_samples, num_classes) est. prob. for each class and sample
        probs = torch.nn.functional.softmax(preds, dim=1)
        predictions = torch.argmax(probs, dim=1).cpu().numpy()  # (total_samples,) int class index for each sample
        trues = trues.flatten().cpu().numpy()
        accuracy = cal_accuracy(predictions, trues)

        self.model.train()
        return total_loss, accuracy

    def train(self, setting):
        """
        Train with gradient clipping and early stopping on validation accuracy.

        The validation loader is built from the 'TEST' split, exactly like the
        test loader.

        :param setting: run identifier; checkpoints go to ``args.checkpoints/<setting>``.
        :return: the model with the best checkpoint loaded.
        """
        train_data, train_loader = self._get_data(flag='TRAIN')
        vali_data, vali_loader = self._get_data(flag='TEST')
        test_data, test_loader = self._get_data(flag='TEST')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()

            for i, (batch_x, label, padding_mask) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)
                padding_mask = padding_mask.float().to(self.device)
                label = label.to(self.device)

                outputs = self.model(batch_x, padding_mask, None, None)
                loss = criterion(outputs, label.long().squeeze(-1))
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0)
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion)
            test_loss, test_accuracy = self.vali(test_data, test_loader, criterion)

            print(
                "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f} Test Loss: {5:.3f} Test Acc: {6:.3f}"
                .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy, test_loss, test_accuracy))
            # EarlyStopping receives the negated accuracy as its score.
            early_stopping(-val_accuracy, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def test(self, setting, test=0):
        """
        Compute test accuracy and append it to ``./results/<setting>/result_classification.txt``.

        :param setting: run identifier used for checkpoint and result folders.
        :param test: when truthy, load ``args.checkpoints/<setting>/checkpoint.pth`` first.
        """
        test_data, test_loader = self._get_data(flag='TEST')
        if test:
            print('loading model')
            checkpoint = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
            self.model.load_state_dict(torch.load(checkpoint, map_location=self.device))

        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, label, padding_mask) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                padding_mask = padding_mask.float().to(self.device)
                label = label.to(self.device)

                outputs = self.model(batch_x, padding_mask, None, None)

                preds.append(outputs.detach())
                trues.append(label)

        preds = torch.cat(preds, 0)
        trues = torch.cat(trues, 0)
        print('test shape:', preds.shape, trues.shape)

        # (total_samples, num_classes) est. prob. for each class and sample
        probs = torch.nn.functional.softmax(preds, dim=1)
        predictions = torch.argmax(probs, dim=1).cpu().numpy()  # (total_samples,) int class index for each sample
        trues = trues.flatten().cpu().numpy()
        accuracy = cal_accuracy(predictions, trues)

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        print('accuracy:{}'.format(accuracy))
        file_name = 'result_classification.txt'
        with open(os.path.join(folder_path, file_name), 'a') as f:
            f.write(setting + "  \n")
            f.write('accuracy:{}'.format(accuracy))
            f.write('\n')
            f.write('\n')
        return
================================================
FILE: exp/exp_imputation.py
================================================
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
warnings.filterwarnings('ignore')
class Exp_Imputation(Exp_Basic):
    """
    Imputation experiment: entries of the input are masked at random and the
    model is trained and scored on reconstructing exactly those entries.
    """

    def __init__(self, args):
        super(Exp_Imputation, self).__init__(args)

    def _build_model(self):
        """Instantiate ``args.model`` in float32; wrap it in DataParallel for multi-GPU runs."""
        model = self.model_dict[self.args.model](self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` produced by ``data_provider`` for the split ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """Adam over all model parameters with ``args.learning_rate``."""
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        """Mean-squared error."""
        criterion = nn.MSELoss()
        return criterion

    def _random_mask(self, batch_x):
        """
        Draw a random 0/1 mask with the shape of ``batch_x``.

        0 marks a masked entry, 1 a kept entry; an entry is masked when its
        uniform draw is <= ``args.mask_rate``.
        """
        # B = batch size, T = seq len, N = number of features
        B, T, N = batch_x.shape
        mask = torch.rand((B, T, N)).to(self.device)
        mask[mask <= self.args.mask_rate] = 0  # masked
        mask[mask > self.args.mask_rate] = 1  # remained
        return mask

    def vali(self, vali_data, vali_loader, criterion):
        """
        Average loss on the masked entries over ``vali_loader``.

        The model is switched to eval mode for the pass and back to train
        mode afterwards. ``vali_data`` is not used.
        """
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)

                # random mask
                mask = self._random_mask(batch_x)
                inp = batch_x.masked_fill(mask == 0, 0)

                outputs = self.model(inp, batch_x_mark, None, None, mask)

                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, :, f_dim:]

                # add support for MS
                batch_x = batch_x[:, :, f_dim:]
                mask = mask[:, :, f_dim:]

                pred = outputs.detach()
                true = batch_x.detach()
                mask = mask.detach()

                loss = criterion(pred[mask == 0], true[mask == 0])
                total_loss.append(loss.item())
        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss

    def train(self, setting):
        """
        Train with early stopping on the validation loss and reload the best checkpoint.

        :param setting: run identifier; checkpoints go to ``args.checkpoints/<setting>``.
        :return: the model with the best checkpoint loaded.
        """
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)

                # random mask
                mask = self._random_mask(batch_x)
                inp = batch_x.masked_fill(mask == 0, 0)

                outputs = self.model(inp, batch_x_mark, None, None, mask)

                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, :, f_dim:]

                # add support for MS
                batch_x = batch_x[:, :, f_dim:]
                mask = mask[:, :, f_dim:]

                loss = criterion(outputs[mask == 0], batch_x[mask == 0])
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1, self.args)

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def test(self, setting, test=0):
        """
        Impute randomly masked test data and report the error on the masked entries.

        A plot is written to ``./test_results/<setting>/`` for every 20th
        batch; metrics, predictions and ground truth are saved under
        ``./results/<setting>/`` and a summary line is appended to
        ``result_imputation.txt``.

        :param setting: run identifier used for checkpoint and result folders.
        :param test: when truthy, load ``args.checkpoints/<setting>/checkpoint.pth`` first.
        """
        test_data, test_loader = self._get_data(flag='test')
        if test:
            print('loading model')
            # Same location train() writes to, instead of a hard-coded ./checkpoints/.
            checkpoint = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
            self.model.load_state_dict(torch.load(checkpoint, map_location=self.device))

        preds = []
        trues = []
        masks = []
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)

                # random mask
                mask = self._random_mask(batch_x)
                inp = batch_x.masked_fill(mask == 0, 0)

                # imputation
                outputs = self.model(inp, batch_x_mark, None, None, mask)

                # eval
                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, :, f_dim:]

                # add support for MS
                batch_x = batch_x[:, :, f_dim:]
                mask = mask[:, :, f_dim:]

                pred = outputs.detach().cpu().numpy()
                true = batch_x.detach().cpu().numpy()
                mask_np = mask.detach().cpu().numpy()
                preds.append(pred)
                trues.append(true)
                masks.append(mask_np)

                if i % 20 == 0:
                    # Ground truth where the entry was kept, model output where it was masked.
                    kept = mask_np[0, :, -1]
                    filled = true[0, :, -1] * kept + pred[0, :, -1] * (1 - kept)
                    visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf'))

        preds = np.concatenate(preds, 0)
        trues = np.concatenate(trues, 0)
        masks = np.concatenate(masks, 0)
        print('test shape:', preds.shape, trues.shape)

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0])
        print('mse:{}, mae:{}'.format(mse, mae))
        with open("result_imputation.txt", 'a') as f:
            f.write(setting + "  \n")
            f.write('mse:{}, mae:{}'.format(mse, mae))
            f.write('\n')
            f.write('\n')

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)
        return
================================================
FILE: exp/exp_long_term_forecasting.py
================================================
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
from utils.dtw_metric import dtw, accelerated_dtw
from utils.augmentation import run_augmentation, run_augmentation_single
warnings.filterwarnings('ignore')
class Exp_Long_Term_Forecast(Exp_Basic):
def __init__(self, args):
    """Forward ``args`` to the base-class initialiser."""
    super().__init__(args)
def _build_model(self):
    """Instantiate ``args.model`` in float32; wrap it in DataParallel for multi-GPU runs."""
    model_cls = self.model_dict[self.args.model]
    model = model_cls(self.args).float()

    if not (self.args.use_multi_gpu and self.args.use_gpu):
        return model
    return nn.DataParallel(model, device_ids=self.args.device_ids)
def _get_data(self, flag):
    """Return ``(dataset, dataloader)`` produced by ``data_provider`` for the split ``flag``."""
    dataset, loader = data_provider(self.args, flag)
    return dataset, loader
def _select_optimizer(self):
    """Adam over all model parameters with ``args.learning_rate``."""
    return optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
def _select_criterion(self):
    """Mean-squared error."""
    return nn.MSELoss()
def vali(self, vali_data, vali_loader, criterion):
total_loss = []
self.model.eval()
with torch.no_grad():
for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float()
batch_x_mark = batch_x_mark.float().to(self.device)
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
else:
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
f_dim = -1 if self.args.features == 'MS' else 0
outputs = outputs[:, -self.args.pred_len:, f_dim:]
batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
pred = outputs.detach()
true = batch_y.detach()
loss = criterion(pred, true)
total_loss.append(loss.item())
total_loss = np.average(total_loss)
self.model.train()
return total_loss
def train(self, setting):
train_data, train_loader = self._get_data(flag='train')
vali_data, vali_loader = self._get_data(flag='val')
test_data, test_loader = self._get_data(flag='test')
path = os.path.join(self.args.checkpoints, setting)
if not os.path.exists(path):
os.makedirs(path)
time_now = time.time()
train_steps = len(train_loader)
early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
model_optim = self._select_optimizer()
criterion = self._select_criterion()
if self.args.use_amp:
scaler = torch.cuda.amp.GradScaler()
for epoch in range(self.args.train_epochs):
iter_count = 0
train_loss = []
self.model.train()
epoch_time = time.time()
for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
iter_count += 1
model_optim.zero_grad()
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float().to(self.device)
batch_x_mark = batch_x_mark.float().to(self.device)
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
f_dim = -1 if self.args.features == 'MS' else 0
outputs = outputs[:, -self.args.pred_len:, f_dim:]
batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
loss = criterion(outputs, batch_y)
train_loss.append(loss.item())
else:
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
f_dim = -1 if self.args.features == 'MS' else 0
outputs = outputs[:, -self.args.pred_len:, f_dim:]
batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
loss = criterion(outputs, batch_y)
train_loss.append(loss.item())
if (i + 1) % 100 == 0:
print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
speed = (time.time() - time_now) / iter_count
left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
iter_count = 0
time_now = time.time()
if self.args.use_amp:
scaler.scale(loss).backward()
scaler.step(model_optim)
scaler.update()
else:
loss.backward()
model_optim.step()
print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
train_loss = np.average(train_loss)
vali_loss = self.vali(vali_data, vali_loader, criterion)
test_loss = self.vali(test_data, test_loader, criterion)
print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
epoch + 1, train_steps, train_loss, vali_loss, test_loss))
early_stopping(vali_loss, self.model, path)
if early_stopping.early_stop:
print("Early stopping")
break
adjust_learning_rate(model_optim, epoch + 1, self.args)
best_model_path = path + '/' + 'checkpoint.pth'
self.model.load_state_dict(torch.load(best_model_path))
return self.model
def test(self, setting, test=0):
test_data, test_loader = self._get_data(flag='test')
if test:
print('loading model')
self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
preds = []
trues = []
folder_path = './test_results/' + setting + '/'
if not os.path.exists(folder_path):
os.makedirs(folder_path)
self.model.eval()
with torch.no_grad():
for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
batch_x = batch_x.float().to(self.device)
batch_y = batch_y.float().to(self.device)
batch_x_mark = batch_x_mark.float().to(self.device)
batch_y_mark = batch_y_mark.float().to(self.device)
# decoder input
dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
# encoder - decoder
if self.args.use_amp:
with torch.cuda.amp.autocast():
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
else:
outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
f_dim = -1 if self.args.features == 'MS' else 0
outputs = outputs[:, -self.args.pred_len:, :]
batch_y = batch_y[:, -self.args.pred_len:, :].to(self.device)
outputs = outputs.detach().cpu().numpy()
batch_y = batch_y.detach().cpu().numpy()
if test_data.scale and self.args.inverse:
shape = batch_y.shape
if outputs.shape[-1] != batch_y.shape[-1]:
outputs = np.tile(outputs, [1, 1, int(batch_y.shape[-1] / outputs.shape[-1])])
outputs = test_data.inverse_transform(outputs.reshape(shape[0] * shape[1], -1)).reshape(shape)
batch_y = test_data.inverse_transform(batch_y.reshape(shape[0] * shape[1], -1)).reshape(shape)
outputs = outputs[:, :, f_dim:]
batch_y = batch_y[:, :, f_dim:]
pred = outputs
true = batch_y
preds.append(pred)
trues.append(true)
if i % 20 == 0:
input = batch_x.detach().cpu().numpy()
if test_data.scale and self.args.inverse:
shape = input.shape
input = test_data.inverse_transform(input.reshape(shape[0] * shape[1], -1)).reshape(shape)
gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))
preds = np.concatenate(preds, axis=0)
trues = np.concatenate(trues, axis=0)
print('test shape:', preds.shape, trues.shape)
preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
print('test shape:', preds.shape, trues.shape)
# result save
folder_path = './results/' + setting + '/'
if not os.path.exists(folder_path):
os.makedirs(folder_path)
# dtw calculation
if self.args.use_dtw:
dtw_list = []
manhattan_distance = lambda x, y: np.abs(x - y)
for i in range(preds.shape[0]):
x = preds[i].reshape(-1, 1)
y = trues[i].reshape(-1, 1)
if i % 100 == 0:
print("calculating dtw iter:", i)
d, _, _, _ = accelerated_dtw(x, y, dist=manhattan_distance)
dtw_list.append(d)
dtw = np.array(dtw_list).mean()
else:
dtw = 'Not calculated'
mae, mse, rmse, mape, mspe = metric(preds, trues)
print('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw))
f = open("result_long_term_forecast.txt", 'a')
f.write(setting + " \n")
f.write('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw))
f.write('\n')
f.write('\n')
f.close()
np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
np.save(folder_path + 'pred.npy', preds)
np.save(folder_path + 'true.npy', trues)
return
================================================
FILE: exp/exp_short_term_forecasting.py
================================================
from data_provider.data_factory import data_provider
from data_provider.m4 import M4Meta
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.losses import mape_loss, mase_loss, smape_loss
from utils.m4_summary import M4Summary
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
import pandas
warnings.filterwarnings('ignore')
class Exp_Short_Term_Forecast(Exp_Basic):
    """Experiment driver for short-term forecasting (M4-style evaluation).

    Validation and testing feed the last in-sample window of every training
    series to the model and compare against the held-out ``timeseries`` of the
    validation/test dataset.
    """

    def __init__(self, args):
        super(Exp_Short_Term_Forecast, self).__init__(args)

    def _build_model(self):
        """Derive M4 horizon settings (when ``args.data == 'm4'``) and build the model."""
        if self.args.data == 'm4':
            self.args.pred_len = M4Meta.horizons_map[self.args.seasonal_patterns]  # Up to M4 config
            self.args.seq_len = 2 * self.args.pred_len  # input_len = 2*pred_len
            self.args.label_len = self.args.pred_len
            self.args.frequency_map = M4Meta.frequency_map[self.args.seasonal_patterns]
        model = self.model_dict[self.args.model](self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` for the split named by ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """Adam over all model parameters with ``args.learning_rate``."""
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self, loss_name='MSE'):
        """Return the loss object for ``loss_name``.

        Raises:
            ValueError: if ``loss_name`` is not one of MSE, MAPE, MASE, SMAPE
                (previously this silently returned ``None``).
        """
        if loss_name == 'MSE':
            return nn.MSELoss()
        elif loss_name == 'MAPE':
            return mape_loss()
        elif loss_name == 'MASE':
            return mase_loss()
        elif loss_name == 'SMAPE':
            return smape_loss()
        raise ValueError(
            "Unsupported loss '{}'; expected one of MSE, MAPE, MASE, SMAPE".format(loss_name))

    def train(self, setting):
        """Train with early stopping on validation loss; return the best model."""
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion(self.args.loss)
        f_dim = -1 if self.args.features == 'MS' else 0

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input: label_len known steps followed by zeros
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

                # Time-mark inputs are not passed to the model in this task.
                outputs = self.model(batch_x, None, dec_inp, None)

                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                batch_y_mark = batch_y_mark[:, -self.args.pred_len:, f_dim:].to(self.device)

                # The previously computed (but never applied) first-difference
                # "sharpness" MSE term was dropped: it did not contribute to the loss.
                loss = criterion(batch_x, self.args.frequency_map, outputs, batch_y, batch_y_mark)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()
                model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(train_loader, vali_loader, criterion)
            # No separate test pass here; the validation loss is reported twice.
            test_loss = vali_loss
            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            adjust_learning_rate(model_optim, epoch + 1, self.args)

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def vali(self, train_loader, vali_loader, criterion):
        """Forecast from the last in-sample windows and score against validation series.

        Returns the value produced by ``criterion`` (not converted with ``.item()``).
        """
        x, _ = train_loader.dataset.last_insample_window()
        y = vali_loader.dataset.timeseries
        x = torch.tensor(x, dtype=torch.float32).to(self.device)
        x = x.unsqueeze(-1)

        self.model.eval()
        with torch.no_grad():
            # decoder input
            B, _, C = x.shape
            dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device)
            dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float()
            # Results are accumulated on the CPU, 500 series per forward pass.
            outputs = torch.zeros((B, self.args.pred_len, C)).float()
            id_list = np.append(np.arange(0, B, 500), B)
            for start, stop in zip(id_list[:-1], id_list[1:]):
                outputs[start:stop, :, :] = self.model(
                    x[start:stop], None, dec_inp[start:stop], None).detach().cpu()
            f_dim = -1 if self.args.features == 'MS' else 0
            outputs = outputs[:, -self.args.pred_len:, f_dim:]
            pred = outputs
            true = torch.from_numpy(np.array(y))
            batch_y_mark = torch.ones(true.shape)

            loss = criterion(x.detach().cpu()[:, :, 0], self.args.frequency_map, pred[:, :, 0], true, batch_y_mark)

        self.model.train()
        return loss

    def test(self, setting, test=0):
        """Forecast the test horizon, save plots and the forecast CSV, then summarise.

        Args:
            setting: run identifier used for checkpoint and plot directories.
            test: when truthy, the checkpoint for ``setting`` is loaded first.
        """
        _, train_loader = self._get_data(flag='train')
        _, test_loader = self._get_data(flag='test')
        x, _ = train_loader.dataset.last_insample_window()
        y = test_loader.dataset.timeseries
        x = torch.tensor(x, dtype=torch.float32).to(self.device)
        x = x.unsqueeze(-1)

        if test:
            print('loading model')
            # Same directory layout that train() writes to; map_location lets a
            # checkpoint saved on another device be loaded onto self.device.
            checkpoint_path = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
            self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))

        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        with torch.no_grad():
            B, _, C = x.shape
            dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device)
            dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float()
            # encoder - decoder, one series per forward pass
            outputs = torch.zeros((B, self.args.pred_len, C)).float().to(self.device)
            id_list = np.append(np.arange(0, B, 1), B)
            for start, stop in zip(id_list[:-1], id_list[1:]):
                outputs[start:stop, :, :] = self.model(x[start:stop], None, dec_inp[start:stop], None)

                if start % 1000 == 0:
                    print(start)

            f_dim = -1 if self.args.features == 'MS' else 0
            outputs = outputs[:, -self.args.pred_len:, f_dim:]
            outputs = outputs.detach().cpu().numpy()

            preds = outputs
            trues = y
            x = x.detach().cpu().numpy()

            # Plot roughly ten evenly spaced series. The step is clamped to 1 so
            # that fewer than ten series no longer yields range(..., step=0).
            plot_step = max(1, preds.shape[0] // 10)
            for i in range(0, preds.shape[0], plot_step):
                gt = np.concatenate((x[i, :, 0], trues[i]), axis=0)
                forecast = np.concatenate((x[i, :, 0], preds[i, :, 0]), axis=0)
                visual(gt, forecast, os.path.join(folder_path, str(i) + '.pdf'))

        print('test shape:', preds.shape)

        # result save
        folder_path = './m4_results/' + self.args.model + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(self.args.pred_len)])
        forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]]
        forecasts_df.index.name = 'id'
        # NOTE(review): this replaces the id index with the first forecast column
        # before writing; kept as-is because the summary reader may rely on it.
        forecasts_df.set_index(forecasts_df.columns[0], inplace=True)
        forecasts_df.to_csv(folder_path + self.args.seasonal_patterns + '_forecast.csv')

        print(self.args.model)
        file_path = './m4_results/' + self.args.model + '/'
        # List the directory once instead of once per seasonal pattern.
        available = set(os.listdir(file_path))
        required = {
            'Weekly_forecast.csv',
            'Monthly_forecast.csv',
            'Yearly_forecast.csv',
            'Daily_forecast.csv',
            'Hourly_forecast.csv',
            'Quarterly_forecast.csv',
        }
        if required <= available:
            m4_summary = M4Summary(file_path, self.args.root_path)
            smape_results, owa_results, mape, mase = m4_summary.evaluate()
            print('smape:', smape_results)
            print('mape:', mape)
            print('mase:', mase)
            print('owa:', owa_results)
        else:
            print('After all 6 tasks are finished, you can calculate the averaged index')
        return
================================================
FILE: exp/exp_zero_shot_forecasting.py
================================================
from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
from utils.dtw_metric import dtw, accelerated_dtw
from utils.augmentation import run_augmentation, run_augmentation_single
warnings.filterwarnings('ignore')
class Exp_Zero_Shot_Forecast(Exp_Basic):
    """Experiment driver that evaluates a model on the test split without training."""

    def __init__(self, args):
        super(Exp_Zero_Shot_Forecast, self).__init__(args)

    def _build_model(self):
        """Instantiate ``args.model`` from ``self.model_dict`` (optionally DataParallel)."""
        model = self.model_dict[self.args.model](self.args).float()
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` for the split named by ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """Adam over all model parameters with ``args.learning_rate``."""
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        """Mean-squared-error loss."""
        criterion = nn.MSELoss()
        return criterion

    def test(self, setting, test=0):
        """Evaluate on the test split, save plots/arrays and append metrics to a log.

        Args:
            setting: run identifier used for the output directories.
            test: accepted for interface compatibility; not used by this method.
        """
        test_data, test_loader = self._get_data(flag='test')

        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        # 'MS' keeps only the last channel; otherwise all channels are scored.
        f_dim = -1 if self.args.features == 'MS' else 0
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input: label_len known steps followed by zeros
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                # Channels are sliced only after the optional inverse transform.
                outputs = outputs[:, -self.args.pred_len:, :]
                batch_y = batch_y[:, -self.args.pred_len:, :].to(self.device)
                outputs = outputs.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()
                if test_data.scale and self.args.inverse:
                    shape = batch_y.shape
                    if outputs.shape[-1] != batch_y.shape[-1]:
                        # Tile the channel axis so the scaler sees the width it expects.
                        outputs = np.tile(outputs, [1, 1, int(batch_y.shape[-1] / outputs.shape[-1])])
                    outputs = test_data.inverse_transform(outputs.reshape(shape[0] * shape[1], -1)).reshape(shape)
                    batch_y = test_data.inverse_transform(batch_y.reshape(shape[0] * shape[1], -1)).reshape(shape)

                outputs = outputs[:, :, f_dim:]
                batch_y = batch_y[:, :, f_dim:]

                pred = outputs
                true = batch_y

                preds.append(pred)
                trues.append(true)
                if i % 20 == 0:
                    history = batch_x.detach().cpu().numpy()
                    if test_data.scale and self.args.inverse:
                        shape = history.shape
                        history = test_data.inverse_transform(
                            history.reshape(shape[0] * shape[1], -1)).reshape(shape)
                    gt = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
                    forecast = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, forecast, os.path.join(folder_path, str(i) + '.pdf'))

        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)
        print('test shape:', preds.shape, trues.shape)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        print('test shape:', preds.shape, trues.shape)

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        # dtw calculation
        if self.args.use_dtw:
            dtw_list = []
            manhattan_distance = lambda x, y: np.abs(x - y)
            for i in range(preds.shape[0]):
                x = preds[i].reshape(-1, 1)
                y = trues[i].reshape(-1, 1)
                if i % 100 == 0:
                    print("calculating dtw iter:", i)
                d, _, _, _ = accelerated_dtw(x, y, dist=manhattan_distance)
                dtw_list.append(d)
            dtw_value = np.array(dtw_list).mean()
        else:
            dtw_value = 'Not calculated'

        mae, mse, rmse, mape, mspe = metric(preds, trues)
        print('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw_value))
        # Context manager guarantees the log is flushed/closed even on error.
        with open("result_zero_shot_forecast_search.txt", 'a') as f:
            f.write(setting + " \n")
            f.write('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw_value))
            f.write('\n')
            f.write('\n')

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)

        return
================================================
FILE: layers/AutoCorrelation.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import math
from math import sqrt
import os
class AutoCorrelation(nn.Module):
    """
    Auto-correlation block, usable in place of a self-attention module.

    Two phases:
    (1) correlation between queries and keys is computed for every time lag
        via FFT (period-based dependency discovery);
    (2) the values are shifted by the strongest lags and blended with
        softmax weights (time delay aggregation).
    """

    def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
        super(AutoCorrelation, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _top_k(self, length):
        """Number of lags to aggregate: ``int(factor * ln(length))``."""
        return int(self.factor * math.log(length))

    def time_delay_agg_training(self, values, corr):
        """
        Training-time aggregation: one set of lags is shared by the whole
        batch (chosen from the batch-averaged correlation).

        ``values`` and ``corr`` are indexed as (batch, head, channel, length).
        """
        _, head, channel, length = values.shape
        top_k = self._top_k(length)
        # average over heads, then channels -> (batch, length)
        lag_score = corr.mean(dim=1).mean(dim=1)
        index = torch.topk(lag_score.mean(dim=0), top_k, dim=-1)[1]
        weights = torch.stack([lag_score[:, index[j]] for j in range(top_k)], dim=-1)
        lag_weight = torch.softmax(weights, dim=-1)

        aggregated = torch.zeros_like(values).float()
        for j in range(top_k):
            shifted = torch.roll(values, -int(index[j]), -1)
            # per-sample weight, broadcast over head/channel/length
            aggregated = aggregated + shifted * lag_weight[:, j].reshape(-1, 1, 1, 1)
        return aggregated

    def time_delay_agg_inference(self, values, corr):
        """
        Inference-time aggregation: lags are chosen per sample (from the
        correlation averaged over heads and channels).
        """
        batch, head, channel, length = values.shape
        positions = torch.arange(length).view(1, 1, 1, length).repeat(batch, head, channel, 1).to(values.device)
        top_k = self._top_k(length)
        lag_score = corr.mean(dim=1).mean(dim=1)
        weights, delay = torch.topk(lag_score, top_k, dim=-1)
        lag_weight = torch.softmax(weights, dim=-1)

        # values repeated twice along time so that index + lag never overflows
        doubled = values.repeat(1, 1, 1, 2)
        aggregated = torch.zeros_like(values).float()
        for j in range(top_k):
            gather_idx = positions + delay[:, j].reshape(-1, 1, 1, 1)
            shifted = torch.gather(doubled, dim=-1, index=gather_idx)
            aggregated = aggregated + shifted * lag_weight[:, j].reshape(-1, 1, 1, 1)
        return aggregated

    def time_delay_agg_full(self, values, corr):
        """
        Non-accelerated aggregation: lags are chosen independently for every
        (batch, head, channel) position.
        """
        batch, head, channel, length = values.shape
        positions = torch.arange(length).view(1, 1, 1, length).repeat(batch, head, channel, 1).to(values.device)
        top_k = self._top_k(length)
        weights, delay = torch.topk(corr, top_k, dim=-1)
        lag_weight = torch.softmax(weights, dim=-1)

        doubled = values.repeat(1, 1, 1, 2)
        aggregated = torch.zeros_like(values).float()
        for j in range(top_k):
            gather_idx = positions + delay[..., j].unsqueeze(-1)
            shifted = torch.gather(doubled, dim=-1, index=gather_idx)
            aggregated = aggregated + shifted * lag_weight[..., j].unsqueeze(-1)
        return aggregated

    def forward(self, queries, keys, values, attn_mask):
        """
        Return ``(V, corr)`` when ``output_attention`` is set, else ``(V, None)``.

        ``queries`` is unpacked as (B, L, H, E) and ``values`` as (_, S, _, D).
        ``attn_mask`` is accepted for interface compatibility and not used.
        """
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        if L > S:
            # pad keys/values with zeros up to the query length
            pad = torch.zeros_like(queries[:, :(L - S), :]).float()
            values = torch.cat([values, pad], dim=1)
            keys = torch.cat([keys, pad], dim=1)
        else:
            # otherwise keep only the first L steps
            values = values[:, :L, :, :]
            keys = keys[:, :L, :, :]

        # correlation for all lags via FFT: irfft(Q * conj(K))
        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
        corr = torch.fft.irfft(q_fft * torch.conj(k_fft), dim=-1)

        values_t = values.permute(0, 2, 3, 1).contiguous()
        if self.training:
            aggregate = self.time_delay_agg_training
        else:
            aggregate = self.time_delay_agg_inference
        V = aggregate(values_t, corr).permute(0, 3, 1, 2)

        attn = corr.permute(0, 3, 1, 2) if self.output_attention else None
        return (V.contiguous(), attn)
class AutoCorrelationLayer(nn.Module):
    """
    Multi-head wrapper around an inner correlation module.

    Queries, keys and values are linearly projected, split into ``n_heads``
    heads, passed to ``correlation`` and the result is projected back to
    ``d_model``.
    """

    def __init__(self, correlation, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AutoCorrelationLayer, self).__init__()

        # per-head widths default to an even split of d_model
        per_head = d_model // n_heads
        d_keys = d_keys or per_head
        d_values = d_values or per_head

        self.inner_correlation = correlation
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        """Return ``(projected_output, attn)`` where ``attn`` comes from the inner module."""
        batch, q_len, _ = queries.shape
        kv_len = keys.shape[1]
        heads = self.n_heads

        q = self.query_projection(queries).view(batch, q_len, heads, -1)
        k = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v = self.value_projection(values).view(batch, kv_len, heads, -1)

        out, attn = self.inner_correlation(q, k, v, attn_mask)
        # merge heads back into a single feature axis
        merged = out.view(batch, q_len, -1)
        return self.out_projection(merged), attn
================================================
FILE: layers/Autoformer_EncDec.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class my_Layernorm(nn.Module):
    """
    LayerNorm variant for the seasonal part: after normalising the last
    axis, the mean over axis 1 is subtracted from every step.
    """

    def __init__(self, channels):
        super(my_Layernorm, self).__init__()
        self.layernorm = nn.LayerNorm(channels)

    def forward(self, x):
        normed = self.layernorm(x)
        # mean over axis 1, kept as a size-1 axis so it broadcasts back
        step_mean = normed.mean(dim=1, keepdim=True)
        return normed - step_mean
class moving_avg(nn.Module):
    """
    Moving-average smoother over axis 1, used to expose the trend of a series.

    The input is edge-padded by repeating its first and last step
    ``(kernel_size - 1) // 2`` times before average pooling.
    """

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        pad = (self.kernel_size - 1) // 2
        head = x[:, :1, :].repeat(1, pad, 1)
        tail = x[:, -1:, :].repeat(1, pad, 1)
        padded = torch.cat((head, x, tail), dim=1)
        # AvgPool1d pools the last axis, so swap it with axis 1 and back
        pooled = self.avg(padded.permute(0, 2, 1))
        return pooled.permute(0, 2, 1)
class series_decomp(nn.Module):
    """
    Splits a series into a residual part and its moving-average part.
    """

    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(x - moving_mean, moving_mean)``."""
        trend = self.moving_avg(x)
        return x - trend, trend
class series_decomp_multi(nn.Module):
    """
    Multiple series decomposition block from FEDformer.

    One ``series_decomp`` is built per entry of ``kernel_size``; their
    residual and moving-mean outputs are each averaged.
    """

    def __init__(self, kernel_size):
        super(series_decomp_multi, self).__init__()
        self.kernel_size = kernel_size
        # nn.ModuleList (rather than a plain list) registers the sub-modules so
        # that .to(), .modules(), .train()/.eval() etc. reach them.
        self.series_decomp = nn.ModuleList([series_decomp(kernel) for kernel in kernel_size])

    def forward(self, x):
        """Return ``(mean residual, mean moving average)`` over all kernels."""
        trends = []
        residuals = []
        for decomp in self.series_decomp:
            residual, trend = decomp(x)
            trends.append(trend)
            residuals.append(residual)

        sea = sum(residuals) / len(residuals)
        moving_mean = sum(trends) / len(trends)
        return sea, moving_mean
class EncoderLayer(nn.Module):
    """
    Autoformer encoder layer: attention and a 1x1-conv feed-forward block,
    each followed by a series decomposition that keeps only the residual part.
    """

    def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        """Return ``(residual, attn)`` where ``attn`` is whatever the attention module returned."""
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask)
        seasonal, _ = self.decomp1(x + self.dropout(attended))

        # feed-forward over the feature axis (Conv1d expects it at position 1)
        hidden = self.conv1(seasonal.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))

        res, _ = self.decomp2(seasonal + hidden)
        return res, attn
class Encoder(nn.Module):
    """
    Autoformer encoder: a stack of attention layers, optionally interleaved
    with conv layers, followed by an optional normalisation.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """
        Run ``x`` through the stack.

        Returns:
            ``(x, attns)`` where ``attns`` collects the second return value of
            every attention layer, in order.
        """
        attns = []
        if self.conv_layers is not None:
            # attention/conv pairs first, then the last attention layer alone
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            # The final layer now receives the mask like every other layer
            # (it was previously called without it).
            x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
class DecoderLayer(nn.Module):
    """
    Autoformer decoder layer: self-attention, cross-attention and a 1x1-conv
    feed-forward block, each followed by a series decomposition. The three
    trend parts are summed and projected to ``c_out`` channels.
    """

    def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
                 moving_avg=25, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.decomp3 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
                                    padding_mode='circular', bias=False)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Return ``(residual, projected_trend)``."""
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x, trend1 = self.decomp1(x + self.dropout(self_out))

        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x, trend2 = self.decomp2(x + self.dropout(cross_out))

        # feed-forward over the feature axis (Conv1d expects it at position 1)
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))
        x, trend3 = self.decomp3(x + hidden)

        trend_sum = trend1 + trend2 + trend3
        residual_trend = self.projection(trend_sum.permute(0, 2, 1)).transpose(1, 2)
        return x, residual_trend
class Decoder(nn.Module):
    """
    Autoformer decoder: a stack of decoder layers whose per-layer trend
    outputs are accumulated, followed by optional norm and projection.
    """

    def __init__(self, layers, norm_layer=None, projection=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
        """
        Run ``x`` through every layer, accumulating the layers' trend outputs.

        Args:
            trend: initial trend to accumulate onto. When omitted (``None``),
                accumulation starts from the first layer's trend instead of
                failing on ``None + tensor``.

        Returns:
            ``(x, trend)`` after the optional norm/projection of ``x``.
        """
        for layer in self.layers:
            x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
            trend = residual_trend if trend is None else trend + residual_trend

        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x, trend
================================================
FILE: layers/Conv_Blocks.py
================================================
import torch
import torch.nn as nn
class Inception_Block_V1(nn.Module):
    """
    Parallel square 2-D convolutions with kernel sizes 1, 3, 5, ...
    (``num_kernels`` of them, padded to keep the spatial size); the
    branch outputs are averaged.
    """

    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
        super(Inception_Block_V1, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_kernels = num_kernels
        # branch idx uses a (2*idx+1)-sized kernel with padding idx
        self.kernels = nn.ModuleList([
            nn.Conv2d(in_channels, out_channels, kernel_size=2 * idx + 1, padding=idx)
            for idx in range(self.num_kernels)
        ])
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal weights and zero biases for every Conv2d in the block."""
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        branches = [self.kernels[idx](x) for idx in range(self.num_kernels)]
        return torch.stack(branches, dim=-1).mean(-1)
class Inception_Block_V2(nn.Module):
    """
    Bank of parallel 1-D-shaped 2-D convolutions whose outputs are averaged.

    For each ``i`` in ``range(num_kernels // 2)`` a ``1 x (2i+3)`` and a
    ``(2i+3) x 1`` convolution are created, followed by a single ``1 x 1``
    convolution. The paddings keep the spatial size of the input.
    """

    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
        super(Inception_Block_V2, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_kernels = num_kernels
        branches = []
        for idx in range(self.num_kernels // 2):
            span = 2 * idx + 3
            margin = idx + 1
            # Row-shaped kernel first, then its column-shaped counterpart.
            branches.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=[1, span], padding=[0, margin]))
            branches.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=[span, 1], padding=[margin, 0]))
        branches.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
        self.kernels = nn.ModuleList(branches)
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal init for every Conv2d weight; biases are zeroed."""
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Apply every branch to ``x`` and return the element-wise mean."""
        # Two branches per pair plus the trailing 1x1 convolution.
        branch_count = self.num_kernels // 2 * 2 + 1
        branch_outputs = [self.kernels[idx](x) for idx in range(branch_count)]
        stacked = torch.stack(branch_outputs, dim=-1)
        return stacked.mean(-1)
================================================
FILE: layers/Crossformer_EncDec.py
================================================
import torch
import torch.nn as nn
from einops import rearrange, repeat
from layers.SelfAttention_Family import TwoStageAttentionLayer
class SegMerging(nn.Module):
    """
    Merge every ``win_size`` segments along the segment axis into one.

    The merged segments are concatenated on the feature axis, normalised
    and linearly projected back to ``d_model`` features.
    """

    def __init__(self, d_model, win_size, norm_layer=nn.LayerNorm):
        super().__init__()
        self.d_model = d_model
        self.win_size = win_size
        merged_width = win_size * d_model
        self.linear_trans = nn.Linear(merged_width, d_model)
        self.norm = norm_layer(merged_width)

    def forward(self, x):
        """
        Args:
            x: tensor of shape (batch_size, ts_d, seg_num, d_model).

        Returns:
            Tensor whose segment axis is shortened by merging groups of
            ``win_size`` segments; the last axis has ``d_model`` features.
        """
        batch_size, ts_d, seg_num, d_model = x.shape
        remainder = seg_num % self.win_size
        if remainder != 0:
            # Repeat the trailing segments so the count divides win_size.
            pad_num = self.win_size - remainder
            x = torch.cat((x, x[:, :, -pad_num:, :]), dim=-2)

        # Offset k picks segments k, k + win_size, ...; stacking the offsets
        # on the feature axis places each window's members side by side.
        strided = [x[:, :, offset::self.win_size, :] for offset in range(self.win_size)]
        merged = torch.cat(strided, -1)
        return self.linear_trans(self.norm(merged))
class scale_block(nn.Module):
    """
    One encoder scale: optional segment merging followed by ``depth``
    TwoStageAttentionLayer modules.
    """

    def __init__(self, configs, win_size, d_model, n_heads, d_ff, depth, dropout,
                 seg_num=10, factor=10):
        super(scale_block, self).__init__()

        # Merging is only needed when more than one segment forms a window.
        self.merge_layer = SegMerging(d_model, win_size, nn.LayerNorm) if win_size > 1 else None

        self.encode_layers = nn.ModuleList()
        for _ in range(depth):
            attention_layer = TwoStageAttentionLayer(
                configs, seg_num, factor, d_model, n_heads, d_ff, dropout)
            self.encode_layers.append(attention_layer)

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """
        Args:
            x: 4-D input tensor.
            attn_mask, tau, delta: accepted but not used by this block.

        Returns:
            Tuple ``(x, None)``.
        """
        # The unpack only enforces that x has exactly four axes.
        _batch, _ts_dim, _seg_num, _d_model = x.shape

        if self.merge_layer is not None:
            x = self.merge_layer(x)

        for attention_layer in self.encode_layers:
            x = attention_layer(x)

        return x, None
class Encoder(nn.Module):
    """Sequential stack of encoder blocks that keeps every intermediate output."""

    def __init__(self, attn_layers):
        super(Encoder, self).__init__()
        self.encode_blocks = nn.ModuleList(attn_layers)

    def forward(self, x):
        """
        Returns:
            Tuple ``(encode_x, None)`` where ``encode_x`` is a list holding
            the input followed by the output of each block, in order.
        """
        encode_x = [x]
        for block in self.encode_blocks:
            # Each block returns (output, attention); the attention is dropped.
            x, _attns = block(x)
            encode_x.append(x)
        return encode_x, None
class DecoderLayer(nn.Module):
    """
    Crossformer decoder layer: self-attention on the decoder input,
    cross-attention against one encoder scale, an MLP, and a linear head
    that emits this layer's per-segment prediction.
    """

    def __init__(self, self_attention, cross_attention, seg_len, d_model, d_ff=None, dropout=0.1):
        # NOTE: d_ff is accepted but not used by this layer.
        super(DecoderLayer, self).__init__()
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.MLP1 = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.GELU(),
            nn.Linear(d_model, d_model),
        )
        self.linear_pred = nn.Linear(d_model, seg_len)

    def forward(self, x, cross):
        """
        Args:
            x: decoder input, laid out as (b, ts_d, out_seg_num, d_model).
            cross: encoder output, laid out as (b, ts_d, in_seg_num, d_model).

        Returns:
            Tuple ``(dec_output, layer_predict)``: the hidden state in the
            input layout and the prediction shaped
            (b, ts_d * seg_num, seg_len).
        """
        batch = x.shape[0]
        x = self.self_attention(x)

        # Fold the series axis into the batch so attention runs per series.
        x = rearrange(x, 'b ts_d out_seg_num d_model -> (b ts_d) out_seg_num d_model')
        cross = rearrange(cross, 'b ts_d in_seg_num d_model -> (b ts_d) in_seg_num d_model')

        attended, _attn = self.cross_attention(x, cross, cross, None, None, None)
        x = self.norm1(x + self.dropout(attended))

        # Residual MLP followed by the second normalisation.
        mlp_out = self.MLP1(x)
        dec_output = self.norm2(x + mlp_out)

        # Restore the (b, ts_d, ...) layout before predicting.
        dec_output = rearrange(
            dec_output, '(b ts_d) seg_dec_num d_model -> b ts_d seg_dec_num d_model', b=batch)
        layer_predict = self.linear_pred(dec_output)
        layer_predict = rearrange(
            layer_predict, 'b out_d seg_num seg_len -> b (out_d seg_num) seg_len')
        return dec_output, layer_predict
class Decoder(nn.Module):
    """
    Crossformer decoder: layer ``i`` attends to ``cross[i]`` and the
    per-layer predictions are summed into the final forecast.
    """

    def __init__(self, layers):
        super(Decoder, self).__init__()
        self.decode_layers = nn.ModuleList(layers)

    def forward(self, x, cross):
        """
        Args:
            x: decoder input; axis 1 is used as the number of series.
            cross: indexable collection with one encoder output per layer.

        Returns:
            Summed prediction rearranged to (b, seg_num * seg_len, out_d).
        """
        ts_d = x.shape[1]
        final_predict = None
        for idx, layer in enumerate(self.decode_layers):
            x, layer_predict = layer(x, cross[idx])
            if final_predict is None:
                final_predict = layer_predict
            else:
                final_predict = final_predict + layer_predict

        final_predict = rearrange(
            final_predict, 'b (out_d seg_num) seg_len -> b (seg_num seg_len) out_d', out_d=ts_d)
        return final_predict
================================================
FILE: layers/DWT_Decomposition.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 5
@author: Murad
SISLab, USF
mmurad@usf.edu
https://github.com/Secure-and-Intelligent-Systems-Lab/WPMixer
"""
import torch
import torch.nn as nn
import pywt
import numpy as np
import torch.nn.functional as F
from torch.autograd import Function
class Decomposition(nn.Module):
    """
    Multi-level 1-D wavelet decomposition / reconstruction wrapper.

    Wraps a DWT1DForward / DWT1DInverse pair and records the coefficient
    lengths produced for the input and prediction horizons.

    NOTE: the ``[]`` defaults are placeholders kept for signature
    compatibility; they are never mutated, and constructing the module
    without real values is not expected to work.

    Args:
        input_length: length of the input sequence.
        pred_length: length of the prediction sequence.
        wavelet_name: wavelet passed to the DWT modules as ``wave``.
        level: number of decomposition levels (``J``).
        batch_size: batch size of the probe tensor in ``_dummy_forward``.
        channel: number of channels.
        d_model, tfactor, dfactor: stored on the module, not used here.
        device: device the transforms and probe tensors are placed on.
        no_decomposition: when truthy, transforms become pass-throughs.
        use_amp: forwarded to the DWT modules.
    """

    def __init__(self,
                 input_length=[],
                 pred_length=[],
                 wavelet_name=[],
                 level=[],
                 batch_size=[],
                 channel=[],
                 d_model=[],
                 tfactor=[],
                 dfactor=[],
                 device=[],
                 no_decomposition=[],
                 use_amp=[]):
        super(Decomposition, self).__init__()
        self.input_length = input_length
        self.pred_length = pred_length
        self.wavelet_name = wavelet_name
        self.level = level
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.device = device
        self.no_decomposition = no_decomposition
        self.use_amp = use_amp
        self.eps = 1e-5

        # Move the transforms to exactly ``self.device`` (including its
        # index), the same device ``_dummy_forward`` puts its probe on.
        # ``.cuda()`` would target the *current* CUDA device instead and
        # would leave the filters on CPU for any non-CUDA accelerator.
        self.dwt = DWT1DForward(wave=self.wavelet_name, J=self.level,
                                use_amp=self.use_amp).to(self.device)
        self.idwt = DWT1DInverse(wave=self.wavelet_name,
                                 use_amp=self.use_amp).to(self.device)

        # Coefficient lengths after decomposition: [lowpass, high_0, high_1, ...].
        if not self.no_decomposition:
            self.input_w_dim = self._dummy_forward(self.input_length)
            self.pred_w_dim = self._dummy_forward(self.pred_length)
        else:
            self.input_w_dim = [self.input_length]
            self.pred_w_dim = [self.pred_length]

        self.tfactor = tfactor
        self.dfactor = dfactor
        #################################
        self.affine = False
        #################################

        if self.affine:
            self._init_params()

    def transform(self, x):
        """Decompose ``x`` (batch, channel, seq) into ``(yl, yh)``.

        With ``no_decomposition`` set, ``x`` is returned as ``yl`` and
        ``yh`` is an empty list.
        """
        if not self.no_decomposition:
            yl, yh = self._wavelet_decompose(x)
        else:
            yl, yh = x, []  # no decompose: returning the same value in yl
        return yl, yh

    def inv_transform(self, yl, yh):
        """Reconstruct the signal from ``(yl, yh)``; pass-through of ``yl``
        when ``no_decomposition`` is set."""
        if not self.no_decomposition:
            x = self._wavelet_reverse_decompose(yl, yh)
        else:
            x = yl  # no decompose: returning the same value in x
        return x

    def _dummy_forward(self, input_length):
        """Return the coefficient lengths produced for a sequence of
        ``input_length`` samples: lowpass first, then each highpass band."""
        dummy_x = torch.ones((self.batch_size, self.channel, input_length)).to(self.device)
        yl, yh = self.dwt(dummy_x)
        lengths = [yl.shape[-1]]
        lengths.extend(band.shape[-1] for band in yh)
        return lengths

    def _init_params(self):
        """Create the per-band, per-channel affine parameters."""
        self.affine_weight = nn.Parameter(torch.ones((self.level + 1, self.channel)))
        self.affine_bias = nn.Parameter(torch.zeros((self.level + 1, self.channel)))

    def _wavelet_decompose(self, x):
        """Run the forward DWT on ``x`` (batch, channel, seq) and, when
        ``self.affine`` is set, scale/shift every band per channel."""
        yl, yh = self.dwt(x)

        if self.affine:
            yl = yl.transpose(1, 2)  # batch, seq, channel
            yl = yl * self.affine_weight[0]
            yl = yl + self.affine_bias[0]
            yl = yl.transpose(1, 2)  # batch, channel, seq
            for i in range(self.level):
                yh_ = yh[i].transpose(1, 2)  # batch, seq, channel
                yh_ = yh_ * self.affine_weight[i + 1]
                yh_ = yh_ + self.affine_bias[i + 1]
                yh[i] = yh_.transpose(1, 2)  # batch, channel, seq

        return yl, yh

    def _wavelet_reverse_decompose(self, yl, yh):
        """Undo the optional affine step and run the inverse DWT.

        Returns a tensor of shape (batch, channel, seq).
        """
        if self.affine:
            # Work on a copy so the caller's list of bands is not modified.
            yh = list(yh)
            yl = yl.transpose(1, 2)  # batch, seq, channel
            yl = yl - self.affine_bias[0]
            yl = yl / (self.affine_weight[0] + self.eps)
            yl = yl.transpose(1, 2)  # batch, channel, seq
            for i in range(self.level):
                yh_ = yh[i].transpose(1, 2)  # batch, seq, channel
                yh_ = yh_ - self.affine_bias[i + 1]
                yh_ = yh_ / (self.affine_weight[i + 1] + self.eps)
                yh[i] = yh_.transpose(1, 2)  # batch, channel, seq

        x = self.idwt((yl, yh))

        return x  # shape: batch, channel, seq
###############################################################################################
"""
Following codes are combined from https://github.com/fbcotter/pytorch_wavelets.
To use Wavelet decomposition, you do not need to modify any of the codes below this line,
we can just play with the class Decomposition(above)
"""
###############################################################################################
class DWT1DForward(nn.Module):
    """ Multi-level forward 1-D discrete wavelet transform.

    Args:
        J (int): Number of levels of decomposition
        wave (str or pywt.Wavelet or tuple(ndarray)): Which wavelet to use.
            Can be:
            1) a string to pass to pywt.Wavelet constructor
            2) a pywt.Wavelet class
            3) a tuple of numpy arrays (h0, h1)
        mode (str): padding scheme name; it is converted with
            ``mode_to_int`` on every forward call.
        use_amp (bool): forwarded to the analysis filter bank.
    """

    def __init__(self, J=1, wave='db1', mode='zero', use_amp=False):
        super().__init__()
        self.use_amp = use_amp

        wavelet = pywt.Wavelet(wave) if isinstance(wave, str) else wave
        if isinstance(wavelet, pywt.Wavelet):
            h0, h1 = wavelet.dec_lo, wavelet.dec_hi
        else:
            # Otherwise expect an explicit (lowpass, highpass) pair.
            assert len(wavelet) == 2
            h0, h1 = wavelet[0], wavelet[1]

        # Prepared filters are stored as buffers so they follow the module
        # across devices.
        prepared = prep_filt_afb1d(h0, h1)
        self.register_buffer('h0', prepared[0])
        self.register_buffer('h1', prepared[1])
        self.J = J
        self.mode = mode

    def forward(self, x):
        """ Forward pass of the DWT.

        Args:
            x (tensor): Input of shape :math:`(N, C_{in}, L_{in})`

        Returns:
            (yl, yh)
                tuple of lowpass (yl) and bandpass (yh) coefficients.
                yh is a list of length J; entry ``j`` is the highpass output
                of level ``j``, so the first entry comes from the first
                (finest) level.
        """
        assert x.ndim == 3, "Can only handle 3d inputs (N, C, L)"
        mode_code = mode_to_int(self.mode)

        lowpass = x
        bands = []
        # Each level re-decomposes the lowpass output of the previous one.
        for _ in range(self.J):
            lowpass, detail = AFB1D.apply(lowpass, self.h0, self.h1, mode_code, self.use_amp)
            bands.append(detail)

        return lowpass, bands
class DWT1DInverse(nn.Module):
    """ Multi-level inverse 1-D discrete wavelet transform.

    Args:
        wave (str or pywt.Wavelet or tuple(ndarray)): Which wavelet to use.
            Can be:
            1) a string to pass to pywt.Wavelet constructor
            2) a pywt.Wavelet class
            3) a tuple of numpy arrays (g0, g1)
        mode (str): padding scheme name; it is converted with
            ``mode_to_int`` on every forward call.
        use_amp (bool): forwarded to the synthesis filter bank.
    """

    def __init__(self, wave='db1', mode='zero', use_amp=False):
        super().__init__()
        self.use_amp = use_amp

        wavelet = pywt.Wavelet(wave) if isinstance(wave, str) else wave
        if isinstance(wavelet, pywt.Wavelet):
            g0, g1 = wavelet.rec_lo, wavelet.rec_hi
        else:
            # Otherwise expect an explicit (lowpass, highpass) pair.
            assert len(wavelet) == 2
            g0, g1 = wavelet[0], wavelet[1]

        # Prepared filters are stored as buffers so they follow the module
        # across devices.
        prepared = prep_filt_sfb1d(g0, g1)
        self.register_buffer('g0', prepared[0])
        self.register_buffer('g1', prepared[1])
        self.mode = mode

    def forward(self, coeffs):
        """
        Args:
            coeffs (yl, yh): tuple of lowpass and bandpass coefficients, in
              the format returned by DWT1DForward.

        Returns:
            Reconstructed input of shape :math:`(N, C_{in}, L_{in})`

        Note:
            Any entry of ``yh`` may be None; it is then replaced by zeros
            shaped like the current lowpass signal.
        """
        lowpass, highs = coeffs
        assert lowpass.ndim == 3, "Can only handle 3d inputs (N, C, L)"
        mode_code = mode_to_int(self.mode)

        # Bands are consumed from the last level back to the first.
        for detail in highs[::-1]:
            if detail is None:
                detail = torch.zeros_like(lowpass)

            # Drop the extra trailing sample when the lowpass signal is
            # longer than the band it is paired with.
            if lowpass.shape[-1] > detail.shape[-1]:
                lowpass = lowpass[..., :-1]
            lowpass = SFB1D.apply(lowpass, detail, self.g0, self.g1, mode_code, self.use_amp)
        return lowpass
def roll(x, n, dim, make_even=False):
    """ Circularly shift ``x`` by ``n`` positions along ``dim``.

    Element ``i`` moves to position ``(i + n) % size``; negative ``n``
    shifts the other way. Shifts are taken modulo the axis length.

    Args:
        x (tensor): tensor to roll (any rank).
        n (int): shift amount.
        dim (int): axis to roll along; negative values count from the end.
        make_even (bool): if True and the axis length is odd, the first
            element of the rolled result is appended again at the end
            (periodic extension) so the output length is even.

    Returns:
        A new tensor with the rolled (and possibly extended) data.

    Raises:
        IndexError: if ``dim`` is out of range for ``x``.
    """
    size = x.shape[dim]  # also validates ``dim``
    rolled = torch.roll(x, shifts=n, dims=dim)

    if make_even and size % 2 == 1:
        # Append the leading sample explicitly. Slicing with ``:-n + end``
        # would collapse to an empty slice when ``n == 1``.
        axis = dim % x.dim()
        head = [slice(None)] * x.dim()
        head[axis] = slice(0, 1)
        rolled = torch.cat((rolled, rolled[tuple(head)]), dim=axis)
    return rolled
def _gather_rows_cols(x, row_idx, col_idx):
    """Index the last two axes of a 4-D tensor with the outer product of
    ``row_idx`` and ``col_idx`` (integer index arrays)."""
    rows = torch.as_tensor(row_idx, dtype=torch.long, device=x.device)
    cols = torch.as_tensor(col_idx, dtype=torch.long, device=x.device)
    # Broadcasting (H', 1) against (1, W') yields out[..., r, c] =
    # x[..., rows[r], cols[c]] with integer indices.
    return x[:, :, rows[:, None], cols[None, :]]


def mypad(x, pad, mode='constant', value=0):
    """ Function to do numpy like padding on tensors. Only works for 2-D
    padding.

    Inputs:
        x (tensor): 4-D tensor to pad (the last two axes are padded)
        pad (tuple): tuple of (left, right, top, bottom) pad sizes
        mode (str): 'symmetric', 'periodic', 'constant', 'reflect',
            'replicate', or 'zero'. The padding technique.
        value: fill value handed to ``F.pad`` for the 'constant',
            'reflect' and 'replicate' modes.

    Returns:
        The padded tensor.

    Raises:
        ValueError: if ``mode`` is not one of the supported names.
    """
    if mode == 'symmetric':
        # Index arrays come from the module-level ``reflect`` helper.
        # Vertical only
        if pad[0] == 0 and pad[1] == 0:
            m1, m2 = pad[2], pad[3]
            l = x.shape[-2]
            xe = reflect(np.arange(-m1, l + m2, dtype='int32'), -0.5, l - 0.5)
            return x[:, :, xe]
        # horizontal only
        elif pad[2] == 0 and pad[3] == 0:
            m1, m2 = pad[0], pad[1]
            l = x.shape[-1]
            xe = reflect(np.arange(-m1, l + m2, dtype='int32'), -0.5, l - 0.5)
            return x[:, :, :, xe]
        # Both
        else:
            m1, m2 = pad[0], pad[1]
            l1 = x.shape[-1]
            xe_row = reflect(np.arange(-m1, l1 + m2, dtype='int32'), -0.5, l1 - 0.5)
            m1, m2 = pad[2], pad[3]
            l2 = x.shape[-2]
            xe_col = reflect(np.arange(-m1, l2 + m2, dtype='int32'), -0.5, l2 - 0.5)
            # Float grids built with np.outer(..., np.ones(...)) cannot be
            # used as tensor indices; gather with integer indices instead.
            return _gather_rows_cols(x, xe_col, xe_row)
    elif mode == 'periodic':
        # Vertical only
        if pad[0] == 0 and pad[1] == 0:
            xe = np.arange(x.shape[-2])
            xe = np.pad(xe, (pad[2], pad[3]), mode='wrap')
            return x[:, :, xe]
        # Horizontal only
        elif pad[2] == 0 and pad[3] == 0:
            xe = np.arange(x.shape[-1])
            xe = np.pad(xe, (pad[0], pad[1]), mode='wrap')
            return x[:, :, :, xe]
        # Both
        else:
            xe_col = np.arange(x.shape[-2])
            xe_col = np.pad(xe_col, (pad[2], pad[3]), mode='wrap')
            xe_row = np.arange(x.shape[-1])
            xe_row = np.pad(xe_row, (pad[0], pad[1]), mode='wrap')
            return _gather_rows_cols(x, xe_col, xe_row)

    elif mode == 'constant' or mode == 'reflect' or mode == 'replicate':
        return F.pad(x, pad, mode, value)
    elif mode == 'zero':
        return F.pad(x, pad)
    else:
        raise ValueError("Unkown pad type: {}".format(mode))
def afb1d(x, h0, h1, use_amp=False, mode='zero', dim=-1):
    """ 1D analysis filter bank (along one dimension only) of an image

    Inputs:
        x (tensor): 4D input with the last two dimensions the spatial input
        h0 (tensor): 4D input for the lowpass filter. Should have shape (1, 1,
            h, 1) or (1, 1, 1, w)
        h1 (tensor): 4D input for the highpass filter. Should have shape (1, 1,
            h, 1) or (1, 1, 1, w)
        use_amp (bool): run the convolution under ``torch.cuda.amp.autocast``.
            Defaults to False so callers that only pass ``mode``/``dim``
            keywords (the 2-D helpers in this file) keep working.
        mode (str): padding method
        dim (int) - dimension of filtering. d=2 is for a vertical filter (called
            column filtering but filters across the rows). d=3 is for a
            horizontal filter, (called row filtering but filters across the
            columns).

    Returns:
        lohi: lowpass and highpass subbands concatenated along the channel
            dimension

    Raises:
        ValueError: for an unsupported ``mode``.
    """
    C = x.shape[1]
    # Convert the dim to positive
    d = dim % 4
    s = (2, 1) if d == 2 else (1, 2)
    N = x.shape[d]
    # If h0, h1 are not tensors, make them (time-reversed). If they are, then
    # assume that they are in the right order
    if not isinstance(h0, torch.Tensor):
        h0 = torch.tensor(np.copy(np.array(h0).ravel()[::-1]),
                          dtype=torch.float, device=x.device)
    if not isinstance(h1, torch.Tensor):
        h1 = torch.tensor(np.copy(np.array(h1).ravel()[::-1]),
                          dtype=torch.float, device=x.device)
    L = h0.numel()
    L2 = L // 2
    shape = [1, 1, 1, 1]
    shape[d] = L
    # If h aren't in the right shape, make them so
    if h0.shape != tuple(shape):
        h0 = h0.reshape(*shape)
    if h1.shape != tuple(shape):
        h1 = h1.reshape(*shape)
    # One (lowpass, highpass) pair per input channel for the grouped conv.
    h = torch.cat([h0, h1] * C, dim=0)

    def _filter(inp, **conv_kwargs):
        # Strided grouped convolution, optionally under autocast.
        if use_amp:
            with torch.cuda.amp.autocast():  # for mixed precision
                return F.conv2d(inp, h, stride=s, groups=C, **conv_kwargs)
        return F.conv2d(inp, h, stride=s, groups=C, **conv_kwargs)

    if mode == 'per' or mode == 'periodization':
        if x.shape[dim] % 2 == 1:
            # Odd length: repeat the last sample so the length is even.
            if d == 2:
                x = torch.cat((x, x[:, :, -1:]), dim=2)
            else:
                x = torch.cat((x, x[:, :, :, -1:]), dim=3)
            N += 1
        x = roll(x, -L2, dim=d)
        pad = (L - 1, 0) if d == 2 else (0, L - 1)
        lohi = _filter(x, padding=pad)
        N2 = N // 2
        # Fold the tail of the convolution back onto the start (wrap-around).
        if d == 2:
            lohi[:, :, :L2] = lohi[:, :, :L2] + lohi[:, :, N2:N2 + L2]
            lohi = lohi[:, :, :N2]
        else:
            lohi[:, :, :, :L2] = lohi[:, :, :, :L2] + lohi[:, :, :, N2:N2 + L2]
            lohi = lohi[:, :, :, :N2]
    else:
        # Calculate the pad size
        outsize = pywt.dwt_coeff_len(N, L, mode=mode)
        p = 2 * (outsize - 1) - N + L
        if mode == 'zero':
            # Sadly, pytorch only allows for same padding before and after, if
            # we need to do more padding after for odd length signals, have to
            # prepad
            if p % 2 == 1:
                pad = (0, 0, 0, 1) if d == 2 else (0, 1, 0, 0)
                x = F.pad(x, pad)
            pad = (p // 2, 0) if d == 2 else (0, p // 2)
            # Calculate the high and lowpass
            lohi = _filter(x, padding=pad)
        elif mode == 'symmetric' or mode == 'reflect' or mode == 'periodic':
            pad = (0, 0, p // 2, (p + 1) // 2) if d == 2 else (p // 2, (p + 1) // 2, 0, 0)
            x = mypad(x, pad=pad, mode=mode)
            lohi = _filter(x)
        else:
            raise ValueError("Unkown pad type: {}".format(mode))

    return lohi
def afb1d_atrous(x, h0, h1, mode='periodic', dim=-1, dilation=1):
    """ 1D analysis filter bank (along one dimension only) of an image without
    downsampling. Does the a trous algorithm.

    Inputs:
        x (tensor): 4D input with the last two dimensions the spatial input
        h0 (tensor): 4D input for the lowpass filter. Should have shape (1, 1,
            h, 1) or (1, 1, 1, w)
        h1 (tensor): 4D input for the highpass filter. Should have shape (1, 1,
            h, 1) or (1, 1, 1, w)
        mode (str): padding method, passed straight to ``mypad``
        dim (int) - dimension of filtering. d=2 is for a vertical filter (called
            column filtering but filters across the rows). d=3 is for a
            horizontal filter, (called row filtering but filters across the
            columns).
        dilation (int): dilation factor. Should be a power of 2.

    Returns:
        lohi: lowpass and highpass subbands concatenated along the channel
            dimension
    """
    n_channels = x.shape[1]
    # Map a possibly negative dim onto 0..3.
    axis = dim % 4

    def _as_filter(taps):
        # Tensors are assumed to be ordered correctly already; anything else
        # is flattened, time-reversed and converted.
        if isinstance(taps, torch.Tensor):
            return taps
        flipped = np.copy(np.array(taps).ravel()[::-1])
        return torch.tensor(flipped, dtype=torch.float, device=x.device)

    h0 = _as_filter(h0)
    h1 = _as_filter(h1)

    n_taps = h0.numel()
    target = [1, 1, 1, 1]
    target[axis] = n_taps
    target = tuple(target)
    # Bring both filters into the 4-D layout along the filtering axis.
    if h0.shape != target:
        h0 = h0.reshape(*target)
    if h1.shape != target:
        h1 = h1.reshape(*target)
    bank = torch.cat([h0, h1] * n_channels, dim=0)

    # Padding grows with the dilated filter length.
    half = (n_taps * dilation) // 2
    if axis == 2:
        pad = (0, 0, half - dilation, half)
    else:
        pad = (half - dilation, half, 0, 0)
    padded = mypad(x, pad=pad, mode=mode)
    return F.conv2d(padded, bank, groups=n_channels, dilation=dilation)
def sfb1d(lo, hi, g0, g1, use_amp=False, mode='zero', dim=-1):
    """ 1D synthesis filter bank of an image tensor

    Inputs:
        lo (tensor): 4D lowpass coefficients
        hi (tensor): 4D highpass coefficients
        g0: lowpass synthesis filter (tensor, or array-like that is
            flattened and converted)
        g1: highpass synthesis filter (same conventions as ``g0``)
        use_amp (bool): run the transposed convolutions under
            ``torch.cuda.amp.autocast``. Defaults to False so callers that
            only pass ``mode``/``dim`` keywords (the 2-D helpers in this
            file) keep working.
        mode (str): padding method used by the matching analysis step
        dim (int): dimension of filtering (2 or 3, negatives allowed)

    Returns:
        y: the reconstructed tensor

    Raises:
        ValueError: for an unsupported ``mode``.
    """
    C = lo.shape[1]
    d = dim % 4
    # If g0, g1 are not tensors, make them. If they are, then assume that they
    # are in the right order
    if not isinstance(g0, torch.Tensor):
        g0 = torch.tensor(np.copy(np.array(g0).ravel()),
                          dtype=torch.float, device=lo.device)
    if not isinstance(g1, torch.Tensor):
        g1 = torch.tensor(np.copy(np.array(g1).ravel()),
                          dtype=torch.float, device=lo.device)
    L = g0.numel()
    shape = [1, 1, 1, 1]
    shape[d] = L
    N = 2 * lo.shape[d]
    # If g aren't in the right shape, make them so
    if g0.shape != tuple(shape):
        g0 = g0.reshape(*shape)
    if g1.shape != tuple(shape):
        g1 = g1.reshape(*shape)

    s = (2, 1) if d == 2 else (1, 2)
    # One copy of each filter per channel for the grouped transposed conv.
    g0 = torch.cat([g0] * C, dim=0)
    g1 = torch.cat([g1] * C, dim=0)

    def _upsample_and_filter(**conv_kwargs):
        return F.conv_transpose2d(lo, g0, stride=s, groups=C, **conv_kwargs) + \
            F.conv_transpose2d(hi, g1, stride=s, groups=C, **conv_kwargs)

    def _synthesize(**conv_kwargs):
        if use_amp:
            with torch.cuda.amp.autocast():
                return _upsample_and_filter(**conv_kwargs)
        return _upsample_and_filter(**conv_kwargs)

    if mode == 'per' or mode == 'periodization':
        y = _synthesize()
        # Fold the overhang back onto the start, then crop to length N.
        if d == 2:
            y[:, :, :L - 2] = y[:, :, :L - 2] + y[:, :, N:N + L - 2]
            y = y[:, :, :N]
        else:
            y[:, :, :, :L - 2] = y[:, :, :, :L - 2] + y[:, :, :, N:N + L - 2]
            y = y[:, :, :, :N]
        y = roll(y, 1 - L // 2, dim=dim)
    else:
        if mode == 'zero' or mode == 'symmetric' or mode == 'reflect' or \
                mode == 'periodic':
            pad = (L - 2, 0) if d == 2 else (0, L - 2)
            y = _synthesize(padding=pad)
        else:
            raise ValueError("Unkown pad type: {}".format(mode))

    return y
def mode_to_int(mode):
    """Translate a padding-mode name into its integer code.

    'per' and 'periodization' share code 2.

    Raises:
        ValueError: if ``mode`` is not a known name.
    """
    name_to_code = (
        ('zero', 0),
        ('symmetric', 1),
        ('per', 2),
        ('periodization', 2),
        ('constant', 3),
        ('reflect', 4),
        ('replicate', 5),
        ('periodic', 6),
    )
    for name, code in name_to_code:
        if mode == name:
            return code
    raise ValueError("Unkown pad type: {}".format(mode))
def int_to_mode(mode):
    """Translate an integer padding-mode code back into its name.

    Code 2 maps to 'periodization'.

    Raises:
        ValueError: if ``mode`` is not a known code.
    """
    code_to_name = (
        'zero',
        'symmetric',
        'periodization',
        'constant',
        'reflect',
        'replicate',
        'periodic',
    )
    for code, name in enumerate(code_to_name):
        if mode == code:
            return name
    raise ValueError("Unkown pad type: {}".format(mode))
class AFB2D(Function):
    """ Does a single level 2d wavelet decomposition of an input. Does separate
    row and column filtering by two calls to ``afb1d``.

    Needs to have the tensors in the right form. Because this function defines
    its own backward pass, saves on memory by not having to save the input
    tensors.

    Inputs:
        x (torch.Tensor): Input to decompose
        h0_row: row lowpass
        h1_row: row highpass
        h0_col: col lowpass
        h1_col: col highpass
        mode (int): use mode_to_int to get the int code here

    We encode the mode as an integer rather than a string as gradcheck causes an
    error when a string is provided.

    Returns:
        (low, highs): the lowpass band and the three highpass bands stacked
        along dim 2.
    """

    @staticmethod
    def forward(ctx, x, h0_row, h1_row, h0_col, h1_col, mode):
        ctx.save_for_backward(h0_row, h1_row, h0_col, h1_col)
        ctx.shape = x.shape[-2:]
        mode = int_to_mode(mode)
        ctx.mode = mode
        # afb1d takes ``use_amp`` as its fourth positional argument; the 2-D
        # transform has no AMP switch, so it is passed as False explicitly.
        lohi = afb1d(x, h0_row, h1_row, False, mode=mode, dim=3)
        y = afb1d(lohi, h0_col, h1_col, False, mode=mode, dim=2)
        s = y.shape
        # Split the channel axis into (C, 4): band 0 is lowpass, 1..3 highpass.
        y = y.reshape(s[0], -1, 4, s[-2], s[-1])
        low = y[:, :, 0].contiguous()
        highs = y[:, :, 1:].contiguous()
        return low, highs

    @staticmethod
    def backward(ctx, low, highs):
        dx = None
        if ctx.needs_input_grad[0]:
            mode = ctx.mode
            h0_row, h1_row, h0_col, h1_col = ctx.saved_tensors
            lh, hl, hh = torch.unbind(highs, dim=2)
            # The gradient is the synthesis filter bank applied to the
            # incoming gradients; ``use_amp`` is passed as False as above.
            lo = sfb1d(low, lh, h0_col, h1_col, False, mode=mode, dim=2)
            hi = sfb1d(hl, hh, h0_col, h1_col, False, mode=mode, dim=2)
            dx = sfb1d(lo, hi, h0_row, h1_row, False, mode=mode, dim=3)
            # Crop back to the original spatial size (odd-length inputs).
            if dx.shape[-2] > ctx.shape[-2] and dx.shape[-1] > ctx.shape[-1]:
                dx = dx[:, :, :ctx.shape[-2], :ctx.shape[-1]]
            elif dx.shape[-2] > ctx.shape[-2]:
                dx = dx[:, :, :ctx.shape[-2]]
            elif dx.shape[-1] > ctx.shape[-1]:
                dx = dx[:, :, :, :ctx.shape[-1]]
        return dx, None, None, None, None, None
class AFB1D(Function):
    """ Does a single level 1d wavelet decomposition of an input.

    Needs to have the tensors in the right form. Because this function defines
    its own backward pass, saves on memory by not having to save the input
    tensors.

    Inputs:
        x (torch.Tensor): Input to decompose, shape (N, C, L)
        h0: lowpass
        h1: highpass
        mode (int): use mode_to_int to get the int code here
        use_amp (bool): forwarded to ``afb1d`` / ``sfb1d``

    We encode the mode as an integer rather than a string as gradcheck causes an
    error when a string is provided.

    Returns:
        x0: Tensor of shape (N, C, L') - lowpass
        x1: Tensor of shape (N, C, L') - highpass
    """

    @staticmethod
    def forward(ctx, x, h0, h1, mode, use_amp):
        mode = int_to_mode(mode)

        # Make inputs 4d by inserting a singleton axis before the last one
        x = x[:, :, None, :]
        h0 = h0[:, :, None, :]
        h1 = h1[:, :, None, :]

        # Save for backwards
        ctx.save_for_backward(h0, h1)
        ctx.shape = x.shape[3]
        ctx.mode = mode
        ctx.use_amp = use_amp

        lohi = afb1d(x, h0, h1, use_amp, mode=mode, dim=3)
        # Channels alternate lowpass / highpass; drop the singleton axis.
        x0 = lohi[:, ::2, 0].contiguous()
        x1 = lohi[:, 1::2, 0].contiguous()
        return x0, x1

    @staticmethod
    def backward(ctx, dx0, dx1):
        dx = None
        if ctx.needs_input_grad[0]:
            mode = ctx.mode
            h0, h1 = ctx.saved_tensors
            use_amp = ctx.use_amp

            # Make grads 4d
            dx0 = dx0[:, :, None, :]
            dx1 = dx1[:, :, None, :]

            dx = sfb1d(dx0, dx1, h0, h1, use_amp, mode=mode, dim=3)[:, :, 0]

            # Check for odd input
            if dx.shape[2] > ctx.shape:
                dx = dx[:, :, :ctx.shape]

        # One gradient per forward input: x, h0, h1, mode, use_amp.
        return dx, None, None, None, None
def afb2d(x, filts, mode='zero', use_amp=False):
    """ Does a single level 2d wavelet decomposition of an input. Does separate
    row and column filtering by two calls to ``afb1d``.

    Inputs:
        x (torch.Tensor): Input to decompose
        filts (list of ndarray or torch.Tensor): If a list of tensors has been
            given, this function assumes they are in the right form (the form
            returned by ``prep_filt_afb2d``). Otherwise, this function will
            prepare the filters to be of the right form by calling
            ``prep_filt_afb2d``. Either two filters (h0, h1) shared by rows
            and columns, or four (h0_col, h1_col, h0_row, h1_row).
        mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. Which
            padding to use. If periodization, the output size will be half the
            input size. Otherwise, the output size will be slightly larger than
            half.
        use_amp (bool): forwarded to ``afb1d``, which takes it as a
            positional argument.

    Returns:
        y: Tensor of shape (N, C*4, H, W)

    Raises:
        ValueError: if ``filts`` has neither 2 nor 4 entries.
    """
    tensorize = [not isinstance(f, torch.Tensor) for f in filts]
    if len(filts) == 2:
        h0, h1 = filts
        if True in tensorize:
            h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d(
                h0, h1, device=x.device)
        else:
            # Column filters as given; row filters are their transposes.
            h0_col = h0
            h0_row = h0.transpose(2, 3)
            h1_col = h1
            h1_row = h1.transpose(2, 3)
    elif len(filts) == 4:
        if True in tensorize:
            h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d(
                *filts, device=x.device)
        else:
            h0_col, h1_col, h0_row, h1_row = filts
    else:
        raise ValueError("Unknown form for input filts")

    lohi = afb1d(x, h0_row, h1_row, use_amp, mode=mode, dim=3)
    y = afb1d(lohi, h0_col, h1_col, use_amp, mode=mode, dim=2)

    return y
def afb2d_atrous(x, filts, mode='periodization', dilation=1):
    """ Does a single level 2d wavelet decomposition of an input without
    downsampling. Does separate row and column filtering by two calls to
    ``afb1d_atrous``.

    Inputs:
        x (torch.Tensor): Input to decompose
        filts (list of ndarray or torch.Tensor): If a list of tensors has been
            given, this function assumes they are in the right form (the form
            returned by ``prep_filt_afb2d``). Otherwise, this function will
            prepare the filters to be of the right form by calling
            ``prep_filt_afb2d``. Either two filters (h0, h1) shared by rows
            and columns, or four (h0_col, h1_col, h0_row, h1_row).
        mode (str): padding method. 'per' / 'periodization' are translated
            to the 'periodic' (wrap-around) padding of ``mypad``; any other
            value is passed through unchanged.
        dilation (int): dilation factor for the filters. Should be 2**level

    Returns:
        y: output of the column filtering applied to the row-filtered input

    Raises:
        ValueError: if ``filts`` has neither 2 nor 4 entries.
    """
    tensorize = [not isinstance(f, torch.Tensor) for f in filts]
    if len(filts) == 2:
        h0, h1 = filts
        if True in tensorize:
            h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d(
                h0, h1, device=x.device)
        else:
            # Column filters as given; row filters are their transposes.
            h0_col = h0
            h0_row = h0.transpose(2, 3)
            h1_col = h1
            h1_row = h1.transpose(2, 3)
    elif len(filts) == 4:
        if True in tensorize:
            h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d(
                *filts, device=x.device)
        else:
            h0_col, h1_col, h0_row, h1_row = filts
    else:
        raise ValueError("Unknown form for input filts")

    # afb1d_atrous pads through mypad, which knows 'periodic' but rejects
    # 'per' / 'periodization' -- including this function's own default.
    pad_mode = 'periodic' if mode in ('per', 'periodization') else mode

    lohi = afb1d_atrous(x, h0_row, h1_row, mode=pad_mode, dim=3, dilation=dilation)
    y = afb1d_atrous(lohi, h0_col, h1_col, mode=pad_mode, dim=2, dilation=dilation)

    return y
def afb2d_nonsep(x, filts, mode='zero'):
    """ Does a 1 level 2d wavelet decomposition of an input. Doesn't do separate
    row and column filtering.

    Inputs:
        x (torch.Tensor): Input to decompose
        filts (list or torch.Tensor): If a list is given, should be the low and
            highpass filter banks. If a tensor is given, it should be of the
            form created by ``prep_filt_afb2d_nonsep``
        mode (str): 'zero', 'symmetric', 'reflect', 'periodic' or
            'periodization'. Which padding to use. If periodization, the
            output size will be half the input size. Otherwise, the output
            size will be slightly larger than half.

    Returns:
        y: the filtered tensor

    Raises:
        ValueError: for an unsupported ``mode``.
    """
    C = x.shape[1]
    Ny = x.shape[2]
    Nx = x.shape[3]

    # Check the filter inputs
    if isinstance(filts, (tuple, list)):
        if len(filts) == 2:
            filts = prep_filt_afb2d_nonsep(filts[0], filts[1], device=x.device)
        else:
            filts = prep_filt_afb2d_nonsep(
                filts[0], filts[1], filts[2], filts[3], device=x.device)
    # One copy of the filter bank per input channel for the grouped conv.
    f = torch.cat([filts] * C, dim=0)
    Ly = f.shape[2]
    Lx = f.shape[3]

    if mode == 'periodization' or mode == 'per':
        # Odd sizes: repeat the last row/column so both sizes are even.
        if x.shape[2] % 2 == 1:
            x = torch.cat((x, x[:, :, -1:]), dim=2)
            Ny += 1
        if x.shape[3] % 2 == 1:
            x = torch.cat((x, x[:, :, :, -1:]), dim=3)
            Nx += 1
        pad = (Ly - 1, Lx - 1)
        stride = (2, 2)
        x = roll(roll(x, -Ly // 2, dim=2), -Lx // 2, dim=3)
        y = F.conv2d(x, f, padding=pad, stride=stride, groups=C)
        # Fold the overhang back onto the start, then crop.
        y[:, :, :Ly // 2] += y[:, :, Ny // 2:Ny // 2 + Ly // 2]
        y[:, :, :, :Lx // 2] += y[:, :, :, Nx // 2:Nx // 2 + Lx // 2]
        y = y[:, :, :Ny // 2, :Nx // 2]
    elif mode in ('zero', 'symmetric', 'reflect', 'periodic'):
        # Calculate the pad size
        out1 = pywt.dwt_coeff_len(Ny, Ly, mode=mode)
        out2 = pywt.dwt_coeff_len(Nx, Lx, mode=mode)
        p1 = 2 * (out1 - 1) - Ny + Ly
        p2 = 2 * (out2 - 1) - Nx + Lx
        if mode == 'zero':
            # Sadly, pytorch only allows for same padding before and after, if
            # we need to do more padding after for odd length signals, have to
            # prepad
            if p1 % 2 == 1 and p2 % 2 == 1:
                x = F.pad(x, (0, 1, 0, 1))
            elif p1 % 2 == 1:
                x = F.pad(x, (0, 0, 0, 1))
            elif p2 % 2 == 1:
                x = F.pad(x, (0, 1, 0, 0))
            # Calculate the high and lowpass
            y = F.conv2d(
                x, f, padding=(p1 // 2, p2 // 2), stride=2, groups=C)
        else:
            # 'symmetric', 'reflect' or 'periodic': pad explicitly first.
            pad = (p2 // 2, (p2 + 1) // 2, p1 // 2, (p1 + 1) // 2)
            x = mypad(x, pad=pad, mode=mode)
            y = F.conv2d(x, f, stride=2, groups=C)
    else:
        # Unknown modes fail here instead of at ``return y``.
        raise ValueError("Unkown pad type: {}".format(mode))

    return y
def sfb2d(ll, lh, hl, hh, filts, mode='zero', use_amp=False):
    """ Does a single level 2d wavelet reconstruction of wavelet coefficients.
    Does separate row and column filtering by two calls to ``sfb1d``.

    Inputs:
        ll (torch.Tensor): lowpass coefficients
        lh (torch.Tensor): horizontal coefficients
        hl (torch.Tensor): vertical coefficients
        hh (torch.Tensor): diagonal coefficients
        filts (list of ndarray or torch.Tensor): If a list of tensors has been
            given, this function assumes they are in the right form (the form
            returned by ``prep_filt_sfb2d``). Otherwise, this function will
            prepare the filters to be of the right form by calling
            ``prep_filt_sfb2d``. Either two filters (g0, g1) shared by rows
            and columns, or four (g0_col, g1_col, g0_row, g1_row).
        mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. The
            padding that was used by the matching decomposition.
        use_amp (bool): forwarded to ``sfb1d``, which takes it as a
            positional argument.

    Returns:
        y: the reconstructed tensor

    Raises:
        ValueError: if ``filts`` has neither 2 nor 4 entries.
    """
    tensorize = [not isinstance(x, torch.Tensor) for x in filts]
    if len(filts) == 2:
        g0, g1 = filts
        if True in tensorize:
            g0_col, g1_col, g0_row, g1_row = prep_filt_sfb2d(g0, g1)
        else:
            # Column filters as given; row filters are their transposes.
            g0_col = g0
            g0_row = g0.transpose(2, 3)
            g1_col = g1
            g1_row = g1.transpose(2, 3)
    elif len(filts) == 4:
        if True in tensorize:
            g0_col, g1_col, g0_row, g1_row = prep_filt_sfb2d(*filts)
        else:
            g0_col, g1_col, g0_row, g1_row = filts
    else:
        raise ValueError("Unknown form for input filts")

    # Columns first (dim 2), then rows (dim 3).
    lo = sfb1d(ll, lh, g0_col, g1_col, use_amp, mode=mode, dim=2)
    hi = sfb1d(hl, hh, g0_col, g1_col, use_amp, mode=mode, dim=2)
    y = sfb1d(lo, hi, g0_row, g1_row, use_amp, mode=mode, dim=3)

    return y
class SFB2D(Function):
    """ Does a single level 2d wavelet reconstruction from wavelet coefficients.

    Does separate column and row filtering by three calls to ``sfb1d``.
    Because this function defines its own backward pass, it saves on memory
    by only keeping the filters (not the input tensors) for the backward.

    Inputs:
        low (torch.Tensor): lowpass coefficients
        highs (torch.Tensor): highpass coefficients; the lh, hl and hh bands
            are stacked along dim 2 and are split with ``torch.unbind``
        g0_row: row lowpass
        g1_row: row highpass
        g0_col: col lowpass
        g1_col: col highpass
        mode (int): use mode_to_int to get the int code here

    We encode the mode as an integer rather than a string as gradcheck causes an
    error when a string is provided.

    Returns:
        y: the reconstructed tensor

    NOTE(review): SFB1D in this file passes ``use_amp`` positionally to
    ``sfb1d``/``afb1d`` while this class does not - confirm against the
    signatures of those helpers.
    """

    @staticmethod
    def forward(ctx, low, highs, g0_row, g1_row, g0_col, g1_col, mode):
        mode = int_to_mode(mode)
        ctx.mode = mode
        ctx.save_for_backward(g0_row, g1_row, g0_col, g1_col)

        lh, hl, hh = torch.unbind(highs, dim=2)
        # Columns first (dim 2), then rows (dim 3).
        lo = sfb1d(low, lh, g0_col, g1_col, mode=mode, dim=2)
        hi = sfb1d(hl, hh, g0_col, g1_col, mode=mode, dim=2)
        y = sfb1d(lo, hi, g0_row, g1_row, mode=mode, dim=3)
        return y

    @staticmethod
    def backward(ctx, dy):
        dlow, dhigh = None, None
        # Both gradients come out of the same analysis pass, so run it when
        # either `low` or `highs` needs a gradient. Checking only the first
        # input would silently drop the gradient of `highs` whenever `low`
        # happens not to require one.
        if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
            mode = ctx.mode
            g0_row, g1_row, g0_col, g1_col = ctx.saved_tensors
            dx = afb1d(dy, g0_row, g1_row, mode=mode, dim=3)
            dx = afb1d(dx, g0_col, g1_col, mode=mode, dim=2)
            s = dx.shape
            # Regroup the channel axis into groups of four subbands.
            dx = dx.reshape(s[0], -1, 4, s[-2], s[-1])
            dlow = dx[:, :, 0].contiguous()
            dhigh = dx[:, :, 1:].contiguous()
        # One slot per forward input; filters and mode get no gradient.
        return dlow, dhigh, None, None, None, None, None
class SFB1D(Function):
    """ Does a single level 1d wavelet reconstruction from a lowpass and a
    highpass band.

    Needs to have the tensors in the right form. Because this function defines
    its own backward pass, it saves on memory by only keeping the filters (not
    the input tensors) for the backward.

    Inputs:
        low (torch.Tensor): Lowpass to reconstruct of shape (N, C, L)
        high (torch.Tensor): Highpass to reconstruct of shape (N, C, L)
        g0: lowpass
        g1: highpass
        mode (int): use mode_to_int to get the int code here
        use_amp: forwarded unchanged to ``sfb1d`` (forward) and ``afb1d``
            (backward)

    We encode the mode as an integer rather than a string as gradcheck causes an
    error when a string is provided.

    Returns:
        y: the reconstructed tensor, with the temporary row axis removed
    """

    @staticmethod
    def forward(ctx, low, high, g0, g1, mode, use_amp):
        mode = int_to_mode(mode)
        # Make into a 2d tensor with 1 row
        low = low[:, :, None, :]
        high = high[:, :, None, :]
        g0 = g0[:, :, None, :]
        g1 = g1[:, :, None, :]

        ctx.mode = mode
        ctx.save_for_backward(g0, g1)
        ctx.use_amp = use_amp

        return sfb1d(low, high, g0, g1, use_amp, mode=mode, dim=3)[:, :, 0]

    @staticmethod
    def backward(ctx, dy):
        dlow, dhigh = None, None
        # Both gradients come out of the same analysis pass, so run it when
        # either `low` or `high` needs a gradient. Checking only the first
        # input would silently drop the gradient of `high` whenever `low`
        # happens not to require one.
        if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
            mode = ctx.mode
            use_amp = ctx.use_amp
            g0, g1 = ctx.saved_tensors
            dy = dy[:, :, None, :]
            dx = afb1d(dy, g0, g1, use_amp, mode=mode, dim=3)
            # Channels are interleaved: even -> lowpass, odd -> highpass.
            dlow = dx[:, ::2, 0].contiguous()
            dhigh = dx[:, 1::2, 0].contiguous()
        # Exactly one slot per forward input (low, high, g0, g1, mode,
        # use_amp); only the two coefficient tensors receive a gradient.
        return dlow, dhigh, None, None, None, None
def sfb2d_nonsep(coeffs, filts, mode='zero'):
    """ Reconstruct one level of a 2d wavelet transform with a single,
    non-separable transposed convolution.

    Inputs:
        coeffs (torch.Tensor): tensor of coefficients of shape (N, C, 4, H, W)
            where the third dimension indexes across the (ll, lh, hl, hh) bands.
        filts (list of ndarray or torch.Tensor): a tuple/list of two or four
            1d filters is converted with ``prep_filt_sfb2d_nonsep`` (on the
            device of ``coeffs``); anything else is assumed to already be in
            the form that function returns.
        mode (str): 'periodization'/'per' wraps the overhanging border back
            onto the output; 'symmetric', 'zero', 'reflect' and 'periodic'
            all use a plain padded transposed convolution.

    Returns:
        The reconstructed tensor, made contiguous.

    Raises:
        ValueError: for a filter list of the wrong length or an unknown mode.
    """
    n_chan = coeffs.shape[1]
    rows, cols = coeffs.shape[-2], coeffs.shape[-1]

    # Filters should already be a torch tensor; tensorize them if not.
    if isinstance(filts, (tuple, list)):
        if len(filts) not in (2, 4):
            raise ValueError("Unkown form for input filts")
        filts = prep_filt_sfb2d_nonsep(*filts, device=coeffs.device)

    # One copy of the four subband kernels per channel (grouped convolution).
    kernel = torch.cat([filts] * n_chan, dim=0)
    k_rows, k_cols = kernel.shape[2], kernel.shape[3]
    bands = coeffs.reshape(coeffs.shape[0], -1, rows, cols)

    if mode in ('periodization', 'per'):
        out = F.conv_transpose2d(bands, kernel, groups=n_chan, stride=2)
        # Fold the part that overhangs the 2*rows x 2*cols output back onto
        # its start, crop, then undo the shift introduced by the filters.
        out[:, :, :k_rows - 2] += out[:, :, 2 * rows:2 * rows + k_rows - 2]
        out[:, :, :, :k_cols - 2] += out[:, :, :, 2 * cols:2 * cols + k_cols - 2]
        out = out[:, :, :2 * rows, :2 * cols]
        out = roll(out, 1 - k_rows // 2, dim=2)
        out = roll(out, 1 - k_cols // 2, dim=3)
    elif mode in ('symmetric', 'zero', 'reflect', 'periodic'):
        out = F.conv_transpose2d(bands, kernel,
                                 padding=(k_rows - 2, k_cols - 2),
                                 groups=n_chan, stride=2)
    else:
        raise ValueError("Unkown pad type: {}".format(mode))

    return out.contiguous()
def prep_filt_afb2d_nonsep(h0_col, h1_col, h0_row=None, h1_row=None,
                           device=None):
    """
    Build the four 2d analysis kernels used by the afb2d_nonsep function.

    Each kernel is the outer product of a column filter with a row filter,
    flipped along both axes (mirror imaged) in preparation for torch.conv2d.

    Inputs:
        h0_col (array-like): low pass column filter bank
        h1_col (array-like): high pass column filter bank
        h0_row (array-like): low pass row filter bank. If none, will assume the
            same as column filter
        h1_row (array-like): high pass row filter bank. If none, will assume the
            same as column filter
        device: which device to put the tensors on to

    Returns:
        filts: (4, 1, h, w) tensor ready to get the four subbands
    """
    h0_col = np.array(h0_col).ravel()
    h1_col = np.array(h1_col).ravel()
    h0_row = h0_col if h0_row is None else h0_row
    h1_row = h1_col if h1_row is None else h1_row

    # (column filter, row filter) for the ll, lh, hl and hh kernels, in order.
    pairs = (
        (h0_col, h0_row),
        (h1_col, h0_row),
        (h0_col, h1_row),
        (h1_col, h1_row),
    )
    kernels = [np.outer(col, row)[None, ::-1, ::-1] for col, row in pairs]
    stacked = np.stack(kernels, axis=0)
    return torch.tensor(stacked, dtype=torch.get_default_dtype(),
                        device=device)
def prep_filt_sfb2d_nonsep(g0_col, g1_col, g0_row=None, g1_row=None,
                           device=None):
    """
    Build the four 2d synthesis kernels used by the sfb2d_nonsep function.

    Each kernel is the outer product of a column filter with a row filter.
    Unlike the analysis counterpart the kernels are not mirror imaged, as
    sfb2d_nonsep uses conv2d_transpose which acts like normal convolution.

    Inputs:
        g0_col (array-like): low pass column filter bank
        g1_col (array-like): high pass column filter bank
        g0_row (array-like): low pass row filter bank. If none, will assume the
            same as column filter
        g1_row (array-like): high pass row filter bank. If none, will assume the
            same as column filter
        device: which device to put the tensors on to

    Returns:
        filts: (4, 1, h, w) tensor ready to combine the four subbands
    """
    g0_col = np.array(g0_col).ravel()
    g1_col = np.array(g1_col).ravel()
    g0_row = g0_col if g0_row is None else g0_row
    g1_row = g1_col if g1_row is None else g1_row

    # (column filter, row filter) for the ll, lh, hl and hh kernels, in order.
    pairs = (
        (g0_col, g0_row),
        (g1_col, g0_row),
        (g0_col, g1_row),
        (g1_col, g1_row),
    )
    kernels = [np.outer(col, row)[None] for col, row in pairs]
    stacked = np.stack(kernels, axis=0)
    return torch.tensor(stacked, dtype=torch.get_default_dtype(),
                        device=device)
def prep_filt_sfb2d(g0_col, g1_col, g0_row=None, g1_row=None, device=None):
    """
    Shape the synthesis filters the way the sfb2d function expects them.

    The filters are converted with ``prep_filt_sfb1d`` (so they are not mirror
    imaged) and then reshaped so that column filters vary along dim 2 and row
    filters along dim 3.

    Inputs:
        g0_col (array-like): low pass column filter bank
        g1_col (array-like): high pass column filter bank
        g0_row (array-like): low pass row filter bank. If none, both row
            filters are taken from the column filters
        g1_row (array-like): high pass row filter bank
        device: which device to put the tensors on to

    Returns:
        (g0_col, g1_col, g0_row, g1_row)
    """
    col_shape = (1, 1, -1, 1)
    row_shape = (1, 1, 1, -1)

    g0_col, g1_col = prep_filt_sfb1d(g0_col, g1_col, device)
    if g0_row is not None:
        g0_row, g1_row = prep_filt_sfb1d(g0_row, g1_row, device)
    else:
        # No row filters supplied: reuse the column pair.
        g0_row, g1_row = g0_col, g1_col

    return (g0_col.reshape(col_shape), g1_col.reshape(col_shape),
            g0_row.reshape(row_shape), g1_row.reshape(row_shape))
def prep_filt_sfb1d(g0, g1, device=None):
    """
    Convert a pair of synthesis filters into (1, 1, L) tensors for sfb1d.

    The taps are flattened and kept in their given order (no mirror image),
    and use the current default torch dtype.

    Inputs:
        g0 (array-like): low pass filter bank
        g1 (array-like): high pass filter bank
        device: which device to put the tensors on to

    Returns:
        (g0, g1)
    """
    dtype = torch.get_default_dtype()

    def _to_filter(taps):
        flat = np.array(taps).ravel()
        return torch.tensor(flat, device=device, dtype=dtype).reshape((1, 1, -1))

    return _to_filter(g0), _to_filter(g1)
def prep_filt_afb2d(h0_col, h1_col, h0_row=None, h1_row=None, device=None):
    """
    Shape the analysis filters the way the afb2d function expects them.

    The filters are converted with ``prep_filt_afb1d`` (which mirror images
    them) and then reshaped so that column filters vary along dim 2 and row
    filters along dim 3.

    Inputs:
        h0_col (array-like): low pass column filter bank
        h1_col (array-like): high pass column filter bank
        h0_row (array-like): low pass row filter bank. If none, both row
            filters are taken from the column filters
        h1_row (array-like): high pass row filter bank
        device: which device to put the tensors on to

    Returns:
        (h0_col, h1_col, h0_row, h1_row)
    """
    col_shape = (1, 1, -1, 1)
    row_shape = (1, 1, 1, -1)

    h0_col, h1_col = prep_filt_afb1d(h0_col, h1_col, device)
    if h0_row is not None:
        h0_row, h1_row = prep_filt_afb1d(h0_row, h1_row, device)
    else:
        # No row filters supplied: reuse the column pair.
        h0_row, h1_row = h0_col, h1_col

    return (h0_col.reshape(col_shape), h1_col.reshape(col_shape),
            h0_row.reshape(row_shape), h1_row.reshape(row_shape))
def prep_filt_afb1d(h0, h1, device=None):
    """
    Prepares the filters to be of the right form for the afb1d function. In
    particular, makes the tensors the right shape and takes mirror images of
    them, as the analysis step uses conv2d, which acts like correlation.

    The taps are flattened *before* they are reversed, so the mirror image is
    taken tap by tap whatever the input layout is. (Reversing first, as
    ``h[::-1]`` does, only flips the leading axis and leaves a row-shaped
    ``(1, L)`` filter unmirrored.)

    Inputs:
        h0 (array-like): low pass filter bank
        h1 (array-like): high pass filter bank
        device: which device to put the tensors on to

    Returns:
        (h0, h1): tensors of shape (1, 1, L) in the default torch dtype
    """
    dtype = torch.get_default_dtype()

    def _mirrored(taps):
        # copy(): the reversed view has a negative stride, which torch
        # cannot wrap directly.
        flipped = np.array(taps).ravel()[::-1].copy()
        return torch.tensor(flipped, device=device, dtype=dtype).reshape((1, 1, -1))

    return _mirrored(h0), _mirrored(h1)
def reflect(x, minx, maxx):
    """Fold the values of *x* back into the interval [*minx*, *maxx*] by
    reflecting them about the two bounds.

    A long linearly increasing series therefore becomes a waveform that ramps
    linearly up and down between *minx* and *maxx*. If *x* contains integers
    and *minx* and *maxx* are (integers + 0.5), the ramps will have repeated
    max and min samples. The result is cast back to the dtype of *x*.

    .. codeauthor:: Rich Wareham , Aug 2013
    .. codeauthor:: Nick Kingsbury, Cambridge University, January 1999.
    """
    x = np.asanyarray(x)
    span = maxx - minx
    period = 2 * span

    # Position within one up-and-down cycle; fmod keeps the sign of its
    # first argument, so negative remainders are shifted up by one period.
    phase = np.fmod(x - minx, period)
    phase = np.where(phase < 0, phase + period, phase)

    # Second half of the cycle runs back down from maxx towards minx.
    folded = np.where(phase >= span, period - phase, phase) + minx
    return np.array(folded, dtype=x.dtype)
================================================
FILE: layers/ETSformer_EncDec.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.fft as fft
from einops import rearrange, reduce, repeat
import math, random
from scipy.fftpack import next_fast_len
class Transform:
    """Random augmentation that scales, shifts and jitters a tensor.

    All three steps draw standard normal noise multiplied by ``sigma``:
    ``scale`` and ``shift`` draw one value per entry of the last dimension,
    ``jitter`` draws one value per element.
    """

    def __init__(self, sigma):
        self.sigma = sigma

    @torch.no_grad()
    def transform(self, x):
        """Apply scale, then shift, then jitter (without tracking gradients)."""
        scaled = self.scale(x)
        shifted = self.shift(scaled)
        return self.jitter(shifted)

    def jitter(self, x):
        """Add element-wise noise with standard deviation ``sigma``."""
        noise = torch.randn(x.shape).to(x.device)
        return x + (noise * self.sigma)

    def scale(self, x):
        """Multiply by a random factor centred on 1, one per last-dim entry."""
        factor = torch.randn(x.size(-1)).to(x.device) * self.sigma + 1
        return x * factor

    def shift(self, x):
        """Add a random offset, one per last-dim entry."""
        offset = torch.randn(x.size(-1)).to(x.device) * self.sigma
        return x + offset
def conv1d_fft(f, g, dim=-1):
    """Correlate ``f`` with ``g`` along ``dim`` using the FFT.

    Both inputs are transformed at a common padded length, the spectrum of
    ``f`` is multiplied by the conjugate spectrum of ``g``, and after the
    inverse transform the result is rolled by one position and its last
    ``f.size(dim)`` entries along ``dim`` are returned.
    """
    len_f = f.size(dim)
    len_g = g.size(dim)

    # Pad to a length that is efficient for the FFT.
    padded_len = next_fast_len(len_f + len_g - 1)

    spec_f = fft.rfft(f, padded_len, dim=dim)
    spec_g = fft.rfft(g, padded_len, dim=dim)
    product = fft.irfft(spec_f * spec_g.conj(), padded_len, dim=dim)

    product = product.roll((-1,), dims=(dim,))
    keep = torch.arange(padded_len - len_f, padded_len).to(product.device)
    return product.index_select(dim, keep)
class ExponentialSmoothing(nn.Module):
    """Exponential smoothing with one learnable smoothing weight per head.

    The raw parameter ``_smoothing_weight`` is squashed with a sigmoid (see
    ``weight``); ``v0`` is a learnable initial state. With ``aux=True`` an
    extra dropout layer is created for the auxiliary input of ``forward``.
    """

    def __init__(self, dim, nhead, dropout=0.1, aux=False):
        super().__init__()
        self._smoothing_weight = nn.Parameter(torch.randn(nhead, 1))
        self.v0 = nn.Parameter(torch.randn(1, 1, nhead, dim))
        self.dropout = nn.Dropout(dropout)
        if aux:
            self.aux_dropout = nn.Dropout(dropout)

    def forward(self, values, aux_values=None):
        """Smooth ``values`` of shape (b, t, h, d) along the time axis.

        ``aux_values``, when given, contributes a second term weighted by
        ``aux_weight``; it requires the module to be built with ``aux=True``.
        """
        _, steps, _, _ = values.shape

        init_weight, weight = self.get_exponential_weight(steps)
        smoothed = conv1d_fft(self.dropout(values), weight, dim=1)
        smoothed = init_weight * self.v0 + smoothed

        if aux_values is None:
            return smoothed

        aux_weight = weight / (1 - self.weight) * self.weight
        # NOTE(review): unlike the call above, no `dim` is passed here, so
        # conv1d_fft falls back to its default dim=-1 and does not operate
        # along the time axis (dim 1). Left as is because changing it would
        # alter model outputs - confirm whether dim=1 was intended.
        aux_term = conv1d_fft(self.aux_dropout(aux_values), aux_weight)
        return smoothed + aux_term

    def get_exponential_weight(self, T):
        """Return ``(init_weight, weight)``, each laid out as (1, T, h, 1)."""
        alpha = self.weight
        # Exponents [0, 1, ..., T-1]
        exponents = torch.arange(T, dtype=torch.float, device=alpha.device)

        # (1 - \alpha) * \alpha^t, for t = T-1, T-2, ..., 0
        weight = (1 - alpha) * (alpha ** torch.flip(exponents, dims=(0,)))

        # \alpha^t for t = 1, 2, ..., T
        init_weight = alpha ** (exponents + 1)

        layout = 'h t -> 1 t h 1'
        return rearrange(init_weight, layout), rearrange(weight, layout)

    @property
    def weight(self):
        """Smoothing weight in (0, 1): sigmoid of the raw parameter."""
        return torch.sigmoid(self._smoothing_weight)
class Feedforward(nn.Module):
    """Two bias-free linear layers with an activation and dropout.

    ``activation`` is the name of a function in ``torch.nn.functional``.
    """

    def __init__(self, d_model, dim_feedforward, dropout=0.1, activation='sigmoid'):
        super().__init__()
        self.linear1 = nn.Linear(d_model, dim_feedforward, bias=False)
        self.dropout1 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model, bias=False)
        self.dropout2 = nn.Dropout(dropout)
        self.activation = getattr(F, activation)

    def forward(self, x):
        """Expand, activate, drop out, project back, drop out."""
        hidden = self.activation(self.linear1(x))
        hidden = self.dropout1(hidden)
        projected = self.linear2(hidden)
        return self.dropout2(projected)
class GrowthLayer(nn.Module):
    """Multi-head growth extraction.

    The input is projected to ``nhead`` heads, differenced along time (with
    the learnable ``z0`` as the value before the first step), exponentially
    smoothed, and projected back to ``d_model``.
    """

    def __init__(self, d_model, nhead, d_head=None, dropout=0.1):
        super().__init__()
        self.d_head = d_head or (d_model // nhead)
        self.d_model = d_model
        self.nhead = nhead

        inner_dim = self.d_head * self.nhead
        self.z0 = nn.Parameter(torch.randn(self.nhead, self.d_head))
        self.in_proj = nn.Linear(self.d_model, inner_dim)
        self.es = ExponentialSmoothing(self.d_head, self.nhead, dropout=dropout)
        self.out_proj = nn.Linear(inner_dim, self.d_model)

        assert self.d_head * self.nhead == self.d_model, "d_model must be divisible by nhead"

    def forward(self, inputs):
        """
        :param inputs: shape: (batch, seq_len, dim)
        :return: projected growth; one step longer than the input along
            time, because the smoother's initial state ``v0`` is prepended
        """
        batch, steps, _ = inputs.shape
        heads = self.in_proj(inputs).view(batch, steps, self.nhead, -1)

        # First-order difference along time, seeded with z0.
        start = repeat(self.z0, 'h d -> b 1 h d', b=batch)
        heads = torch.cat([start, heads], dim=1)
        diffs = heads[:, 1:] - heads[:, :-1]

        smoothed = self.es(diffs)
        initial_state = repeat(self.es.v0, '1 1 h d -> b 1 h d', b=batch)
        smoothed = torch.cat([initial_state, smoothed], dim=1)

        merged = rearrange(smoothed, 'b t h d -> b t (h d)')
        return self.out_proj(merged)
class FourierLayer(nn.Module):
    """Seasonal extrapolation from the ``k`` strongest Fourier components.

    For every batch element and channel the ``k`` frequency bins with the
    largest magnitude are kept (bins below ``low_freq`` are ignored) and the
    corresponding sinusoids are evaluated on ``t + pred_len`` time steps.
    """

    def __init__(self, d_model, pred_len, k=None, low_freq=1):
        super().__init__()
        self.d_model = d_model
        self.pred_len = pred_len
        self.k = k
        self.low_freq = low_freq

    def forward(self, x):
        """x: (b, t, d) -> (b, t + pred_len, d)"""
        b, t, d = x.shape
        x_freq = fft.rfft(x, dim=1)
        # Build the frequency grid on the input's device: it is indexed
        # below with index tensors that live on that device, and indexing a
        # CPU tensor with CUDA indices fails.
        freqs = fft.rfftfreq(t, device=x.device)

        if t % 2 == 0:
            # Even length: also drop the last (Nyquist) bin.
            band = slice(self.low_freq, -1)
        else:
            band = slice(self.low_freq, None)
        x_freq = x_freq[:, band]
        freqs = freqs[band]

        x_freq, index_tuple = self.topk_freq(x_freq)
        # Broadcast the grid to (b, f, d) and gather the selected
        # frequencies, then add a singleton time axis: (b, k, 1, d).
        freqs = freqs[None, :, None].expand(x_freq.size(0), -1, x_freq.size(2))
        freqs = freqs[index_tuple].unsqueeze(2)

        return self.extrapolate(x_freq, freqs, t)

    def extrapolate(self, x_freq, f, t):
        """Evaluate the selected components on ``t + pred_len`` steps.

        ``x_freq`` is (b, k, d) and ``f`` is (b, k, 1, d). Each component is
        paired with its conjugate (negative frequency) so the sum is real.
        """
        x_freq = torch.cat([x_freq, x_freq.conj()], dim=1)
        f = torch.cat([f, -f], dim=1)
        t_val = torch.arange(t + self.pred_len, dtype=torch.float,
                             device=x_freq.device)[None, None, :, None]

        amp = (x_freq.abs() / t).unsqueeze(2)
        phase = x_freq.angle().unsqueeze(2)

        x_time = amp * torch.cos(2 * math.pi * f * t_val + phase)

        # Sum over the frequency axis: (b, f, t, d) -> (b, t, d)
        return x_time.sum(dim=1)

    def topk_freq(self, x_freq):
        """Keep the ``k`` largest-magnitude bins along dim 1.

        Returns the selected coefficients, shape (b, k, d), and the index
        tuple that selects them from a (b, f, d) tensor.
        """
        _, indices = torch.topk(x_freq.abs(), self.k, dim=1, largest=True, sorted=True)
        n_batch, n_chan = x_freq.size(0), x_freq.size(2)
        # (b, d) grids of batch / channel indices, created directly on the
        # device of `indices`.
        batch_idx = torch.arange(n_batch, device=indices.device)[:, None].expand(n_batch, n_chan)
        chan_idx = torch.arange(n_chan, device=indices.device)[None, :].expand(n_batch, n_chan)
        index_tuple = (batch_idx.unsqueeze(1), indices, chan_idx.unsqueeze(1))
        x_freq = x_freq[index_tuple]

        return x_freq, index_tuple
class LevelLayer(nn.Module):
    """Update the level from the growth and seasonal representations.

    Growth and season are projected from ``d_model`` to ``c_out`` channels;
    the de-seasonalised level is then exponentially smoothed per channel,
    with the projected growth as the auxiliary input.
    """

    def __init__(self, d_model, c_out, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.c_out = c_out

        self.es = ExponentialSmoothing(1, self.c_out, dropout=dropout, aux=True)
        self.growth_pred = nn.Linear(self.d_model, self.c_out)
        self.season_pred = nn.Linear(self.d_model, self.c_out)

    def forward(self, level, growth, season):
        batch, steps, _ = level.shape
        # Every output channel is treated as a head with a single feature.
        per_channel = (batch, steps, self.c_out, 1)

        growth = self.growth_pred(growth).view(*per_channel)
        season = self.season_pred(season).view(*per_channel)
        level = level.view(*per_channel)

        smoothed = self.es(level - season, aux_values=growth)
        return rearrange(smoothed, 'b t h d -> b t (h d)')
class EncoderLayer(nn.Module):
    """One encoder layer: removes the seasonal and growth parts from the
    residual, refines the residual with a feed-forward block, and updates
    the level.
    """

    def __init__(self, d_model, nhead, c_out, seq_len, pred_len, k, dim_feedforward=None, dropout=0.1,
                 activation='sigmoid', layer_norm_eps=1e-5):
        super().__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.c_out = c_out
        self.seq_len = seq_len
        self.pred_len = pred_len
        # Default hidden width of the feed-forward block: 4 * d_model.
        self.dim_feedforward = dim_feedforward or 4 * d_model

        self.growth_layer = GrowthLayer(d_model, nhead, dropout=dropout)
        self.seasonal_layer = FourierLayer(d_model, pred_len, k=k)
        self.level_layer = LevelLayer(d_model, c_out, dropout=dropout)

        self.ff = Feedforward(d_model, self.dim_feedforward, dropout=dropout, activation=activation)
        self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
        self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, res, level, attn_mask=None):
        """Return ``(res, level, growth, season)``.

        ``attn_mask`` is accepted but not used by this layer.
        """
        season = self._season_block(res)
        # The seasonal output also covers the forecast horizon; only the
        # part aligned with the input is removed from the residual.
        season_hist = season[:, :-self.pred_len]
        res = res - season_hist

        growth = self._growth_block(res)
        res = self.norm1(res - growth[:, 1:])
        res = self.norm2(res + self.ff(res))

        level = self.level_layer(level, growth[:, :-1], season[:, :-self.pred_len])
        return res, level, growth, season

    def _growth_block(self, x):
        """Growth layer followed by dropout."""
        return self.dropout1(self.growth_layer(x))

    def _season_block(self, x):
        """Seasonal layer followed by dropout."""
        return self.dropout2(self.seasonal_layer(x))
class Encoder(nn.Module):
    """Stack of encoder layers.

    Each layer is called as ``layer(res, level, attn_mask=...)`` and must
    return ``(res, level, growth, season)``; the residual and level are
    threaded through the stack while growth and season are collected.
    """

    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)

    def forward(self, res, level, attn_mask=None):
        """Return the final level and the per-layer growth / season lists."""
        growths = []
        seasons = []
        for layer in self.layers:
            # Forward the caller's mask instead of silently replacing it
            # with None.
            res, level, growth, season = layer(res, level, attn_mask=attn_mask)
            growths.append(growth)
            seasons.append(season)

        return level, growths, seasons
class DampingLayer(nn.Module):
    """Damped extrapolation of a single time step over ``pred_len`` steps.

    Each head has a learnable damping factor in (0, 1); step ``i`` of the
    horizon is scaled by the cumulative sum of the first ``i`` powers of
    that factor.
    """

    def __init__(self, pred_len, nhead, dropout=0.1):
        super().__init__()
        self.pred_len = pred_len
        self.nhead = nhead
        self._damping_factor = nn.Parameter(torch.randn(1, nhead))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """x: (b, 1, d) -> (b, pred_len, d)"""
        x = repeat(x, 'b 1 d -> b t d', t=self.pred_len)
        b, t, d = x.shape

        # Exponents 1..pred_len as a column, so the result is (pred_len, nhead).
        exponents = torch.arange(self.pred_len).to(self._damping_factor.device) + 1
        exponents = exponents.view(self.pred_len, 1)
        cumulative = (self.damping_factor ** exponents).cumsum(dim=0)

        per_head = x.view(b, t, self.nhead, -1)
        damped = self.dropout(per_head) * cumulative.unsqueeze(-1)
        return damped.view(b, t, d)

    @property
    def damping_factor(self):
        """Damping factor in (0, 1): sigmoid of the raw parameter."""
        return torch.sigmoid(self._damping_factor)
class DecoderLayer(nn.Module):
    """Produce the growth and seasonal forecasts of one layer.

    Growth is extrapolated from its last time step with a damping layer;
    the seasonal forecast is the last ``pred_len`` steps of ``season``.
    """

    def __init__(self, d_model, nhead, c_out, pred_len, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.c_out = c_out
        self.pred_len = pred_len

        self.growth_damping = DampingLayer(pred_len, nhead, dropout=dropout)
        self.dropout1 = nn.Dropout(dropout)

    def forward(self, growth, season):
        """Return ``(growth_horizon, seasonal_horizon)``."""
        last_growth = growth[:, -1:]
        growth_horizon = self.dropout1(self.growth_damping(last_growth))
        seasonal_horizon = season[:, -self.pred_len:]
        return growth_horizon, seasonal_horizon
class Decoder(nn.Module):
    """Sum the per-layer growth and seasonal forecasts and project them.

    Dimensions are read from the first layer; a single linear map from
    ``d_model`` to ``c_out`` is shared by the growth and seasonal outputs.
    """

    def __init__(self, layers):
        super().__init__()
        first = layers[0]
        self.d_model = first.d_model
        self.c_out = first.c_out
        self.pred_len = first.pred_len
        self.nhead = first.nhead

        self.layers = nn.ModuleList(layers)
        self.pred = nn.Linear(self.d_model, self.c_out)

    def forward(self, growths, seasons):
        """``growths[i]`` / ``seasons[i]`` are the inputs of layer ``i``."""
        horizons = [layer(growths[i], seasons[i])
                    for i, layer in enumerate(self.layers)]
        growth_total = sum(growth for growth, _ in horizons)
        season_total = sum(season for _, season in horizons)
        return self.pred(growth_total), self.pred(season_total)
================================================
FILE: layers/Embed.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import math
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding.

    The table is computed once for ``max_len`` positions and stored as the
    buffer ``pe`` of shape (1, max_len, d_model); even feature indices hold
    sines and odd ones cosines. Both even and odd ``d_model`` are supported.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        # A freshly created tensor does not require grad, and the table is
        # registered as a buffer, so no explicit flag is needed.
        pe = torch.zeros(max_len, d_model).float()

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float()
                    * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term

        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one cosine column fewer than sine
        # columns; for even d_model this slice is the whole tensor.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return the encodings for the first ``x.size(1)`` positions."""
        return self.pe[:, :x.size(1)]
class TokenEmbedding(nn.Module):
    """Embed each time step with a bias-free circular 1d convolution
    (kernel size 3) over the time axis.
    """

    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        # Padding that keeps the sequence length depends on the torch version.
        if torch.__version__ >= '1.5.0':
            padding = 1
        else:
            padding = 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding,
                                   padding_mode='circular', bias=False)
        conv_layers = [m for m in self.modules() if isinstance(m, nn.Conv1d)]
        for conv in conv_layers:
            nn.init.kaiming_normal_(
                conv.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        """x: (batch, time, c_in) -> (batch, time, d_model)"""
        channels_first = x.permute(0, 2, 1)
        return self.tokenConv(channels_first).transpose(1, 2)
class FixedEmbedding(nn.Module):
    """Embedding lookup with a fixed (non-trainable) sinusoidal table.

    Row ``i`` of the table holds sines at even feature indices and cosines
    at odd ones, evaluated at position ``i``. Both even and odd ``d_model``
    are supported.
    """

    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float()
                    * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term

        w[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one cosine column fewer than sine
        # columns; for even d_model this slice is the whole tensor.
        w[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        self.emb = nn.Embedding(c_in, d_model)
        # requires_grad=False on the Parameter is what actually freezes the
        # table.
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        """Look up the rows for the integer indices in ``x`` (detached)."""
        return self.emb(x).detach()
class TemporalEmbedding(nn.Module):
    """Sum of embeddings of integer calendar features.

    The last axis of the input is read as (month, day, weekday, hour) plus,
    when the module was built with ``freq='t'``, a minute index in position 4.
    ``embed_type='fixed'`` uses ``FixedEmbedding``; any other value uses a
    trainable ``nn.Embedding``.
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        # Table sizes for each calendar feature.
        minute_size = 4
        hour_size = 24
        weekday_size = 7
        day_size = 32
        month_size = 13

        if embed_type == 'fixed':
            Embed = FixedEmbedding
        else:
            Embed = nn.Embedding

        if freq == 't':
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)

    def forward(self, x):
        x = x.long()
        if hasattr(self, 'minute_embed'):
            minute_x = self.minute_embed(x[:, :, 4])
        else:
            minute_x = 0.
        hour_x = self.hour_embed(x[:, :, 3])
        weekday_x = self.weekday_embed(x[:, :, 2])
        day_x = self.day_embed(x[:, :, 1])
        month_x = self.month_embed(x[:, :, 0])

        return hour_x + weekday_x + day_x + month_x + minute_x
class TimeFeatureEmbedding(nn.Module):
    """Bias-free linear projection of time features to ``d_model``.

    The number of input features is looked up from ``freq``; an unknown
    frequency code raises ``KeyError``.
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        # Number of time features per frequency code.
        features_per_freq = {
            'h': 4, 't': 5, 's': 6,
            'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3,
        }
        n_features = features_per_freq[freq]
        self.embed = nn.Linear(n_features, d_model, bias=False)

    def forward(self, x):
        return self.embed(x)
class DataEmbedding(nn.Module):
    """Value + positional (+ temporal) embedding followed by dropout.

    The temporal part is a ``TimeFeatureEmbedding`` when
    ``embed_type == 'timeF'`` and a ``TemporalEmbedding`` otherwise; it is
    only applied when time marks are passed to ``forward``.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        embedded = self.value_embedding(x)
        if x_mark is not None:
            embedded = embedded + self.temporal_embedding(x_mark)
        embedded = embedded + self.position_embedding(x)
        return self.dropout(embedded)
class DataEmbedding_inverted(nn.Module):
    """Embed every variate's whole series as one token.

    The input is transposed to [Batch Variate Time] and the time axis is
    projected to ``d_model`` by a linear layer; time marks, when given, are
    appended as additional variates before the projection.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_inverted, self).__init__()
        self.value_embedding = nn.Linear(c_in, d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        # [Batch Time Variate] -> [Batch Variate Time]
        tokens = x.permute(0, 2, 1)
        if x_mark is not None:
            tokens = torch.cat([tokens, x_mark.permute(0, 2, 1)], 1)
        # -> [Batch Variate d_model]
        embedded = self.value_embedding(tokens)
        return self.dropout(embedded)
class DataEmbedding_wo_pos(nn.Module):
    """Value (+ temporal) embedding followed by dropout, without adding the
    positional embedding.

    A ``PositionalEmbedding`` is still constructed (so its buffer is part of
    the module state) but ``forward`` never uses it.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding_wo_pos, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        embedded = self.value_embedding(x)
        if x_mark is not None:
            embedded = embedded + self.temporal_embedding(x_mark)
        return self.dropout(embedded)
class PatchEmbedding(nn.Module):
    """Cut each series into patches and embed every patch.

    The last axis is right-padded by replication, unfolded into windows of
    ``patch_len`` taken every ``stride`` steps, and each window is projected
    to ``d_model`` and given a positional embedding.
    """

    def __init__(self, d_model, patch_len, stride, padding, dropout):
        super(PatchEmbedding, self).__init__()
        # Patching
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch_layer = nn.ReplicationPad1d((0, padding))

        # Input encoding: bias-free projection of a patch onto d_model dims
        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)

        # Positional embedding
        self.position_embedding = PositionalEmbedding(d_model)

        # Residual dropout
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``(embedded patches, n_vars)`` with ``n_vars = x.shape[1]``.

        The first two axes of the unfolded tensor are merged, so the
        embedded patches have shape (dim0 * dim1, n_patches, d_model).
        """
        n_vars = x.shape[1]

        # Patching
        padded = self.padding_patch_layer(x)
        patches = padded.unfold(dimension=-1, size=self.patch_len, step=self.stride)
        dim0, dim1, n_patches, width = patches.shape[:4]
        patches = torch.reshape(patches, (dim0 * dim1, n_patches, width))

        # Input encoding
        embedded = self.value_embedding(patches) + self.position_embedding(patches)
        return self.dropout(embedded), n_vars
================================================
FILE: layers/FourierCorrelation.py
================================================
# coding=utf-8
# author=maziqing
# email=maziqing.mzq@alibaba-inc.com
import numpy as np
import torch
import torch.nn as nn
def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
    """
    Choose which frequency modes to keep, returned as a sorted index list.

    At most ``seq_len // 2`` modes are returned.
    'random' samples the modes randomly from ``range(seq_len // 2)`` (using
    NumPy's global random state);
    any other value selects the lowest modes.
    """
    half_len = seq_len // 2
    modes = min(modes, half_len)
    if mode_select_method == 'random':
        candidates = list(range(0, half_len))
        np.random.shuffle(candidates)
        selected = candidates[:modes]
    else:
        selected = list(range(0, modes))
    selected.sort()
    return selected
# ########## fourier layer #############
class FourierBlock(nn.Module):
    """
    1D Fourier block. It performs representation learning on frequency domain,
    it does FFT, linear transform, and Inverse FFT.
    """

    def __init__(self, in_channels, out_channels, n_heads, seq_len, modes=0, mode_select_method='random'):
        super(FourierBlock, self).__init__()
        print('fourier enhanced block used!')
        # get modes on frequency domain
        self.index = get_frequency_modes(seq_len, modes=modes, mode_select_method=mode_select_method)
        print('modes={}, index={}'.format(modes, self.index))

        self.n_heads = n_heads
        self.scale = (1 / (in_channels * out_channels))
        # Real and imaginary parts of the per-head, per-mode weights.
        weight_shape = (self.n_heads, in_channels // self.n_heads,
                        out_channels // self.n_heads, len(self.index))
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(*weight_shape, dtype=torch.float))
        self.weights2 = nn.Parameter(
            self.scale * torch.rand(*weight_shape, dtype=torch.float))

    # Complex multiplication
    def compl_mul1d(self, order, x, weights):
        """Einsum ``order`` over ``x`` and ``weights``, either of which may
        be real or complex. The result is real only when both are real."""
        x_is_complex = torch.is_complex(x)
        w_is_complex = torch.is_complex(weights)
        if not x_is_complex:
            x = torch.complex(x, torch.zeros_like(x).to(x.device))
        if not w_is_complex:
            weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))

        if not (x_is_complex or w_is_complex):
            return torch.einsum(order, x.real, weights.real)

        real_part = torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag)
        imag_part = torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)
        return torch.complex(real_part, imag_part)

    def forward(self, q, k, v, mask):
        """Only ``q`` is used; returns ``(output, None)``."""
        # size = [B, L, H, E]
        B, L, H, E = q.shape
        x = q.permute(0, 2, 3, 1)
        # Compute Fourier coefficients
        x_ft = torch.fft.rfft(x, dim=-1)
        # Perform Fourier neural operations
        out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat)
        # The complex weights are the same for every mode, so build them once.
        weights = torch.complex(self.weights1, self.weights2)
        for slot, mode in enumerate(self.index):
            # Skip modes (or output slots) that do not exist for this length.
            if mode < x_ft.shape[3] and slot < out_ft.shape[3]:
                # The result of selected mode `mode` is written to output
                # bin `slot`.
                out_ft[:, :, :, slot] = self.compl_mul1d(
                    "bhi,hio->bho", x_ft[:, :, :, mode], weights[:, :, :, slot])
        # Return to time domain
        x = torch.fft.irfft(out_ft, n=x.size(-1))
        return (x, None)
# ########## Fourier Cross Former ####################
class FourierCrossAttention(nn.Module):
    """
    1D Fourier Cross Attention layer. It does FFT, linear transform, attention mechanism and Inverse FFT.
    """

    def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random',
                 activation='tanh', policy=0, num_heads=8):
        super(FourierCrossAttention, self).__init__()
        print(' fourier enhanced cross attention used!')
        self.activation = activation
        self.in_channels = in_channels
        self.out_channels = out_channels
        # get modes for queries and keys (& values) on frequency domain
        self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method)
        self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method)

        print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q))
        print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv))

        self.scale = (1 / (in_channels * out_channels))
        # Real and imaginary parts of the per-head, per-query-mode weights.
        weight_shape = (num_heads, in_channels // num_heads,
                        out_channels // num_heads, len(self.index_q))
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(*weight_shape, dtype=torch.float))
        self.weights2 = nn.Parameter(
            self.scale * torch.rand(*weight_shape, dtype=torch.float))

    # Complex multiplication
    def compl_mul1d(self, order, x, weights):
        """Einsum ``order`` over ``x`` and ``weights``, either of which may
        be real or complex. The result is real only when both are real."""
        x_is_complex = torch.is_complex(x)
        w_is_complex = torch.is_complex(weights)
        if not x_is_complex:
            x = torch.complex(x, torch.zeros_like(x).to(x.device))
        if not w_is_complex:
            weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))

        if not (x_is_complex or w_is_complex):
            return torch.einsum(order, x.real, weights.real)

        real_part = torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag)
        imag_part = torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)
        return torch.complex(real_part, imag_part)

    @staticmethod
    def _select_modes(signal, index, batch, heads, width, device):
        """FFT ``signal`` along its last axis and copy the bins listed in
        ``index`` into consecutive slots; bins that do not exist stay zero."""
        picked = torch.zeros(batch, heads, width, len(index), device=device, dtype=torch.cfloat)
        spectrum = torch.fft.rfft(signal, dim=-1)
        for slot, mode in enumerate(index):
            if mode < spectrum.shape[3]:
                picked[:, :, :, slot] = spectrum[:, :, :, mode]
        return picked

    def forward(self, q, k, v, mask):
        """Returns ``(output, None)``; ``mask`` is not used."""
        # size = [B, L, H, E]
        B, L, H, E = q.shape
        xq = q.permute(0, 2, 3, 1)  # size = [B, H, E, L]
        xk = k.permute(0, 2, 3, 1)
        # The permuted values are not used below: the keys' spectrum is
        # reused when the attention weights are applied.
        xv = v.permute(0, 2, 3, 1)

        # Compute Fourier coefficients of the selected modes
        xq_ft_ = self._select_modes(xq, self.index_q, B, H, E, xq.device)
        xk_ft_ = self._select_modes(xk, self.index_kv, B, H, E, xq.device)

        # perform attention mechanism on frequency domain
        xqk_ft = (self.compl_mul1d("bhex,bhey->bhxy", xq_ft_, xk_ft_))
        if self.activation == 'tanh':
            xqk_ft = torch.complex(xqk_ft.real.tanh(), xqk_ft.imag.tanh())
        elif self.activation == 'softmax':
            xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
            xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
        else:
            raise Exception('{} actiation function is not implemented'.format(self.activation))
        xqkv_ft = self.compl_mul1d("bhxy,bhey->bhex", xqk_ft, xk_ft_)
        xqkvw = self.compl_mul1d("bhex,heox->bhox", xqkv_ft, torch.complex(self.weights1, self.weights2))

        # Scatter the results back to the frequency bins they came from.
        out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
        for slot, mode in enumerate(self.index_q):
            if slot < xqkvw.shape[3] and mode < out_ft.shape[3]:
                out_ft[:, :, :, mode] = xqkvw[:, :, :, slot]

        # Return to time domain
        out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1))
        return (out, None)
================================================
FILE: layers/MSGBlock.py
================================================
from math import sqrt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch import nn, Tensor
from einops import rearrange
from einops.layers.torch import Rearrange
from utils.masking import TriangularCausalMask
class Predict(nn.Module):
    """Linear projection from ``seq_len`` to ``pred_len`` on the last axis.

    With ``individual`` set, each of the ``c_out`` channels owns its own
    ``nn.Linear`` and ``nn.Dropout``; otherwise one pair is shared.
    """

    def __init__(self, individual, c_out, seq_len, pred_len, dropout):
        super().__init__()
        self.individual = individual
        self.c_out = c_out

        if not self.individual:
            self.seq2pred = nn.Linear(seq_len, pred_len)
            self.dropout = nn.Dropout(dropout)
            return

        self.seq2pred = nn.ModuleList()
        self.dropout = nn.ModuleList()
        for _ in range(self.c_out):
            self.seq2pred.append(nn.Linear(seq_len, pred_len))
            self.dropout.append(nn.Dropout(dropout))

    # (B, c_out, seq)
    def forward(self, x):
        if not self.individual:
            return self.dropout(self.seq2pred(x))

        # Project every channel with its own head, then restack on dim 1.
        per_channel = [
            self.dropout[ch](self.seq2pred[ch](x[:, ch, :]))
            for ch in range(self.c_out)
        ]
        return torch.stack(per_channel, dim=1)
class Attention_Block(nn.Module):
    """Self-attention followed by a two-layer pointwise (kernel size 1) conv
    feed-forward, each with a residual connection and LayerNorm."""

    def __init__(self, d_model, d_ff=None, n_heads=8, dropout=0.1, activation="relu"):
        super().__init__()
        d_ff = d_ff or 4 * d_model
        self.attention = self_attention(FullAttention, d_model, n_heads=n_heads)
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        # Attention sub-layer with residual + norm.
        attended, _ = self.attention(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attended))

        # Feed-forward sub-layer; Conv1d wants channels on dim 1, hence the transposes.
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(x + hidden)
class self_attention(nn.Module):
    """Multi-head wrapper: projects queries/keys/values, splits them into
    ``n_heads`` heads, runs the supplied inner attention and projects back."""

    def __init__(self, attention, d_model, n_heads):
        super().__init__()
        head_dim_k = d_model // n_heads
        head_dim_v = d_model // n_heads

        # ``attention`` is a class; it is instantiated here with a fixed dropout.
        self.inner_attention = attention(attention_dropout=0.1)
        self.query_projection = nn.Linear(d_model, head_dim_k * n_heads)
        self.key_projection = nn.Linear(d_model, head_dim_k * n_heads)
        self.value_projection = nn.Linear(d_model, head_dim_v * n_heads)
        self.out_projection = nn.Linear(head_dim_v * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask=None):
        batch, q_len, _ = queries.shape
        _, kv_len, _ = keys.shape
        heads = self.n_heads

        # (B, len, d) -> (B, len, H, d_head)
        q_heads = self.query_projection(queries).view(batch, q_len, heads, -1)
        k_heads = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v_heads = self.value_projection(values).view(batch, kv_len, heads, -1)

        out, attn = self.inner_attention(q_heads, k_heads, v_heads, attn_mask)

        # Merge heads again before the output projection.
        merged = out.view(batch, q_len, -1)
        return self.out_projection(merged), attn
class FullAttention(nn.Module):
    """Scaled dot-product attention over tensors laid out as (B, L, H, E).

    When ``mask_flag`` is set and no mask is given, a causal triangular mask is
    built. The attention matrix is returned only if ``output_attention`` is set.
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super().__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        # Fall back to 1/sqrt(E) when no explicit scale was configured.
        softmax_scale = self.scale or 1. / sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)

        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)
            # Masked positions get -inf so softmax assigns them zero weight.
            scores.masked_fill_(attn_mask.mask, -np.inf)

        weights = torch.softmax(softmax_scale * scores, dim=-1)
        weights = self.dropout(weights)
        context = torch.einsum("bhls,bshd->blhd", weights, values).contiguous()

        returned_weights = weights if self.output_attention else None
        return (context, returned_weights)
class GraphBlock(nn.Module):
    """Graph convolution block with a learned adjacency built from two node
    embeddings, followed by a residual connection and LayerNorm."""

    def __init__(self, c_out, d_model, conv_channel, skip_channel,
                 gcn_depth, dropout, propalpha, seq_len, node_dim):
        super().__init__()

        # Learnable node embeddings; their product gives the adjacency logits.
        self.nodevec1 = nn.Parameter(torch.randn(c_out, node_dim), requires_grad=True)
        self.nodevec2 = nn.Parameter(torch.randn(node_dim, c_out), requires_grad=True)
        self.start_conv = nn.Conv2d(1, conv_channel, (d_model - c_out + 1, 1))
        self.gconv1 = mixprop(conv_channel, skip_channel, gcn_depth, dropout, propalpha)
        self.gelu = nn.GELU()
        self.end_conv = nn.Conv2d(skip_channel, seq_len, (1, seq_len))
        self.linear = nn.Linear(c_out, d_model)
        self.norm = nn.LayerNorm(d_model)

    # x in (B, T, d_model)
    # Here we use a mlp to fit a complex mapping f (x)
    def forward(self, x):
        # Row-normalized, non-negative adjacency.
        adjacency_logits = torch.mm(self.nodevec1, self.nodevec2)
        adp = F.softmax(F.relu(adjacency_logits), dim=1)

        feats = x.unsqueeze(1).transpose(2, 3)
        feats = self.start_conv(feats)
        feats = self.gelu(self.gconv1(feats, adp))
        feats = self.end_conv(feats).squeeze(-1)
        feats = self.linear(feats)

        return self.norm(x + feats)
class nconv(nn.Module):
    """Mixes the third axis of ``x`` with matrix ``A``:
    ``out[n, c, v, l] = sum_w x[n, c, w, l] * A[v, w]``."""

    def __init__(self):
        super().__init__()

    def forward(self, x, A):
        mixed = torch.einsum('ncwl,vw->ncvl', x, A)
        return mixed.contiguous()
class linear(nn.Module):
    """Channel-wise linear map implemented as a 1x1 ``Conv2d``."""

    def __init__(self, c_in, c_out, bias=True):
        super().__init__()
        self.mlp = torch.nn.Conv2d(
            c_in,
            c_out,
            kernel_size=(1, 1),
            padding=(0, 0),
            stride=(1, 1),
            bias=bias,
        )

    def forward(self, x):
        projected = self.mlp(x)
        return projected
class mixprop(nn.Module):
    """Mix-hop propagation: runs ``gdep`` propagation steps over a
    row-normalized adjacency (with self-loops added), blending the input back
    in with weight ``alpha`` at each step, then fuses all hops with a 1x1 conv."""

    def __init__(self, c_in, c_out, gdep, dropout, alpha):
        super().__init__()
        self.nconv = nconv()
        # One input slice per hop, plus the original features.
        self.mlp = linear((gdep + 1) * c_in, c_out)
        self.gdep = gdep
        self.dropout = dropout
        self.alpha = alpha

    def forward(self, x, adj):
        # Add self-loops, then normalize each row by its degree.
        adj = adj + torch.eye(adj.size(0)).to(x.device)
        degree = adj.sum(1)
        norm_adj = adj / degree.view(-1, 1)

        hidden = x
        hops = [hidden]
        for _ in range(self.gdep):
            propagated = self.nconv(hidden, norm_adj)
            hidden = self.alpha * x + (1 - self.alpha) * propagated
            hops.append(hidden)

        stacked = torch.cat(hops, dim=1)
        return self.mlp(stacked)
class simpleVIT(nn.Module):
    """Small ViT-style block: a conv patch embedding followed by ``depth``
    pre-norm attention + feed-forward layers with residual connections."""

    def __init__(self, in_channels, emb_size, patch_size=2, depth=1, num_heads=4, dropout=0.1, init_weight=True):
        super().__init__()
        self.emb_size = emb_size
        self.depth = depth

        # Padding equals patch_size with kernel 2*patch_size+1, so H and W are preserved.
        patch_conv = nn.Conv2d(in_channels, emb_size, 2 * patch_size + 1, padding=patch_size)
        self.to_patch = nn.Sequential(
            patch_conv,
            Rearrange('b e (h) (w) -> b (h w) e'),
        )

        self.layers = nn.ModuleList([])
        for _ in range(self.depth):
            block = nn.ModuleList([
                nn.LayerNorm(emb_size),
                MultiHeadAttention(emb_size, num_heads, dropout),
                FeedForward(emb_size, emb_size),
            ])
            self.layers.append(block)

        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal init for every Conv2d weight; conv biases are zeroed."""
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        B, N, _, P = x.shape
        tokens = self.to_patch(x)
        for norm, attn, ff in self.layers:
            tokens = attn(norm(tokens)) + tokens
            tokens = ff(tokens) + tokens

        # Back to (B, emb_size, -1, P).
        return tokens.transpose(1, 2).reshape(B, self.emb_size, -1, P)
class MultiHeadAttention(nn.Module):
    """Multi-head self-attention over a (batch, tokens, emb_size) tensor.

    Args:
        emb_size: embedding width; must be divisible by ``num_heads``.
        num_heads: number of attention heads.
        dropout: dropout probability applied to the attention weights.
    """

    def __init__(self, emb_size, num_heads, dropout):
        super().__init__()
        self.emb_size = emb_size
        self.num_heads = num_heads
        self.keys = nn.Linear(emb_size, emb_size)
        self.queries = nn.Linear(emb_size, emb_size)
        self.values = nn.Linear(emb_size, emb_size)
        self.att_drop = nn.Dropout(dropout)
        self.projection = nn.Linear(emb_size, emb_size)

    def _split_heads(self, t: Tensor) -> Tensor:
        """(b, n, h*d) -> (b, h, n, d), with heads as the outer factor of the last axis."""
        b, n, _ = t.shape
        return t.reshape(b, n, self.num_heads, -1).permute(0, 2, 1, 3)

    def forward(self, x: Tensor, mask: Tensor = None) -> Tensor:
        """Return the attended tensor, same shape as ``x``.

        ``mask`` is an optional boolean tensor broadcastable to
        (batch, heads, queries, keys); ``True`` marks positions that may be
        attended to, ``False`` positions are suppressed.
        """
        queries = self._split_heads(self.queries(x))
        keys = self._split_heads(self.keys(x))
        values = self._split_heads(self.values(x))

        energy = torch.einsum('bhqd, bhkd -> bhqk', queries, keys)
        if mask is not None:
            # masked_fill is out-of-place, so the result must be kept. The
            # previous call used a non-existent method name and dropped the result.
            fill_value = torch.finfo(energy.dtype).min
            energy = energy.masked_fill(~mask, fill_value)

        # NOTE(review): scaling is applied after the softmax (kept as in the
        # original so existing checkpoints behave the same) — confirm intended.
        scaling = self.emb_size ** (1 / 2)
        att = F.softmax(energy, dim=-1) / scaling
        att = self.att_drop(att)

        # sum up over the third axis
        out = torch.einsum('bhal, bhlv -> bhav ', att, values)
        # (b, h, n, d) -> (b, n, h*d)
        b, _, n, _ = out.shape
        out = out.permute(0, 2, 1, 3).reshape(b, n, -1)
        return self.projection(out)
class FeedForward(nn.Module):
    """LayerNorm followed by a two-layer GELU MLP (dim -> hidden_dim -> dim)."""

    def __init__(self, dim, hidden_dim):
        super().__init__()
        stages = [
            nn.LayerNorm(dim),
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, dim),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        out = self.net(x)
        return out
================================================
FILE: layers/MambaBlock.py
================================================
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, repeat
from mamba_ssm.ops.selective_scan_interface import selective_scan_fn
try:
from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
except ImportError:
causal_conv1d_fn, causal_conv1d_update = None, None
### DELETED: selective_state_update is not used in this experiment since it does not support the use of timevariant dt, B, C flags.
# from mamba_ssm.ops.selective_scan_interface import mamba_inner_fn
# try:
# from mamba_ssm.ops.triton.selective_state_update import selective_state_update
# except ImportError:
# selective_state_update = None
class Mamba_TimeVariant(nn.Module):
    """
    Mamba Block with support for time-variant dt, B, C.
    The time-variant parameters are controlled by `timevariant_dt`, `timevariant_B`, and `timevariant_C` flags.

    Difference from the original `modules.mamba_simple.Mamba` class:
    - In `step()`, `x_proj` can be `None`, so dt, B, and C are split only when guarded by if `self.x_proj` is not `None`.
    - When `tv_dt=False`, dt is constructed as a bias-based constant and expanded to shape `(B, d_inner)` via repeat to match einsum dimensions.
    - When `d_conv=0`, step avoids accessing depthwise convolution weights and instead follows the `SiLU(x)` path.
    - In cache creation (`allocate_inference_cache`, `_get_states_from_cache`), dtype and device are selected safely even when `conv1d` is `Identity`.
    - Cached states are sized with `d_inner` (= int(expand * d_model)) so their channel dimension always
      matches the tensors produced by `forward()`/`step()`, including for fractional `expand`.
    """

    def __init__(
        self,
        d_model,
        d_input=None,  ### added
        d_output=None,  ### added
        d_state=16,
        d_conv=4,
        expand=2,
        dt_rank="auto",
        dt_min=0.001,
        dt_max=0.1,
        dt_init="random",
        dt_scale=1.0,
        dt_init_floor=1e-4,
        conv_bias=True,
        bias=False,
        use_fast_path=False,  # Fused kernel options. ** Fixed to False for this experiment **
        layer_idx=None,
        device=None,
        dtype=None,
        timevariant_dt=True,  ### ADDED: to support timevariant dt
        timevariant_B=True,  ### ADDED: to support timevariant B
        timevariant_C=True,  ### ADDED: to support timevariant C
        use_D=True,  ### ADDED: to control the usage of D parameter
    ):
        """Build the block.

        Raises:
            NotImplementedError: if ``dt_init`` is neither "constant" nor "random".
        """
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        self.d_model = d_model
        self.d_input = d_input if d_input is not None else d_model  ### ADDED: for various input dimensions
        self.d_output = d_output if d_output is not None else d_model  ### ADDED: for various output dimensions
        self.d_state = d_state
        self.d_conv = d_conv
        self.expand = expand
        self.d_inner = int(self.expand * self.d_model)
        self.dt_rank = math.ceil(self.d_model / 16) if dt_rank == "auto" else dt_rank
        self.use_fast_path = use_fast_path
        self.layer_idx = layer_idx

        ### MODIFIED: change the in_feature dimension from d_model to d_input
        self.in_proj = nn.Linear(self.d_input, self.d_inner * 2, bias=bias, **factory_kwargs)

        self.conv1d = nn.Conv1d(
            in_channels=self.d_inner,
            out_channels=self.d_inner,
            bias=conv_bias,
            kernel_size=d_conv,
            groups=self.d_inner,
            padding=d_conv - 1,
            **factory_kwargs,
        ) if d_conv > 0 else nn.Identity()  ### MODIFIED: Skip the convolution if d_conv is set to 0

        self.activation = "silu"
        self.act = nn.SiLU()

        ### MODIFIED: adjust the x_proj layer to support timevariant dt, B, C.
        ### this is possible since selective_scan.cpp has `is_variable_B` and `is_variable_C` flags that control the usage of timevariant B and C
        self.tv_dt, self.tv_B, self.tv_C = timevariant_dt, timevariant_B, timevariant_C
        # Output widths of x_proj for (dt, B, C); a disabled entry stays 0.
        self.tv_proj_dim = [0, 0, 0]
        if timevariant_dt:
            self.tv_proj_dim[0] = self.dt_rank
        if timevariant_B:
            self.tv_proj_dim[1] = self.d_state
        if timevariant_C:
            self.tv_proj_dim[2] = self.d_state
        self.x_proj = nn.Linear(
            self.d_inner, sum(self.tv_proj_dim), bias=False, **factory_kwargs
        ) if sum(self.tv_proj_dim) > 0 else None

        ### ADDED: if tv flags are False, we will use constants for dt, B, C
        if not timevariant_B:
            self.B = nn.Parameter(torch.rand(self.d_inner, self.d_state, **factory_kwargs))
            self.B._no_weight_decay = True
        if not timevariant_C:
            self.C = nn.Parameter(torch.rand(self.d_inner, self.d_state, **factory_kwargs))
            self.C._no_weight_decay = True

        self.dt_proj = nn.Linear(self.dt_rank, self.d_inner, bias=True, **factory_kwargs)

        # Initialize special dt projection to preserve variance at initialization
        dt_init_std = self.dt_rank**-0.5 * dt_scale
        if dt_init == "constant":
            nn.init.constant_(self.dt_proj.weight, dt_init_std)
        elif dt_init == "random":
            nn.init.uniform_(self.dt_proj.weight, -dt_init_std, dt_init_std)
        else:
            raise NotImplementedError

        # Initialize dt bias so that F.softplus(dt_bias) is between dt_min and dt_max
        dt = torch.exp(
            torch.rand(self.d_inner, **factory_kwargs) * (math.log(dt_max) - math.log(dt_min))
            + math.log(dt_min)
        ).clamp(min=dt_init_floor)
        # Inverse of softplus: https://github.com/pytorch/pytorch/issues/72759
        inv_dt = dt + torch.log(-torch.expm1(-dt))
        with torch.no_grad():
            self.dt_proj.bias.copy_(inv_dt)
        # Our initialization would set all Linear.bias to zero, need to mark this one as _no_reinit
        self.dt_proj.bias._no_reinit = True

        # S4D real initialization
        # (expand * d_model, d_state)
        # A = [[1, 2, ..., d_state], [1, 2, ..., d_state], ..., [1, 2, ..., d_state]]
        A = repeat(
            torch.arange(1, self.d_state + 1, dtype=torch.float32, device=device),
            "n -> d n",
            d=self.d_inner,
        ).contiguous()
        A_log = torch.log(A)  # Keep A_log in fp32
        self.A_log = nn.Parameter(A_log)
        self.A_log._no_weight_decay = True

        # D "skip" parameter
        ### MODIFIED: D is a learnable parameter only if use_D is True else it is not used
        ### this is possible since selective_scan.cpp allows D to be optional
        if use_D:
            self.D = nn.Parameter(torch.ones(self.d_inner, device=device)).float()
            self.D._no_weight_decay = True
        else:
            self.D = None

        ### MODIFIED: out_proj now has d_output instead of d_model
        self.out_proj = nn.Linear(self.d_inner, self.d_output, bias=bias, **factory_kwargs)

    def forward(self, hidden_states, inference_params=None):
        """
        hidden_states: (B, L, D)
        Returns: (B, L, d_output) — the same shape as hidden_states when d_output equals the input width.

        When ``inference_params`` is given and its ``seqlen_offset`` is positive,
        the call is delegated to ``step()`` and the cached states are updated in place.
        """
        batch, seqlen, _ = hidden_states.shape

        conv_state, ssm_state = None, None
        if inference_params is not None:
            conv_state, ssm_state = self._get_states_from_cache(inference_params, batch)
            if inference_params.seqlen_offset > 0:
                # The states are updated inplace
                out, _, _ = self.step(hidden_states, conv_state, ssm_state)
                return out

        # We do matmul and transpose BLH -> HBL at the same time
        xz = rearrange(
            self.in_proj.weight @ rearrange(hidden_states, "b l d -> d (b l)"),
            "d (b l) -> b d l",
            l=seqlen,
        )  # (d_inner * 2, d_input) @ (d_input, batch * seqlen) -> (d_inner * 2, batch, seqlen) -> (batch, d_inner * 2, seqlen)
        if self.in_proj.bias is not None:
            xz = xz + rearrange(self.in_proj.bias.to(dtype=xz.dtype), "d -> d 1")

        A = -torch.exp(self.A_log.float())  # (d_inner, d_state). always have negative values.

        # NOTE: the fused `mamba_inner_fn` fast path of the reference implementation is
        # intentionally not used, because the time-variant dt/B/C switches must be controlled here.
        x, z = xz.chunk(2, dim=1)  # (batch, d_inner, seqlen), (batch, d_inner, seqlen)

        # Compute short convolution
        if conv_state is not None:
            # If we just take x[:, :, -self.d_conv :], it will error if seqlen < self.d_conv
            # Instead F.pad will pad with zeros if seqlen < self.d_conv, and truncate otherwise.
            if self.d_conv > 0:
                conv_state.copy_(F.pad(x, (self.d_conv - x.shape[-1], 0)))  # Update state (B D W)

        ### MODIFIED: use causal_conv if available
        if (causal_conv1d_fn is None) or (self.d_conv not in [2, 3, 4]):
            x = self.act(self.conv1d(x)[..., :seqlen])
        else:
            assert self.activation in ["silu", "swish"]
            x = causal_conv1d_fn(
                x=x,
                weight=rearrange(self.conv1d.weight, "d 1 w -> d w"),
                bias=self.conv1d.bias,
                activation=self.activation,
            )  # (batch, d_inner, seqlen)

        # We're careful here about the layout, to avoid extra transposes.
        # We want dt to have d as the slowest moving dimension
        # and L as the fastest moving dimension, since those are what the ssm_scan kernel expects.
        ### MODIFIED: x_proj is now optional and only used if either timevariant dt, B, or C is True
        if self.x_proj is not None:
            x_dbl = self.x_proj(rearrange(x, "b d l -> (b l) d"))  # (batch, d_inner, seqlen) -> (batch * seqlen, d_inner) -> (batch * seqlen, ...) depending on timevariant flags
            dt, B, C = torch.split(x_dbl, self.tv_proj_dim, dim=-1)  # (batch * seqlen, dt_rank), (batch * seqlen, d_state), (batch * seqlen, d_state) if enabled

        ### MODIFIED: dt, B, C are now set based on each timevariant flags
        # If timevariant dt is False, we use a constant dt, which will be set in delta_bias parameter. Thus, we don't need to compute dt here.
        if not self.tv_dt:
            dt = torch.zeros(batch, self.d_inner, seqlen, device=self.dt_proj.bias.device, dtype=self.dt_proj.bias.dtype)  # (batch, d_inner, seqlen)
        else:
            dt = self.dt_proj.weight @ dt.t()  # (d_inner, d_rank) @ (d_rank, batch * seqlen) -> (d_inner, batch * seqlen)
            dt = rearrange(dt, "d (b l) -> b d l", l=seqlen)  # (batch, d_inner, seqlen)

        # if timevariant B is False, we use a constant B, which is defined in __init__.
        if not self.tv_B:
            B = self.B  # (d_inner, d_state)
        else:
            B = rearrange(B, "(b l) dstate -> b dstate l", l=seqlen).contiguous()  # (b, dstate, l)

        # if timevariant C is False, we use a constant C, which is defined in __init__.
        if not self.tv_C:
            C = self.C  # (d_inner, d_state)
        else:
            C = rearrange(C, "(b l) dstate -> b dstate l", l=seqlen).contiguous()  # (b, dstate, l)

        assert self.activation in ["silu", "swish"]
        y = selective_scan_fn(
            x,
            dt,
            A,
            B,
            C,
            self.D,
            z=z,
            delta_bias=self.dt_proj.bias.float(),
            delta_softplus=True,
            return_last_state=ssm_state is not None,
        )
        if ssm_state is not None:
            y, last_state = y
            ssm_state.copy_(last_state)
        y = rearrange(y, "b d l -> b l d")
        out = self.out_proj(y)
        return out

    def step(self, hidden_states, conv_state, ssm_state):
        """Advance the recurrence by one token.

        hidden_states: (B, 1, d_input). ``conv_state`` and ``ssm_state`` are
        updated in place and returned together with the (B, 1, d_output) output.
        """
        dtype = hidden_states.dtype
        assert hidden_states.shape[1] == 1, "Only support decoding with 1 token at a time for now"
        xz = self.in_proj(hidden_states.squeeze(1))  # (batch, d_inner * 2)
        x, z = xz.chunk(2, dim=-1)  # (batch, d_inner), (batch, d_inner)

        # Conv step
        if self.d_conv == 0:
            x = self.act(x).to(dtype=dtype)
        elif (causal_conv1d_update is None) or (self.d_conv not in [2, 3, 4]):
            conv_state.copy_(torch.roll(conv_state, shifts=-1, dims=-1))  # Update state (B D W)
            conv_state[:, :, -1] = x
            x = torch.sum(conv_state * rearrange(self.conv1d.weight, "d 1 w -> d w"), dim=-1)  # (B D)
            if self.conv1d.bias is not None:
                x = x + self.conv1d.bias
            x = self.act(x).to(dtype=dtype)
        else:
            x = causal_conv1d_update(
                x,
                conv_state,
                rearrange(self.conv1d.weight, "d 1 w -> d w"),
                self.conv1d.bias,
                self.activation,
            )

        if self.x_proj is not None:
            x_db = self.x_proj(x)  # (B, d_inner) -> (B, dt_rank + d_state + d_state)
            dt, B, C = torch.split(x_db, self.tv_proj_dim, dim=-1)  # (B, dt_rank), (B, d_state), (B, d_state)

        # SSM step
        # NOTE: `selective_state_update` is not used because it does not support the
        # time-variant dt/B/C switches; the update is written out explicitly instead.
        ### MODIFIED: dt, B are now set based on the timevariant flags.
        if not self.tv_dt:
            dt = F.softplus(self.dt_proj.bias.to(dtype=x.dtype))
            dt = repeat(dt, "d -> b d", b=x.shape[0])  # (B, d_inner)
        else:
            dt = F.linear(dt, self.dt_proj.weight)  # (B, dt_rank) @ (dt_rank, d_inner) -> (B, d_inner)
            dt = F.softplus(dt + self.dt_proj.bias.to(dtype=dt.dtype))  # (B, d_inner)

        if not self.tv_B:
            dB = torch.einsum("bd,dn->bdn", dt, self.B)  # (B, d_inner, d_state)
        else:
            dB = torch.einsum("bd,bn->bdn", dt, B)  # (B, d_inner, d_state)

        A = -torch.exp(self.A_log.float())  # (d_inner, d_state)
        dA = torch.exp(torch.einsum("bd,dn->bdn", dt, A))

        ssm_state.copy_(ssm_state * dA + rearrange(x, "b d -> b d 1") * dB)  # (B, d_inner, d_state)

        ### MODIFIED: C is now set based on the timevariant flags.
        if not self.tv_C:
            y = torch.einsum("bdn,dn->bd", ssm_state.to(dtype), self.C)  # (B, d_inner, d_state) @ (d_inner, d_state) -> (B, d_inner)
        else:
            y = torch.einsum("bdn,bn->bd", ssm_state.to(dtype), C)  # (B, d_inner, d_state) @ (B, d_state) -> (B, d_inner)

        ### MODIFIED: skip connection is now applied based on the use_D flag.
        if self.D is not None:
            y = y + self.D.to(dtype) * x  # (B, d_inner) + (d_inner) * (B, d_inner) -> (B, d_inner)

        y = y * self.act(z)  # (B, d_inner)
        out = self.out_proj(y)  # (B, d_inner) -> (B, d_output)
        return out.unsqueeze(1), conv_state, ssm_state

    def allocate_inference_cache(self, batch_size, max_seqlen, dtype=None, **kwargs):
        """Return zero-initialized ``(conv_state, ssm_state)`` for decoding.

        conv_state: (batch_size, d_inner, d_conv); ssm_state: (batch_size, d_inner, d_state).
        ``max_seqlen`` and extra keyword arguments are accepted but not used.
        """
        device = self.out_proj.weight.device
        # conv1d is an Identity (no weight) when d_conv == 0; fall back to in_proj's dtype.
        conv_dtype = (self.conv1d.weight.dtype if hasattr(self.conv1d, "weight") else self.in_proj.weight.dtype) if dtype is None else dtype
        # d_inner is always an int, unlike d_model * expand for a fractional expand.
        conv_state = torch.zeros(
            batch_size, self.d_inner, self.d_conv, device=device, dtype=conv_dtype
        )
        ssm_dtype = self.dt_proj.weight.dtype if dtype is None else dtype
        # ssm_dtype = torch.float32
        ssm_state = torch.zeros(
            batch_size, self.d_inner, self.d_state, device=device, dtype=ssm_dtype
        )
        return conv_state, ssm_state

    def _get_states_from_cache(self, inference_params, batch_size, initialize_states=False):
        """Fetch this layer's ``(conv_state, ssm_state)`` from
        ``inference_params.key_value_memory_dict``, creating zeroed states on first use.

        With ``initialize_states`` set, already cached states are zeroed in place.
        """
        assert self.layer_idx is not None
        if self.layer_idx not in inference_params.key_value_memory_dict:
            conv_has_weight = hasattr(self.conv1d, "weight")
            conv_state = torch.zeros(
                batch_size,
                self.d_inner,
                self.d_conv,
                device=(self.conv1d.weight.device if conv_has_weight else self.in_proj.weight.device),
                dtype=(self.conv1d.weight.dtype if conv_has_weight else self.in_proj.weight.dtype),
            )
            ssm_state = torch.zeros(
                batch_size,
                self.d_inner,
                self.d_state,
                device=self.dt_proj.weight.device,
                dtype=self.dt_proj.weight.dtype,
                # dtype=torch.float32,
            )
            inference_params.key_value_memory_dict[self.layer_idx] = (conv_state, ssm_state)
        else:
            conv_state, ssm_state = inference_params.key_value_memory_dict[self.layer_idx]
            # TODO: What if batch size changes between generation, and we reuse the same states?
            if initialize_states:
                conv_state.zero_()
                ssm_state.zero_()
        return conv_state, ssm_state
================================================
FILE: layers/MultiWaveletCorrelation.py
================================================
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from typing import List, Tuple
import math
from functools import partial
from torch import nn, einsum, diagonal
from math import log2, ceil
import pdb
from sympy import Poly, legendre, Symbol, chebyshevt
from scipy.special import eval_legendre
def legendreDer(k, x):
    """Return the sum of ``(2*i + 1) * P_i(x)`` for ``i = k-1, k-3, ...`` down
    to 0 or 1, where ``P_i`` is evaluated with ``eval_legendre``.

    For ``k == 0`` the sum is empty and the integer 0 is returned.
    """
    total = 0
    for order in np.arange(k - 1, -1, -2):
        total += (2 * order + 1) * eval_legendre(order, x)
    return total
def phi_(phi_c, x, lb=0, ub=1):
    """Evaluate the polynomial with ascending coefficients ``phi_c`` at ``x``,
    forcing the result to zero wherever ``x`` lies outside ``[lb, ub]``."""
    outside = np.logical_or(x < lb, x > ub) * 1.0
    polynomial = np.polynomial.polynomial.Polynomial(phi_c)
    return polynomial(x) * (1 - outside)
def get_phi_psi(k, base):
    """Build ``k`` scaling functions ``phi`` and the two half-interval pieces
    ``psi1`` / ``psi2`` of ``k`` further functions obtained by Gram-Schmidt-style
    projection removal.

    Args:
        k: number of basis functions (coefficient matrices are ``k x k``).
        base: ``'legendre'`` or ``'chebyshev'``.

    Returns:
        ``(phi, psi1, psi2)``, three lists of length ``k``. For ``'legendre'``
        the entries are ``np.poly1d`` objects; for ``'chebyshev'`` they are
        ``functools.partial`` wrappers around ``phi_`` (``psi1`` is limited to
        roughly ``[0, 0.5]`` and ``psi2`` to roughly ``[0.5, 1]``).

    Note:
        Any other ``base`` matches neither branch, so ``phi`` is never bound and
        the final ``return`` raises.
    """
    x = Symbol('x')
    # Row ki holds polynomial coefficients in ascending power order
    # (np.flip reverses the order returned by sympy's all_coeffs()).
    phi_coeff = np.zeros((k, k))
    phi_2x_coeff = np.zeros((k, k))
    if base == 'legendre':
        for ki in range(k):
            # Legendre polynomial in (2x - 1), scaled by sqrt(2*ki + 1).
            coeff_ = Poly(legendre(ki, 2 * x - 1), x).all_coeffs()
            phi_coeff[ki, :ki + 1] = np.flip(np.sqrt(2 * ki + 1) * np.array(coeff_).astype(np.float64))
            # Same polynomial in (4x - 1), with an extra sqrt(2) factor.
            coeff_ = Poly(legendre(ki, 4 * x - 1), x).all_coeffs()
            phi_2x_coeff[ki, :ki + 1] = np.flip(np.sqrt(2) * np.sqrt(2 * ki + 1) * np.array(coeff_).astype(np.float64))

        psi1_coeff = np.zeros((k, k))
        psi2_coeff = np.zeros((k, k))
        for ki in range(k):
            # psi1 starts from phi_2x; psi2 starts from zero and only receives the subtractions below.
            psi1_coeff[ki, :] = phi_2x_coeff[ki, :]
            for i in range(k):
                a = phi_2x_coeff[ki, :ki + 1]
                b = phi_coeff[i, :i + 1]
                # np.convolve of coefficient vectors = coefficients of the product polynomial.
                prod_ = np.convolve(a, b)
                prod_[np.abs(prod_) < 1e-8] = 0
                # sum_n c_n * 0.5**(n+1) / (n+1): integral of the product over [0, 0.5].
                proj_ = (prod_ * 1 / (np.arange(len(prod_)) + 1) * np.power(0.5, 1 + np.arange(len(prod_)))).sum()
                psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :]
                psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :]
            for j in range(ki):
                # Remove the components along the previously built psi rows.
                a = phi_2x_coeff[ki, :ki + 1]
                b = psi1_coeff[j, :]
                prod_ = np.convolve(a, b)
                prod_[np.abs(prod_) < 1e-8] = 0
                proj_ = (prod_ * 1 / (np.arange(len(prod_)) + 1) * np.power(0.5, 1 + np.arange(len(prod_)))).sum()
                psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :]
                psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :]

            # Integral of psi1**2 over [0, 0.5].
            a = psi1_coeff[ki, :]
            prod_ = np.convolve(a, a)
            prod_[np.abs(prod_) < 1e-8] = 0
            norm1 = (prod_ * 1 / (np.arange(len(prod_)) + 1) * np.power(0.5, 1 + np.arange(len(prod_)))).sum()

            # Integral of psi2**2 over [0.5, 1] (uses 1 - 0.5**(n+1)).
            a = psi2_coeff[ki, :]
            prod_ = np.convolve(a, a)
            prod_[np.abs(prod_) < 1e-8] = 0
            norm2 = (prod_ * 1 / (np.arange(len(prod_)) + 1) * (1 - np.power(0.5, 1 + np.arange(len(prod_))))).sum()
            # Normalize both halves by the combined norm.
            norm_ = np.sqrt(norm1 + norm2)
            psi1_coeff[ki, :] /= norm_
            psi2_coeff[ki, :] /= norm_
            # Zero out numerical noise in the whole matrices.
            psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0
            psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0

        # np.poly1d expects descending coefficients, hence the flip back.
        phi = [np.poly1d(np.flip(phi_coeff[i, :])) for i in range(k)]
        psi1 = [np.poly1d(np.flip(psi1_coeff[i, :])) for i in range(k)]
        psi2 = [np.poly1d(np.flip(psi2_coeff[i, :])) for i in range(k)]

    elif base == 'chebyshev':
        for ki in range(k):
            if ki == 0:
                # Order 0 is a constant with its own scale factor.
                phi_coeff[ki, :ki + 1] = np.sqrt(2 / np.pi)
                phi_2x_coeff[ki, :ki + 1] = np.sqrt(2 / np.pi) * np.sqrt(2)
            else:
                coeff_ = Poly(chebyshevt(ki, 2 * x - 1), x).all_coeffs()
                phi_coeff[ki, :ki + 1] = np.flip(2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64))
                coeff_ = Poly(chebyshevt(ki, 4 * x - 1), x).all_coeffs()
                phi_2x_coeff[ki, :ki + 1] = np.flip(
                    np.sqrt(2) * 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64))

        # phi_ zeroes the polynomial outside [0, 1] (its default bounds).
        phi = [partial(phi_, phi_coeff[i, :]) for i in range(k)]

        x = Symbol('x')
        kUse = 2 * k
        # Quadrature nodes: roots of the degree-2k Chebyshev polynomial in (2x - 1).
        roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots()
        x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64)
        # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1)
        # not needed for our purpose here, we use even k always to avoid
        # Constant quadrature weight shared by all nodes.
        wm = np.pi / kUse / 2

        psi1_coeff = np.zeros((k, k))
        psi2_coeff = np.zeros((k, k))

        # Placeholders; each entry is replaced by a partial inside the loop below.
        psi1 = [[] for _ in range(k)]
        psi2 = [[] for _ in range(k)]

        for ki in range(k):
            psi1_coeff[ki, :] = phi_2x_coeff[ki, :]
            for i in range(k):
                # Weighted sum over the nodes approximating the projection onto phi[i].
                proj_ = (wm * phi[i](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum()
                psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :]
                psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :]

            for j in range(ki):
                # psi1[j] was finalized in an earlier iteration of the outer loop.
                proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum()
                psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :]
                psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :]

            # Temporary callables used only to compute the norms.
            psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5)
            psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5, ub=1)

            norm1 = (wm * psi1[ki](x_m) * psi1[ki](x_m)).sum()
            norm2 = (wm * psi2[ki](x_m) * psi2[ki](x_m)).sum()

            norm_ = np.sqrt(norm1 + norm2)
            psi1_coeff[ki, :] /= norm_
            psi2_coeff[ki, :] /= norm_
            psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0
            psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0

            # Final callables bound to the normalized coefficient rows; the
            # 1e-16 offsets shift the shared boundary at 0.5.
            psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5 + 1e-16)
            psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5 + 1e-16, ub=1)

    return phi, psi1, psi2
def get_filter(base, k):
    """Compute the ``k x k`` filter matrices for the given basis.

    Args:
        base: ``'legendre'`` or ``'chebyshev'``.
        k: number of basis functions.

    Returns:
        ``(H0, H1, G0, G1, PHI0, PHI1)`` as numpy arrays. Entries with absolute
        value below 1e-8 are set to zero. For ``'legendre'`` both ``PHI``
        matrices are the identity.

    Raises:
        Exception: if ``base`` is not one of the two supported names.
    """

    def psi(psi1, psi2, i, inp):
        # Use psi1 where inp <= 0.5 and psi2 elsewhere.
        lower_half = (inp <= 0.5) * 1.0
        return psi1[i](inp) * lower_half + psi2[i](inp) * (1 - lower_half)

    if base not in ['legendre', 'chebyshev']:
        raise Exception('Base not supported')

    x = Symbol('x')
    H0 = np.zeros((k, k))
    H1 = np.zeros((k, k))
    G0 = np.zeros((k, k))
    G1 = np.zeros((k, k))
    PHI0 = np.zeros((k, k))
    PHI1 = np.zeros((k, k))
    phi, psi1, psi2 = get_phi_psi(k, base)
    use_chebyshev = base == 'chebyshev'

    # Quadrature nodes x_m and weights wm for the chosen basis.
    if use_chebyshev:
        kUse = 2 * k
        roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots()
        x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64)
        # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1)
        # not needed for our purpose here, we use even k always to avoid
        wm = np.pi / kUse / 2
    else:
        roots = Poly(legendre(k, 2 * x - 1)).all_roots()
        x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64)
        wm = 1 / k / legendreDer(k, 2 * x_m - 1) / eval_legendre(k - 1, 2 * x_m - 1)

    left_nodes = x_m / 2
    right_nodes = (x_m + 1) / 2

    for ki in range(k):
        for kpi in range(k):
            H0[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki](left_nodes) * phi[kpi](x_m)).sum()
            G0[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, left_nodes) * phi[kpi](x_m)).sum()
            H1[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki](right_nodes) * phi[kpi](x_m)).sum()
            G1[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, right_nodes) * phi[kpi](x_m)).sum()

            if use_chebyshev:
                PHI0[ki, kpi] = (wm * phi[ki](2 * x_m) * phi[kpi](2 * x_m)).sum() * 2
                PHI1[ki, kpi] = (wm * phi[ki](2 * x_m - 1) * phi[kpi](2 * x_m - 1)).sum() * 2

    if use_chebyshev:
        PHI0[np.abs(PHI0) < 1e-8] = 0
        PHI1[np.abs(PHI1) < 1e-8] = 0
    else:
        PHI0 = np.eye(k)
        PHI1 = np.eye(k)

    # Drop numerical noise from the filters.
    for matrix in (H0, H1, G0, G1):
        matrix[np.abs(matrix) < 1e-8] = 0

    return H0, H1, G0, G1, PHI0, PHI1
class MultiWaveletTransform(nn.Module):
    """
    1D multiwavelet block.

    Projects the values with a linear layer, passes them through ``nCZ``
    ``MWT_CZ1d`` modules (ReLU between consecutive modules) and projects back.
    """

    def __init__(self, ich=1, k=8, alpha=16, c=128,
                 nCZ=1, L=0, base='legendre', attention_dropout=0.1):
        super().__init__()
        print('base', base)
        self.k = k
        self.c = c
        self.L = L
        self.nCZ = nCZ
        self.Lk0 = nn.Linear(ich, c * k)
        self.Lk1 = nn.Linear(c * k, ich)
        self.ich = ich
        blocks = [MWT_CZ1d(k, alpha, L, c, base) for _ in range(nCZ)]
        self.MWT_CZ = nn.ModuleList(blocks)

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape

        # Bring keys/values to the query length: zero-pad when shorter, truncate otherwise.
        if L > S:
            padding = torch.zeros_like(queries[:, :(L - S), :]).float()
            values = torch.cat([values, padding], dim=1)
            keys = torch.cat([keys, padding], dim=1)
        else:
            values = values[:, :L, :, :]
            keys = keys[:, :L, :, :]

        flat_values = values.view(B, L, -1)
        V = self.Lk0(flat_values).view(B, L, self.c, -1)

        last_index = self.nCZ - 1
        for idx in range(self.nCZ):
            V = self.MWT_CZ[idx](V)
            if idx < last_index:
                V = F.relu(V)

        V = self.Lk1(V.view(B, L, -1))
        V = V.view(B, L, -1, D)
        return (V.contiguous(), None)
class MultiWaveletCross(nn.Module):
    """
    1D Multiwavelet Cross Attention layer.

    Queries, keys and values are projected to ``c * k`` coefficients per time
    step, decomposed scale by scale with the multiwavelet filters returned by
    ``get_filter``, mixed per scale with ``FourierCrossAttentionW`` modules,
    and reconstructed back to the original resolution.
    """

    def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes, c=64,
                 k=8, ich=512,
                 L=0,
                 base='legendre',
                 mode_select_method='random',
                 initializer=None, activation='tanh',
                 **kwargs):
        super(MultiWaveletCross, self).__init__()
        print('base', base)

        self.c = c
        self.k = k
        self.L = L
        H0, H1, G0, G1, PHI0, PHI1 = get_filter(base, k)
        H0r = H0 @ PHI0
        G0r = G0 @ PHI0
        H1r = H1 @ PHI1
        G1r = G1 @ PHI1

        # Flush numerical noise in the reconstruction filters to exact zeros.
        H0r[np.abs(H0r) < 1e-8] = 0
        H1r[np.abs(H1r) < 1e-8] = 0
        G0r[np.abs(G0r) < 1e-8] = 0
        G1r[np.abs(G1r) < 1e-8] = 0
        self.max_item = 3

        self.attn1 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
                                            seq_len_kv=seq_len_kv, modes=modes, activation=activation,
                                            mode_select_method=mode_select_method)
        self.attn2 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
                                            seq_len_kv=seq_len_kv, modes=modes, activation=activation,
                                            mode_select_method=mode_select_method)
        self.attn3 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
                                            seq_len_kv=seq_len_kv, modes=modes, activation=activation,
                                            mode_select_method=mode_select_method)
        self.attn4 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q,
                                            seq_len_kv=seq_len_kv, modes=modes, activation=activation,
                                            mode_select_method=mode_select_method)
        self.T0 = nn.Linear(k, k)
        # Decomposition (ec_*) and reconstruction (rc_*) filter banks, stored
        # as buffers so they follow the module across devices.
        self.register_buffer('ec_s', torch.Tensor(
            np.concatenate((H0.T, H1.T), axis=0)))
        self.register_buffer('ec_d', torch.Tensor(
            np.concatenate((G0.T, G1.T), axis=0)))

        self.register_buffer('rc_e', torch.Tensor(
            np.concatenate((H0r, G0r), axis=0)))
        self.register_buffer('rc_o', torch.Tensor(
            np.concatenate((H1r, G1r), axis=0)))

        self.Lk = nn.Linear(ich, c * k)
        self.Lq = nn.Linear(ich, c * k)
        self.Lv = nn.Linear(ich, c * k)
        self.out = nn.Linear(c * k, ich)
        self.modes1 = modes

    def forward(self, q, k, v, mask=None):
        """Return ``(output, None)`` where ``output`` has shape ``(B, N, ich)``."""
        B, N, H, E = q.shape  # (B, N, H, E) torch.Size([3, 768, 8, 2])
        _, S, _, _ = k.shape  # (B, S, H, E) torch.Size([3, 96, 8, 2])

        # Merge the head axes and project to (c, k) coefficient blocks.
        q = q.view(q.shape[0], q.shape[1], -1)
        k = k.view(k.shape[0], k.shape[1], -1)
        v = v.view(v.shape[0], v.shape[1], -1)
        q = self.Lq(q)
        q = q.view(q.shape[0], q.shape[1], self.c, self.k)
        k = self.Lk(k)
        k = k.view(k.shape[0], k.shape[1], self.c, self.k)
        v = self.Lv(v)
        v = v.view(v.shape[0], v.shape[1], self.c, self.k)

        # Align keys/values with the query length (zero-pad or truncate).
        if N > S:
            zeros = torch.zeros_like(q[:, :(N - S), :]).float()
            v = torch.cat([v, zeros], dim=1)
            k = torch.cat([k, zeros], dim=1)
        else:
            v = v[:, :N, :, :]
            k = k[:, :N, :, :]

        # Extend the sequences to the next power of two by repeating their
        # leading steps, so every level can split into even/odd halves.
        ns = math.floor(np.log2(N))
        nl = pow(2, math.ceil(np.log2(N)))
        extra_q = q[:, 0:nl - N, :, :]
        extra_k = k[:, 0:nl - N, :, :]
        extra_v = v[:, 0:nl - N, :, :]
        q = torch.cat([q, extra_q], 1)
        k = torch.cat([k, extra_k], 1)
        v = torch.cat([v, extra_v], 1)

        Ud_q = torch.jit.annotate(List[Tuple[Tensor]], [])
        Ud_k = torch.jit.annotate(List[Tuple[Tensor]], [])
        Ud_v = torch.jit.annotate(List[Tuple[Tensor]], [])

        Us_q = torch.jit.annotate(List[Tensor], [])
        Us_k = torch.jit.annotate(List[Tensor], [])
        Us_v = torch.jit.annotate(List[Tensor], [])

        Ud = torch.jit.annotate(List[Tensor], [])
        Us = torch.jit.annotate(List[Tensor], [])

        # decompose
        # Each Ud_* entry holds the pair (d, s) for one level, while the
        # matching Us_* entry holds d again.
        for i in range(ns - self.L):
            d, q = self.wavelet_transform(q)
            Ud_q += [tuple([d, q])]
            Us_q += [d]
        for i in range(ns - self.L):
            d, k = self.wavelet_transform(k)
            Ud_k += [tuple([d, k])]
            Us_k += [d]
        for i in range(ns - self.L):
            d, v = self.wavelet_transform(v)
            Ud_v += [tuple([d, v])]
            Us_v += [d]

        # Per-level cross attention on the stored coefficients.
        for i in range(ns - self.L):
            dk, sk = Ud_k[i], Us_k[i]
            dq, sq = Ud_q[i], Us_q[i]
            dv, sv = Ud_v[i], Us_v[i]
            Ud += [self.attn1(dq[0], dk[0], dv[0], mask)[0] + self.attn2(dq[1], dk[1], dv[1], mask)[0]]
            Us += [self.attn3(sq, sk, sv, mask)[0]]
        # Attention on the coarsest remaining scale.
        v = self.attn4(q, k, v, mask)[0]

        # reconstruct
        for i in range(ns - 1 - self.L, -1, -1):
            v = v + Us[i]
            v = torch.cat((v, Ud[i]), -1)
            v = self.evenOdd(v)
        # Drop the padding steps before the output projection.
        v = self.out(v[:, :N, :, :].contiguous().view(B, N, -1))
        return (v.contiguous(), None)

    def wavelet_transform(self, x):
        """Split ``x`` into even/odd steps and return ``(d, s)`` at half length."""
        xa = torch.cat([x[:, ::2, :, :],
                        x[:, 1::2, :, :],
                        ], -1)
        d = torch.matmul(xa, self.ec_d)
        s = torch.matmul(xa, self.ec_s)
        return d, s

    def evenOdd(self, x):
        """Reconstruct a sequence of twice the length from ``2 * k`` coefficients."""
        B, N, c, ich = x.shape  # (B, N, c, k)
        assert ich == 2 * self.k
        x_e = torch.matmul(x, self.rc_e)
        x_o = torch.matmul(x, self.rc_o)

        # Allocate in the dtype of the reconstructed halves; the default dtype
        # would silently down-cast e.g. float64 activations.
        x = torch.zeros(B, N * 2, c, self.k,
                        device=x.device, dtype=x_e.dtype)
        x[..., ::2, :, :] = x_e
        x[..., 1::2, :, :] = x_o
        return x
class FourierCrossAttentionW(nn.Module):
    """Parameter-free cross attention computed on the lowest Fourier modes."""

    def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=16, activation='tanh',
                 mode_select_method='random'):
        super().__init__()
        print('corss fourier correlation used!')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.modes1 = modes
        self.activation = activation

    def compl_mul1d(self, order, x, weights):
        """Einsum ``order`` over two tensors, treating real inputs as complex with zero imaginary part."""
        x_is_complex = torch.is_complex(x)
        w_is_complex = torch.is_complex(weights)
        if not x_is_complex:
            x = torch.complex(x, torch.zeros_like(x).to(x.device))
        if not w_is_complex:
            weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))

        if not (x_is_complex or w_is_complex):
            # Both operands were real: the product is real as well.
            return torch.einsum(order, x.real, weights.real)

        real_part = torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag)
        imag_part = torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)
        return torch.complex(real_part, imag_part)

    def forward(self, q, k, v, mask):
        """Return ``(out, None)``; ``out`` has the same 4-D shape as ``q``."""
        B, L, E, H = q.shape
        # Put the sequence axis last so the FFT runs along it.
        xq = q.permute(0, 3, 2, 1)  # size = [B, H, E, L] torch.Size([3, 8, 64, 512])
        xk = k.permute(0, 3, 2, 1)
        xv = v.permute(0, 3, 2, 1)
        self.index_q = list(range(0, min(int(L // 2), self.modes1)))
        self.index_k_v = list(range(0, min(int(xv.shape[3] // 2), self.modes1)))

        # Compute Fourier coefficients and keep only the selected modes.
        xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat)
        xq_ft = torch.fft.rfft(xq, dim=-1)
        for dst, src in enumerate(self.index_q):
            xq_ft_[:, :, :, dst] = xq_ft[:, :, :, src]

        xk_ft_ = torch.zeros(B, H, E, len(self.index_k_v), device=xq.device, dtype=torch.cfloat)
        xk_ft = torch.fft.rfft(xk, dim=-1)
        for dst, src in enumerate(self.index_k_v):
            xk_ft_[:, :, :, dst] = xk_ft[:, :, :, src]

        # Query/key correlation across modes, then the activation.
        xqk_ft = (self.compl_mul1d("bhex,bhey->bhxy", xq_ft_, xk_ft_))
        if self.activation == 'tanh':
            xqk_ft = torch.complex(xqk_ft.real.tanh(), xqk_ft.imag.tanh())
        elif self.activation == 'softmax':
            xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
            xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
        else:
            raise Exception('{} actiation function is not implemented'.format(self.activation))

        # The correlation is applied to the key spectrum (xk_ft_).
        xqkvw = self.compl_mul1d("bhxy,bhey->bhex", xqk_ft, xk_ft_)

        # Scatter the kept modes into a full spectrum and go back to time domain.
        out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
        for src, dst in enumerate(self.index_q):
            out_ft[:, :, :, dst] = xqkvw[:, :, :, src]

        scaled = out_ft / self.in_channels / self.out_channels
        out = torch.fft.irfft(scaled, n=xq.size(-1)).permute(0, 3, 2, 1)
        # size = [B, L, H, E]
        return (out, None)
class sparseKernelFT1d(nn.Module):
    """Learned linear mixing of the lowest ``alpha`` Fourier modes of a (B, N, c, k) tensor."""

    def __init__(self,
                 k, alpha, c=1,
                 nl=1,
                 initializer=None,
                 **kwargs):
        super().__init__()

        self.modes1 = alpha
        width = c * k
        self.scale = (1 / (c * k * c * k))
        # Real and imaginary parts of the per-mode mixing weights.
        self.weights1 = nn.Parameter(self.scale * torch.rand(width, width, self.modes1, dtype=torch.float))
        self.weights2 = nn.Parameter(self.scale * torch.rand(width, width, self.modes1, dtype=torch.float))
        self.weights1.requires_grad = True
        self.weights2.requires_grad = True
        self.k = k

    def compl_mul1d(self, order, x, weights):
        """Einsum ``order`` over two tensors, treating real inputs as complex with zero imaginary part."""
        x_is_complex = torch.is_complex(x)
        w_is_complex = torch.is_complex(weights)
        if not x_is_complex:
            x = torch.complex(x, torch.zeros_like(x).to(x.device))
        if not w_is_complex:
            weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))

        if not (x_is_complex or w_is_complex):
            return torch.einsum(order, x.real, weights.real)

        real_part = torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag)
        imag_part = torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)
        return torch.complex(real_part, imag_part)

    def forward(self, x):
        """Filter ``x`` along the sequence axis in the frequency domain; shape is preserved."""
        B, N, c, k = x.shape  # (B, N, c, k)

        # Flatten (c, k) into channels and move the sequence axis last.
        channels_first = x.view(B, N, -1).permute(0, 2, 1)
        spectrum = torch.fft.rfft(channels_first)

        # Multiply relevant Fourier modes; the rest stay zero.
        n_modes = min(self.modes1, N // 2 + 1)
        out_ft = torch.zeros(B, c * k, N // 2 + 1, device=channels_first.device, dtype=torch.cfloat)
        kernel = torch.complex(self.weights1, self.weights2)[:, :, :n_modes]
        out_ft[:, :, :n_modes] = self.compl_mul1d("bix,iox->box", spectrum[:, :, :n_modes], kernel)

        filtered = torch.fft.irfft(out_ft, n=N)
        return filtered.permute(0, 2, 1).view(B, N, c, k)
# ##
class MWT_CZ1d(nn.Module):
    """
    One multiwavelet decomposition / reconstruction block.

    The input is decomposed level by level with the filters returned by
    ``get_filter``; the coefficients of every level pass through the Fourier
    kernels ``A``, ``B`` and ``C``, the coarsest scale passes through ``T0``,
    and the signal is reconstructed back to its original length.
    """

    def __init__(self,
                 k=3, alpha=64,
                 L=0, c=1,
                 base='legendre',
                 initializer=None,
                 **kwargs):
        super(MWT_CZ1d, self).__init__()

        self.k = k
        self.L = L
        H0, H1, G0, G1, PHI0, PHI1 = get_filter(base, k)
        H0r = H0 @ PHI0
        G0r = G0 @ PHI0
        H1r = H1 @ PHI1
        G1r = G1 @ PHI1

        # Flush numerical noise in the reconstruction filters to exact zeros.
        H0r[np.abs(H0r) < 1e-8] = 0
        H1r[np.abs(H1r) < 1e-8] = 0
        G0r[np.abs(G0r) < 1e-8] = 0
        G1r[np.abs(G1r) < 1e-8] = 0
        self.max_item = 3

        self.A = sparseKernelFT1d(k, alpha, c)
        self.B = sparseKernelFT1d(k, alpha, c)
        self.C = sparseKernelFT1d(k, alpha, c)

        self.T0 = nn.Linear(k, k)

        # Decomposition (ec_*) and reconstruction (rc_*) filter banks, stored
        # as buffers so they follow the module across devices.
        self.register_buffer('ec_s', torch.Tensor(
            np.concatenate((H0.T, H1.T), axis=0)))
        self.register_buffer('ec_d', torch.Tensor(
            np.concatenate((G0.T, G1.T), axis=0)))

        self.register_buffer('rc_e', torch.Tensor(
            np.concatenate((H0r, G0r), axis=0)))
        self.register_buffer('rc_o', torch.Tensor(
            np.concatenate((H1r, G1r), axis=0)))

    def forward(self, x):
        """Transform ``x`` of shape ``(B, N, c, k)``; the output has the same shape."""
        B, N, c, k = x.shape  # (B, N, c, k)
        ns = math.floor(np.log2(N))
        nl = pow(2, math.ceil(np.log2(N)))
        # Extend to the next power of two by repeating the leading steps, so
        # every level can split into even/odd halves.
        extra_x = x[:, 0:nl - N, :, :]
        x = torch.cat([x, extra_x], 1)
        Ud = torch.jit.annotate(List[Tensor], [])
        Us = torch.jit.annotate(List[Tensor], [])
        # decompose: x carries the coarse part down to the next level.
        for i in range(ns - self.L):
            d, x = self.wavelet_transform(x)
            Ud += [self.A(d) + self.B(x)]
            Us += [self.C(d)]
        x = self.T0(x)  # coarsest scale transform

        # reconstruct
        for i in range(ns - 1 - self.L, -1, -1):
            x = x + Us[i]
            x = torch.cat((x, Ud[i]), -1)
            x = self.evenOdd(x)
        # Drop the padding steps.
        x = x[:, :N, :, :]

        return x

    def wavelet_transform(self, x):
        """Split ``x`` into even/odd steps and return ``(d, s)`` at half length."""
        xa = torch.cat([x[:, ::2, :, :],
                        x[:, 1::2, :, :],
                        ], -1)
        d = torch.matmul(xa, self.ec_d)
        s = torch.matmul(xa, self.ec_s)
        return d, s

    def evenOdd(self, x):
        """Reconstruct a sequence of twice the length from ``2 * k`` coefficients."""
        B, N, c, ich = x.shape  # (B, N, c, k)
        assert ich == 2 * self.k
        x_e = torch.matmul(x, self.rc_e)
        x_o = torch.matmul(x, self.rc_o)

        # Allocate in the dtype of the reconstructed halves; the default dtype
        # would silently down-cast e.g. float64 activations.
        x = torch.zeros(B, N * 2, c, self.k,
                        device=x.device, dtype=x_e.dtype)
        x[..., ::2, :, :] = x_e
        x[..., 1::2, :, :] = x_o
        return x
================================================
FILE: layers/Pyraformer_EncDec.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.linear import Linear
from layers.SelfAttention_Family import AttentionLayer, FullAttention
from layers.Embed import DataEmbedding
import math
def get_mask(input_size, window_size, inner_size):
    """Get the attention mask of PAM-Naive.

    Returns ``(mask, all_size)``: ``all_size`` lists the node count of every
    pyramid level (finest first) and ``mask`` is a boolean square matrix over
    all nodes in which ``True`` marks pairs that must NOT attend to each other.
    """
    # Node count of every level; each level shrinks by its window size.
    all_size = [input_size]
    for level, window in enumerate(window_size):
        all_size.append(math.floor(all_size[level] / window))

    n_levels = len(all_size)
    level_start = [sum(all_size[:level]) for level in range(n_levels)]
    total = sum(all_size)
    allowed = torch.zeros(total, total)

    # Intra-scale links: each node sees its neighbours within the same level.
    half_window = inner_size // 2
    for begin, size in zip(level_start, all_size):
        end = begin + size
        for node in range(begin, end):
            lo = max(node - half_window, begin)
            hi = min(node + half_window + 1, end)
            allowed[node, lo:hi] = 1

    # Inter-scale links: each node sees its children one level below, and
    # vice versa. The last node of a level absorbs any leftover children.
    for level in range(1, n_levels):
        begin = level_start[level]
        child_begin = begin - all_size[level - 1]
        window = window_size[level - 1]
        last_node = begin + all_size[level] - 1
        for node in range(begin, begin + all_size[level]):
            lo = child_begin + (node - begin) * window
            if node == last_node:
                hi = begin
            else:
                hi = child_begin + (node - begin + 1) * window
            allowed[node, lo:hi] = 1
            allowed[lo:hi, node] = 1

    return (1 - allowed).bool(), all_size
def refer_points(all_sizes, window_size):
    """Gather features from PAM's pyramid sequences.

    For every finest-level position, records the flat index of the node that
    covers it at each pyramid level. Returns a long tensor of shape
    ``(1, input_size, n_levels, 1)``.
    """
    input_size = all_sizes[0]
    n_levels = len(all_sizes)
    # Flat offset at which each level starts in the concatenated sequence.
    level_start = [sum(all_sizes[:level]) for level in range(n_levels)]

    indexes = torch.zeros(input_size, n_levels)
    for position in range(input_size):
        indexes[position, 0] = position
        current = position
        for level in range(1, n_levels):
            offset_in_child = current - level_start[level - 1]
            # Clamp so leftover children map to the last node of the level.
            parent = min(offset_in_child // window_size[level - 1], all_sizes[level] - 1)
            current = level_start[level] + parent
            indexes[position, level] = current

    return indexes.unsqueeze(0).unsqueeze(3).long()
class RegularMask():
    """Wrap a mask tensor, inserting a singleton axis at position 1, behind a ``mask`` property."""

    def __init__(self, mask):
        expanded = mask.unsqueeze(1)
        self._mask = expanded

    @property
    def mask(self):
        """The stored mask with the extra axis."""
        return self._mask
class EncoderLayer(nn.Module):
    """ Masked self-attention followed by a position-wise feed-forward block. """

    def __init__(self, d_model, d_inner, n_head, dropout=0.1, normalize_before=True):
        super().__init__()

        inner_attention = FullAttention(mask_flag=True, factor=0,
                                        attention_dropout=dropout, output_attention=False)
        self.slf_attn = AttentionLayer(inner_attention, d_model, n_head)
        self.pos_ffn = PositionwiseFeedForward(
            d_model, d_inner, dropout=dropout, normalize_before=normalize_before)

    def forward(self, enc_input, slf_attn_mask=None):
        """Apply self-attention under ``slf_attn_mask`` and then the feed-forward block."""
        wrapped_mask = RegularMask(slf_attn_mask)
        attended, _ = self.slf_attn(
            enc_input, enc_input, enc_input, attn_mask=wrapped_mask)
        return self.pos_ffn(attended)
class Encoder(nn.Module):
    """ Encoder that embeds the input, builds a pyramid of coarser scales and applies masked self-attention. """

    def __init__(self, configs, window_size, inner_size):
        super().__init__()

        bottleneck_dim = configs.d_model//4

        # Pyramid attention mask and per-position gather indices.
        self.mask, self.all_size = get_mask(
            configs.seq_len, window_size, inner_size)
        self.indexes = refer_points(self.all_size, window_size)

        attention_stack = [
            EncoderLayer(configs.d_model, configs.d_ff, configs.n_heads, dropout=configs.dropout,
                         normalize_before=False)
            for _ in range(configs.e_layers)
        ]
        self.layers = nn.ModuleList(attention_stack)  # naive pyramid attention

        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.dropout)
        self.conv_layers = Bottleneck_Construct(
            configs.d_model, window_size, bottleneck_dim)

    def forward(self, x_enc, x_mark_enc):
        """Encode ``x_enc`` and return, per input position, the features gathered from every pyramid level."""
        seq_enc = self.enc_embedding(x_enc, x_mark_enc)

        # One copy of the mask per batch element, on the input's device.
        mask = self.mask.repeat(len(seq_enc), 1, 1).to(x_enc.device)
        seq_enc = self.conv_layers(seq_enc)

        for layer in self.layers:
            seq_enc = layer(seq_enc, mask)

        batch = seq_enc.size(0)
        width = seq_enc.size(2)
        # Expand the index table over batch and feature axes for torch.gather.
        gather_idx = self.indexes.repeat(batch, 1, 1, width).to(seq_enc.device)
        gather_idx = gather_idx.view(batch, -1, width)
        gathered = torch.gather(seq_enc, 1, gather_idx)

        # Concatenate the levels of each position along the feature axis.
        return gathered.view(batch, self.all_size[0], -1)
class ConvLayer(nn.Module):
    """Strided 1-D convolution, then batch norm and ELU; kernel size and stride both equal ``window_size``."""

    def __init__(self, c_in, window_size):
        super().__init__()
        self.downConv = nn.Conv1d(c_in, c_in,
                                  kernel_size=window_size,
                                  stride=window_size)
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()

    def forward(self, x):
        """Apply convolution, normalisation and activation in that order."""
        return self.activation(self.norm(self.downConv(x)))
class Bottleneck_Construct(nn.Module):
    """Bottleneck convolution CSCM: builds coarser scales in a reduced dimension and appends them to the input."""

    def __init__(self, d_model, window_size, d_inner):
        super().__init__()
        if isinstance(window_size, list):
            # One down-sampling layer per listed window size.
            stages = [ConvLayer(d_inner, window) for window in window_size]
        else:
            # A single window size is reused for three stages.
            stages = [
                ConvLayer(d_inner, window_size),
                ConvLayer(d_inner, window_size),
                ConvLayer(d_inner, window_size)
            ]
        self.conv_layers = nn.ModuleList(stages)
        self.up = Linear(d_inner, d_model)
        self.down = Linear(d_model, d_inner)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, enc_input):
        """Return ``enc_input`` followed along axis 1 by all coarser scales, layer-normalised."""
        # Reduce the feature size and move channels first for Conv1d.
        current = self.down(enc_input).permute(0, 2, 1)
        scales = []
        for stage in self.conv_layers:
            current = stage(current)
            scales.append(current)

        # Back to channels-last and to the model dimension.
        coarse = torch.cat(scales, dim=2).transpose(1, 2)
        coarse = self.up(coarse)

        pyramid = torch.cat([enc_input, coarse], dim=1)
        return self.norm(pyramid)
class PositionwiseFeedForward(nn.Module):
    """ Two-layer position-wise feed-forward network with a residual connection and pre- or post-layer-norm. """

    def __init__(self, d_in, d_hid, dropout=0.1, normalize_before=True):
        super().__init__()

        self.normalize_before = normalize_before

        self.w_1 = nn.Linear(d_in, d_hid)
        self.w_2 = nn.Linear(d_hid, d_in)

        self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the feed-forward block; the output has the same shape as ``x``."""
        shortcut = x
        hidden = self.layer_norm(x) if self.normalize_before else x

        hidden = self.dropout(F.gelu(self.w_1(hidden)))
        hidden = self.dropout(self.w_2(hidden))
        hidden = hidden + shortcut

        if self.normalize_before:
            return hidden
        return self.layer_norm(hidden)
================================================
FILE: layers/SelfAttention_Family.py
================================================
import torch
import torch.nn as nn
import numpy as np
from math import sqrt
from utils.masking import TriangularCausalMask, ProbMask
from reformer_pytorch import LSHSelfAttention
from einops import rearrange, repeat
class DSAttention(nn.Module):
    '''De-stationary Attention: scaled dot-product attention whose scores are rescaled by ``tau`` and shifted by ``delta``.'''

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super().__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """Return ``(V, A)``; ``A`` is ``None`` unless ``output_attention`` is set."""
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)

        # Missing factors fall back to the neutral values 1 and 0.
        if tau is None:
            tau = 1.0
        else:
            tau = tau.unsqueeze(1).unsqueeze(1)  # B x 1 x 1 x 1
        if delta is None:
            delta = 0.0
        else:
            delta = delta.unsqueeze(1).unsqueeze(1)  # B x 1 x 1 x S

        # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors
        scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta

        if self.mask_flag:
            # Without an explicit mask, fall back to a causal one.
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        attention = A if self.output_attention else None
        return V.contiguous(), attention
class FullAttention(nn.Module):
    """Scaled dot-product attention over ``[B, L, H, E]`` queries, with optional masking."""

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super().__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """Return ``(V, A)``; ``A`` is ``None`` unless ``output_attention`` is set.

        ``tau`` and ``delta`` are accepted but not used.
        """
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)

        if self.mask_flag:
            # Without an explicit mask, fall back to a causal one.
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        attention = A if self.output_attention else None
        return V.contiguous(), attention
class ProbAttention(nn.Module):
    """Sparse attention that scores only the top queries selected from randomly sampled keys.

    A subset of queries is selected by a sparsity measurement computed on
    sampled keys; only those queries get real attention, while every other
    position keeps an initial context (mean or cumulative sum of the values).
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        # Created for interface parity; not applied anywhere in this class.
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        """Select the ``n_top`` queries and return their scores against all keys.

        Q and K are ``[B, H, L, E]``. Returns ``(Q_K, M_top)`` with shapes
        ``[B, H, n_top, L_K]`` and ``[B, H, n_top]``.
        """
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        # real U = U_part(factor*ln(L_k))*L_q
        index_sample = torch.randint(L_K, (L_Q, sample_k))
        K_sample = K_expand[:, :, torch.arange(
            L_Q).unsqueeze(1), index_sample, :]
        # Remove only the singleton introduced by Q.unsqueeze(-2). A bare
        # squeeze() would also drop B, H, L_Q or sample_k when they equal 1
        # and corrupt the indexing below.
        Q_K_sample = torch.matmul(
            Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # find the Top_k query with sparsity measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        """Build the default context: mean of V without masking, cumulative sum with masking."""
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            contex = V_sum.unsqueeze(-2).expand(B, H,
                                                L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            # requires that L_Q == L_V, i.e. for self-attention only
            assert (L_Q == L_V)
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        """Overwrite the rows of ``context_in`` selected by ``index`` with real attention output.

        The incoming ``attn_mask`` is ignored; with ``mask_flag`` a ``ProbMask``
        is built from ``index`` instead.
        """
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            # Unselected rows report a uniform attention distribution.
            attns = (torch.ones([B, H, L_V, L_V]) /
                     L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[
                None, :, None], index, :] = attn
            return context_in, attns
        else:
            return context_in, None

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """Return ``(context, attn)``; ``tau`` and ``delta`` are accepted but not used.

        NOTE(review): ``context`` is returned as ``[B, H, L_Q, D]``, whereas
        DSAttention / FullAttention in this file return ``[B, L, H, D]`` and
        AttentionLayer reshapes the result with ``view(B, L, -1)``. Confirm
        this layout is intended before relying on it.
        """
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * \
            np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * \
            np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        # Never sample or select more entries than exist.
        U_part = U_part if U_part < L_K else L_K
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(
            queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1. / sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(
            context, values, scores_top, index, L_Q, attn_mask)

        return context.contiguous(), attn
class AttentionLayer(nn.Module):
    """Multi-head wrapper: projects queries/keys/values, runs ``attention`` and projects the result back."""

    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super().__init__()

        # Per-head sizes default to an even split of the model dimension.
        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """Return ``(output, attn)`` where ``attn`` is whatever the inner attention reports."""
        batch, q_len, _ = queries.shape
        _, kv_len, _ = keys.shape
        heads = self.n_heads

        # Split the projected features into heads.
        q_heads = self.query_projection(queries).view(batch, q_len, heads, -1)
        k_heads = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v_heads = self.value_projection(values).view(batch, kv_len, heads, -1)

        mixed, attn = self.inner_attention(
            q_heads, k_heads, v_heads, attn_mask, tau=tau, delta=delta)

        # Merge the heads again before the output projection.
        merged = mixed.view(batch, q_len, -1)
        return self.out_projection(merged), attn
class ReformerLayer(nn.Module):
    """Adapter exposing ``LSHSelfAttention`` through the attention-layer call signature.

    ``attention``, ``d_keys`` and ``d_values`` are accepted for signature
    parity with ``AttentionLayer`` but are not used.
    """

    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None, causal=False, bucket_size=4, n_hashes=4):
        super().__init__()
        self.bucket_size = bucket_size
        self.attn = LSHSelfAttention(
            dim=d_model,
            heads=n_heads,
            bucket_size=bucket_size,
            n_hashes=n_hashes,
            causal=causal
        )

    def fit_length(self, queries):
        """Zero-pad ``queries`` along axis 1 to a multiple of ``2 * bucket_size``.

        The input is returned unchanged when it already has a valid length.
        """
        # inside reformer: assert N % (bucket_size * 2) == 0
        B, N, C = queries.shape
        chunk = self.bucket_size * 2
        remainder = N % chunk
        if remainder == 0:
            return queries
        # fill the time series
        fill_len = chunk - remainder
        # new_zeros keeps the dtype and device of the input, so half-precision
        # inputs are not promoted to float32 by the concatenation.
        padding = queries.new_zeros(B, fill_len, C)
        return torch.cat([queries, padding], dim=1)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """Run LSH self-attention on ``queries`` and return ``(output, None)``.

        ``keys``, ``values``, ``attn_mask``, ``tau`` and ``delta`` are ignored;
        ``tau`` and ``delta`` default to ``None`` like the other attention
        layers in this file.
        """
        # in Reformer: default queries=keys
        B, N, C = queries.shape
        # Pad for the hashing buckets, then cut back to the original length.
        queries = self.attn(self.fit_length(queries))[:, :N, :]
        return queries, None
class TwoStageAttentionLayer(nn.Module):
    '''
    The Two Stage Attention (TSA) Layer
    input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model]

    Stage one attends across segments within each dimension; stage two
    exchanges information across dimensions through learnable router vectors.
    '''

    def __init__(self, configs,
                 seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1):
        super().__init__()
        d_ff = d_ff or 4 * d_model

        def make_attention():
            # Unmasked full attention wrapped in a multi-head layer.
            return AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                                output_attention=False), d_model, n_heads)

        self.time_attention = make_attention()
        self.dim_sender = make_attention()
        self.dim_receiver = make_attention()
        self.router = nn.Parameter(torch.randn(seg_num, factor, d_model))

        self.dropout = nn.Dropout(dropout)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.norm4 = nn.LayerNorm(d_model)

        self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff),
                                  nn.GELU(),
                                  nn.Linear(d_ff, d_model))
        self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff),
                                  nn.GELU(),
                                  nn.Linear(d_ff, d_model))

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """Apply both stages; ``attn_mask``, ``tau`` and ``delta`` are accepted but not forwarded."""
        batch = x.shape[0]

        # Cross Time Stage: Directly apply MSA to each dimension
        time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model')
        time_enc, attn = self.time_attention(
            time_in, time_in, time_in, attn_mask=None, tau=None, delta=None
        )
        dim_in = self.norm1(time_in + self.dropout(time_enc))
        dim_in = self.norm2(dim_in + self.dropout(self.MLP1(dim_in)))

        # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection
        dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=batch)
        batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=batch)
        # Routers first collect from all dimensions, then the dimensions read back.
        dim_buffer, attn = self.dim_sender(batch_router, dim_send, dim_send, attn_mask=None, tau=None, delta=None)
        dim_receive, attn = self.dim_receiver(dim_send, dim_buffer, dim_buffer, attn_mask=None, tau=None, delta=None)
        dim_enc = self.norm3(dim_send + self.dropout(dim_receive))
        dim_enc = self.norm4(dim_enc + self.dropout(self.MLP2(dim_enc)))

        return rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b=batch)
================================================
FILE: layers/StandardNorm.py
================================================
import torch
import torch.nn as nn
class Normalize(nn.Module):
    """Reversible instance normalisation: ``'norm'`` stores per-sample statistics, ``'denorm'`` reapplies them."""

    def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        :param subtract_last: if True, centre on the last time step instead of the mean
        :param non_norm: if True, both modes return the input unchanged
        """
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.subtract_last = subtract_last
        self.non_norm = non_norm
        if self.affine:
            self._init_params()

    def forward(self, x, mode: str):
        """Normalise (``mode='norm'``) or de-normalise (``mode='denorm'``) ``x``."""
        if mode == 'norm':
            self._get_statistics(x)
            return self._normalize(x)
        if mode == 'denorm':
            return self._denormalize(x)
        raise NotImplementedError

    def _init_params(self):
        """Create the learnable affine scale and shift."""
        # initialize RevIN params: (C,)
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        """Store the centring value and standard deviation of ``x`` for later use."""
        # Reduce over every axis except the first and the last.
        reduce_dims = tuple(range(1, x.ndim - 1))
        if self.subtract_last:
            self.last = x[:, -1, :].unsqueeze(1)
        else:
            self.mean = torch.mean(x, dim=reduce_dims, keepdim=True).detach()
        variance = torch.var(x, dim=reduce_dims, keepdim=True, unbiased=False)
        self.stdev = torch.sqrt(variance + self.eps).detach()

    def _normalize(self, x):
        """Centre and scale ``x`` with the stored statistics, then apply the affine map if enabled."""
        if self.non_norm:
            return x
        centre = self.last if self.subtract_last else self.mean
        x = (x - centre) / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        """Invert ``_normalize`` using the stored statistics."""
        if self.non_norm:
            return x
        if self.affine:
            x = x - self.affine_bias
            # eps * eps guards the division against a zero weight.
            x = x / (self.affine_weight + self.eps * self.eps)
        x = x * self.stdev
        centre = self.last if self.subtract_last else self.mean
        return x + centre
================================================
FILE: layers/TimeFilter_layers.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.normal import Normal
class GCN(nn.Module):
    """Multi-head graph aggregation: project the features, then average them with a row-normalised adjacency."""

    def __init__(self, dim, n_heads):
        super().__init__()
        self.proj = nn.Linear(dim, dim)
        self.n_heads = n_heads

    def forward(self, adj, x):
        """Aggregate ``x`` of shape ``[B, L, D]`` with ``adj`` of shape ``[B, H, L, L]``."""
        # adj [B, H, L, L]
        batch, length, _ = x.shape
        per_head = self.proj(x).view(batch, length, self.n_heads, -1)  # [B, L, H, D_]
        # L1-normalise every adjacency row so it acts as averaging weights.
        weights = F.normalize(adj, p=1, dim=-1)
        mixed = torch.einsum("bhij,bjhd->bihd", weights, per_head).contiguous()  # [B, L, H, D_]
        return mixed.view(batch, length, -1)
###############################
# Ablation
###############################
def mask_topk_moe(adj, thre, n_vars, masks):
    """Zero the entries of ``adj`` that do not exceed the threshold of their region.

    ``masks`` holds three region masks per row (indexed ``masks[:, r, :]``);
    when it is ``None`` they are rebuilt from ``n_vars``. ``thre[..., r]``
    gives the threshold for region ``r`` of each row.
    """
    # adj: [B, H, L, L], thre: [B, H, L, 3]
    if masks is None:
        B, H, L, _ = adj.shape
        N = L // n_vars
        device = adj.device
        dtype = torch.float32
        print("Masks is None!")
        rows = []
        for k in range(L):
            # S: same offset within a length-N block, excluding k itself.
            S = ((torch.arange(L) % N == k % N) & (torch.arange(L) != k)).to(dtype).to(device)
            # T: positions inside the same length-N block as k.
            T = ((torch.arange(L) >= k // N * N) & (torch.arange(L) < k // N * N + N)).to(dtype).to(device)
            # ST: everything else.
            ST = torch.ones(L).to(dtype).to(device) - S - T
            rows.append(torch.stack([S, T, ST], dim=0))
        # [L, 3, L]
        masks = torch.stack(rows, dim=0)

    regions = [adj * masks[:, region, :] for region in range(3)]
    kept = []
    for region, values in enumerate(regions):
        cutoff = thre[:, :, :, region].unsqueeze(-1)
        kept.append(values.masked_fill(values <= cutoff, 0))

    return kept[0] + kept[1] + kept[2]
def mask_topk_area(adj, n_vars, masks, alpha=0.5):
    """Keep, per row and per region, only the entries at or above a rank-based cut-off.

    ``masks`` holds three region masks per row (indexed ``masks[:, r, :]``);
    when it is ``None`` they are rebuilt from ``n_vars``. ``alpha`` sets the
    cut-off rank as a fraction of each region's nominal size.
    """
    # x: [B, H, L, L]
    B, H, L, _ = adj.shape
    N = L // n_vars
    if masks is None:
        device = adj.device
        dtype = torch.float32
        print("Masks is None!")
        rows = []
        for k in range(L):
            # S: same offset within a length-N block, excluding k itself.
            S = ((torch.arange(L) % N == k % N) & (torch.arange(L) != k)).to(dtype).to(device)
            # T: positions inside the same length-N block as k.
            T = ((torch.arange(L) >= k // N * N) & (torch.arange(L) < k // N * N + N)).to(dtype).to(device)
            # ST: everything else.
            ST = torch.ones(L).to(dtype).to(device) - S - T
            rows.append(torch.stack([S, T, ST], dim=0))
        # [L, 3, L]
        masks = torch.stack(rows, dim=0)

    # masks [L, 3, L]
    # Nominal number of entries in each of the three regions.
    size_s = n_vars - 1
    size_t = N - 1
    size_st = L - size_s - size_t - 1
    region_sizes = (size_s, size_t, size_st)

    def keep_top(region_values, count):
        # Value found at rank int(count * alpha) in descending order.
        rank = int(count * alpha)
        ordered, _ = torch.sort(region_values, dim=-1, descending=True)
        cutoff = ordered[:, :, :, rank]
        return region_values * (region_values >= cutoff.unsqueeze(-1))

    regions = [adj * masks[:, region, :] for region in range(3)]
    kept = [keep_top(values, count) for values, count in zip(regions, region_sizes)]

    return kept[0] + kept[1] + kept[2]
##########################
class mask_moe(nn.Module):
    """Gate that mixes three region masks per row using top-p expert selection.

    Each row of the input is projected to ``num_experts`` gate logits. The
    experts whose sorted cumulative probability is needed to exceed ``top_p``
    are selected, and the corresponding region masks are summed (plus the
    identity) to form the final mask.

    Args:
        n_vars: number of variables; ``L // n_vars`` is the block size used
            when default masks have to be built.
        top_p: cumulative-probability threshold for expert selection. A value
            of ``0.0`` disables gating and yields the identity mask.
        num_experts: number of gate outputs. The ``forward`` einsum contracts
            it against the region axis of ``masks``, so the two must match
            (default masks have 3 regions).
        in_dim: input feature size of the gate; the gate is applied to the
            last axis of the ``[B * H, L, L]`` input, so it must equal ``L``.
    """

    def __init__(self, n_vars, top_p=0.5, num_experts=3, in_dim=96):
        super().__init__()
        self.num_experts = num_experts
        self.n_vars = n_vars
        self.in_dim = in_dim

        self.gate = nn.Linear(self.in_dim, num_experts, bias=False)
        self.noise = nn.Linear(self.in_dim, num_experts, bias=False)
        self.noisy_gating = True
        self.softplus = nn.Softplus()
        self.softmax = nn.Softmax(2)
        self.top_p = top_p

    def cv_squared(self, x):
        """Return ``var(x) / (mean(x)**2 + eps)``; zero if ``x`` has one row."""
        eps = 1e-10
        if x.shape[0] == 1:
            return torch.tensor([0], device=x.device, dtype=x.dtype)
        return x.float().var() / (x.float().mean() ** 2 + eps)

    def cross_entropy(self, x):
        """Return the mean over rows of ``-sum(x * log(x + eps), dim=1)``.

        Returns a zero tensor when ``x`` has a single leading row.
        """
        eps = 1e-10
        if x.shape[0] == 1:
            return torch.tensor([0], device=x.device, dtype=x.dtype)
        return -torch.mul(x, torch.log(x + eps)).sum(dim=1).mean()

    def noisy_top_k_gating(self, x, is_training, noise_epsilon=1e-2):
        """Select experts per row by top-p over (optionally noisy) gate outputs.

        Args:
            x: 3-D tensor whose last axis has size ``in_dim``.
            is_training: when true (and noisy gating is on) Gaussian noise
                with a learned standard deviation is added to the logits.
            noise_epsilon: floor added to the noise standard deviation.

        Returns:
            ``(top_p_mask, loss)`` where ``top_p_mask`` is a boolean tensor of
            the same shape as the gate output marking selected experts, and
            ``loss`` combines the importance and entropy terms.
        """
        clean_logits = self.gate(x)
        if self.noisy_gating and is_training:
            raw_noise = self.noise(x)
            noise_stddev = self.softplus(raw_noise) + noise_epsilon
            logits = clean_logits + torch.randn_like(clean_logits) * noise_stddev
        else:
            logits = clean_logits

        # Convert logits to probabilities (softmax over dim 2).
        logits = self.softmax(logits)
        loss_dynamic = self.cross_entropy(logits)

        sorted_probs, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(sorted_probs, dim=-1)

        # `mask` marks sorted positions to drop: everything after the first
        # position whose cumulative probability exceeds top_p. That first
        # crossing position itself is kept.
        mask = cumulative_probs > self.top_p
        threshold_indices = mask.long().argmax(dim=-1)
        threshold_mask = torch.nn.functional.one_hot(
            threshold_indices, num_classes=sorted_indices.size(-1)).bool()
        mask = mask & ~threshold_mask

        # Map the kept sorted positions back to their original expert index.
        top_p_mask = torch.zeros_like(mask)
        zero_indices = (mask == 0).nonzero(as_tuple=True)
        top_p_mask[
            zero_indices[0],
            zero_indices[1],
            sorted_indices[zero_indices[0], zero_indices[1], zero_indices[2]],
        ] = 1

        sorted_probs = torch.where(mask, 0.0, sorted_probs)
        loss_importance = self.cv_squared(sorted_probs.sum(0))
        lambda_2 = 0.1
        loss = loss_importance + lambda_2 * loss_dynamic

        return top_p_mask, loss

    @staticmethod
    def _default_region_masks(L, N, device, dtype):
        """Build the default ``[L, 3, L]`` region masks in one broadcasted pass.

        For row ``k`` and column ``j``: region 0 is ``j % N == k % N`` with
        ``j != k``; region 1 is ``j`` inside the length-``N`` block containing
        ``k``; region 2 is ``1 - region0 - region1``.
        """
        idx = torch.arange(L)
        row = idx.unsqueeze(1)
        col = idx.unsqueeze(0)
        block_start = (row // N) * N
        same_offset = ((col % N == row % N) & (col != row)).to(dtype)
        same_block = ((col >= block_start) & (col < block_start + N)).to(dtype)
        remainder = torch.ones(L, L, dtype=dtype) - same_offset - same_block
        return torch.stack([same_offset, same_block, remainder], dim=1).to(device)

    def forward(self, x, masks=None):
        """Return ``(mask, loss)`` for an input of shape ``[B, H, L, L]``.

        ``mask`` is the gate-weighted sum of the region masks plus the
        identity. When ``top_p == 0.0`` only the identity (shape
        ``[1, 1, L, L]``) and a loss of ``0.0`` are returned.
        """
        # x [B, H, L, L]
        B, H, L, _ = x.shape
        device = x.device
        dtype = torch.float32

        mask_base = torch.eye(L, device=device, dtype=dtype).unsqueeze(0).unsqueeze(0)
        if self.top_p == 0.0:
            return mask_base, 0.0

        x = x.reshape(B * H, L, L)
        gates, loss = self.noisy_top_k_gating(x, self.training)
        # [B, H, L, 3]
        gates = gates.reshape(B, H, L, -1).float()

        if masks is None:
            print("Masks is None!")
            # Vectorized construction; avoids an L-iteration Python loop with
            # several host-to-device copies per row on every forward pass.
            masks = self._default_region_masks(L, L // self.n_vars, device, dtype)

        mask = torch.einsum('bhli,lid->bhld', gates, masks) + mask_base
        return mask, loss
def mask_topk(x, alpha=0.5, largest=False):
    """Return a float mask that zeroes the ``k`` entries picked by ``topk``.

    ``k = int(alpha * x.shape[-1])`` entries are picked along the last axis:
    the smallest ones by default, or the largest ones when ``largest`` is
    true. The picked positions are 0 in the result and all others are 1.

    Args:
        x: input tensor ([B, H, L, L] per the original comment).
        alpha: fraction of the last axis to zero out.
        largest: forwarded to ``torch.topk``.

    Returns:
        ``float32`` tensor with the same shape as ``x``.
    """
    num_picked = int(alpha * x.shape[-1])
    picked = torch.topk(x, num_picked, dim=-1, largest=largest).indices
    keep = torch.ones_like(x, dtype=torch.float32)
    # Picked positions become 0; everything else stays 1.
    keep.scatter_(-1, picked, 0)
    return keep  # [B, H, L, L]
class GraphLearner(nn.Module):
    """Learn a sparse adjacency from node features.

    Pairwise scores are the GELU of the inner product between two linear
    projections of the input. They are sparsified with ``mask_topk`` and then
    multiplied by the mask produced by ``mask_moe``.
    """

    def __init__(self, dim, n_vars, top_p=0.5, in_dim=96):
        super().__init__()
        self.dim = dim
        self.proj_1 = nn.Linear(dim, dim)
        self.proj_2 = nn.Linear(dim, dim)
        self.n_vars = n_vars
        self.mask_moe = mask_moe(n_vars, top_p=top_p, in_dim=in_dim)

    def forward(self, x, masks=None, alpha=0.5):
        """Return ``(adj, loss)``; ``x`` is [B, H, L, D] and ``adj`` is [B, H, L, L]."""
        queries = self.proj_1(x)
        keys = self.proj_2(x)
        scores = torch.einsum('bhid,bhjd->bhij', queries, keys)
        adj = F.gelu(scores)
        # KNN-style pruning of each row.
        adj = adj * mask_topk(adj, alpha)
        region_mask, loss = self.mask_moe(adj, masks)
        return adj * region_mask, loss
class GraphFilter(nn.Module):
    """Multi-head graph filter: learn an adjacency, then apply a graph convolution.

    The input is split into ``n_heads`` along the feature axis, a per-head
    adjacency is learned by ``GraphLearner``, normalised with a softmax,
    passed through dropout and handed to ``GCN`` together with the input.
    """

    def __init__(self, dim, n_vars, n_heads=4, scale=None, top_p=0.5, dropout=0., in_dim=96):
        super().__init__()
        self.dim = dim
        self.n_heads = n_heads
        if scale is None:
            scale = dim ** (-0.5)
        self.scale = scale
        self.dropout = nn.Dropout(dropout)
        head_dim = self.dim // self.n_heads
        self.graph_learner = GraphLearner(head_dim, n_vars, top_p, in_dim=in_dim)
        self.graph_conv = GCN(self.dim, self.n_heads)

    def forward(self, x, masks=None, alpha=0.5):
        """Return ``(out, loss)`` for ``x`` of shape [B, L, D]."""
        B, L, _ = x.shape
        # [B, L, D] -> [B, H, L, D / H]
        per_head = x.reshape(B, L, self.n_heads, -1).permute(0, 2, 1, 3)
        adj, loss = self.graph_learner(per_head, masks, alpha)  # [B, H, L, L]
        adj = self.dropout(torch.softmax(adj, dim=-1))
        return self.graph_conv(adj, x), loss  # [B, L, D]
class GraphBlock(nn.Module):
    """Pre-norm residual block: graph filter followed by a feed-forward network.

    ``d_ff`` defaults to ``4 * dim`` when not given.
    """

    def __init__(self, dim, n_vars, d_ff=None, n_heads=4, top_p=0.5, dropout=0., in_dim=96):
        super().__init__()
        self.dim = dim
        self.d_ff = d_ff if d_ff is not None else dim * 4
        self.gnn = GraphFilter(self.dim, n_vars, n_heads, top_p=top_p, dropout=dropout, in_dim=in_dim)
        self.norm1 = nn.LayerNorm(self.dim)
        self.ffn = nn.Sequential(
            nn.Linear(self.dim, self.d_ff),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(self.d_ff, self.dim),
        )
        self.norm2 = nn.LayerNorm(self.dim)

    def forward(self, x, masks=None, alpha=0.5):
        """Return ``(x, loss)`` for ``x`` of shape [B, L, D]."""
        graph_out, loss = self.gnn(self.norm1(x), masks, alpha)
        x = x + graph_out
        ffn_out = self.ffn(self.norm2(x))
        return x + ffn_out, loss
class TimeFilter_Backbone(nn.Module):
    """Stack of ``n_blocks`` ``GraphBlock`` layers.

    ``d_ff`` defaults to ``2 * hidden_dim`` when not given. The auxiliary
    losses returned by the blocks are averaged over the number of blocks.
    """

    def __init__(self, hidden_dim, n_vars, d_ff=None, n_heads=4, n_blocks=3, top_p=0.5, dropout=0., in_dim=96):
        super().__init__()
        self.dim = hidden_dim
        self.d_ff = d_ff if d_ff is not None else self.dim * 2

        # graph blocks
        graph_blocks = [
            GraphBlock(self.dim, n_vars, self.d_ff, n_heads, top_p, dropout, in_dim)
            for _ in range(n_blocks)
        ]
        self.blocks = nn.ModuleList(graph_blocks)
        self.n_blocks = n_blocks

    def forward(self, x, masks=None, alpha=0.5):
        """Run every block in order and return ``(x, mean_block_loss)``."""
        # x: [B, N, T]
        loss_sum = 0.0
        for graph_block in self.blocks:
            x, block_loss = graph_block(x, masks, alpha)
            loss_sum = loss_sum + block_loss
        return x, loss_sum / self.n_blocks  # [B, N, T]
================================================
FILE: layers/Transformer_EncDec.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvLayer(nn.Module):
    """Down-sampling layer: circular Conv1d -> BatchNorm -> ELU -> MaxPool.

    The convolution keeps the channel count (``c_in``); the max-pool uses
    stride 2 along the sequence axis.
    """

    def __init__(self, c_in):
        super().__init__()
        self.downConv = nn.Conv1d(
            in_channels=c_in,
            out_channels=c_in,
            kernel_size=3,
            padding=2,
            padding_mode='circular',
        )
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply the layer to ``x`` laid out as [batch, length, channels]."""
        # Conv1d/BatchNorm1d/MaxPool1d expect channels on axis 1.
        channels_first = x.permute(0, 2, 1)
        pooled = self.maxPool(self.activation(self.norm(self.downConv(channels_first))))
        return pooled.transpose(1, 2)
class EncoderLayer(nn.Module):
    """Post-norm encoder layer: self-attention plus a 1x1-convolution FFN.

    Args:
        attention: callable invoked as
            ``attention(x, x, x, attn_mask=..., tau=..., delta=...)`` and
            returning ``(output, attention_weights)``.
        d_model: model width.
        d_ff: hidden width of the feed-forward part; ``4 * d_model`` when falsy.
        dropout: dropout probability.
        activation: ``"relu"`` selects ReLU, anything else selects GELU.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """Return ``(output, attention_weights)``."""
        attn_out, attn = self.attention(
            x, x, x,
            attn_mask=attn_mask,
            tau=tau, delta=delta,
        )
        x = self.norm1(x + self.dropout(attn_out))

        # Position-wise FFN as two kernel-size-1 convolutions over channels.
        hidden = self.activation(self.conv1(x.transpose(-1, 1)))
        hidden = self.dropout(hidden)
        ffn_out = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(x + ffn_out), attn
class Encoder(nn.Module):
    """Sequential stack of attention layers with optional interleaved conv layers.

    When ``conv_layers`` is given, attention and conv layers are zipped
    pairwise, and afterwards the last attention layer is applied once more
    (without an attention mask). ``delta`` is only forwarded to the first
    layer in that mode. Without conv layers every attention layer receives
    the same ``attn_mask``, ``tau`` and ``delta``.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """Return ``(x, attns)`` where ``attns`` lists each layer's attention output."""
        # x [B, L, D]
        attns = []
        if self.conv_layers is None:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
                attns.append(attn)
        else:
            paired = zip(self.attn_layers, self.conv_layers)
            for index, (attn_layer, conv_layer) in enumerate(paired):
                # Only the first layer sees the caller's delta.
                layer_delta = delta if index == 0 else None
                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=layer_delta)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
class DecoderLayer(nn.Module):
    """Post-norm decoder layer: self-attention, cross-attention, then a conv FFN.

    Both attention callables are invoked with
    ``(queries, keys, values, attn_mask=..., tau=..., delta=...)`` and only
    the first element of their result is used. ``d_ff`` falls back to
    ``4 * d_model`` when falsy; ``activation == "relu"`` selects ReLU,
    anything else GELU.
    """

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        """Return the decoded tensor; ``delta`` is only passed to cross-attention."""
        self_out = self.self_attention(
            x, x, x,
            attn_mask=x_mask,
            tau=tau, delta=None,
        )[0]
        x = self.norm1(x + self.dropout(self_out))

        cross_out = self.cross_attention(
            x, cross, cross,
            attn_mask=cross_mask,
            tau=tau, delta=delta,
        )[0]
        x = self.norm2(x + self.dropout(cross_out))

        # Position-wise FFN as two kernel-size-1 convolutions over channels.
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        ffn_out = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm3(x + ffn_out)
class Decoder(nn.Module):
    """Run decoder layers in sequence, then an optional norm and projection."""

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        """Feed ``x`` through every layer with the same ``cross`` and masks."""
        for decoder_layer in self.layers:
            x = decoder_layer(
                x, cross,
                x_mask=x_mask, cross_mask=cross_mask,
                tau=tau, delta=delta,
            )

        # Optional post-processing stages, applied in this order.
        for stage in (self.norm, self.projection):
            if stage is not None:
                x = stage(x)
        return x
================================================
FILE: layers/__init__.py
================================================
================================================
FILE: models/Autoformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
import math
import numpy as np
class Model(nn.Module):
    """
    Autoformer is the first method to achieve the series-wise connection,
    with inherent O(LlogL) complexity
    Paper link: https://openreview.net/pdf?id=I55UqU-M11y

    The encoder is shared by every task. The decoder is only built for the
    forecasting tasks; the other tasks use a linear projection on top of the
    encoder output.
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        # Decomp
        self.decomp = series_decomp(configs.moving_avg)

        # Embedding
        self.enc_embedding = DataEmbedding_wo_pos(
            configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout)

        def correlation_layer(flag):
            # `flag` is forwarded as AutoCorrelation's first positional
            # argument (presumably the causal-mask switch — confirm in
            # layers/AutoCorrelation.py).
            return AutoCorrelationLayer(
                AutoCorrelation(flag, configs.factor,
                                attention_dropout=configs.dropout,
                                output_attention=False),
                configs.d_model, configs.n_heads)

        # Encoder
        encoder_layers = []
        for _ in range(configs.e_layers):
            encoder_layers.append(
                EncoderLayer(
                    correlation_layer(False),
                    configs.d_model,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(encoder_layers, norm_layer=my_Layernorm(configs.d_model))

        # Decoder
        if self.task_name in ('long_term_forecast', 'short_term_forecast'):
            self.dec_embedding = DataEmbedding_wo_pos(
                configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout)
            decoder_layers = []
            for _ in range(configs.d_layers):
                decoder_layers.append(
                    DecoderLayer(
                        correlation_layer(True),
                        correlation_layer(False),
                        configs.d_model,
                        configs.c_out,
                        configs.d_ff,
                        moving_avg=configs.moving_avg,
                        dropout=configs.dropout,
                        activation=configs.activation,
                    )
                )
            self.decoder = Decoder(
                decoder_layers,
                norm_layer=my_Layernorm(configs.d_model),
                projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
            )

        # Task heads that work directly on the encoder output.
        if self.task_name in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encode ``x_enc`` and decode a seasonal + trend forecast."""
        # decomp init
        batch, _, channels = x_dec.shape
        horizon_mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        horizon_zeros = torch.zeros([batch, self.pred_len, channels], device=x_enc.device)
        seasonal_init, trend_init = self.decomp(x_enc)

        # decoder input: last `label_len` steps followed by placeholders for
        # the prediction horizon (series mean for trend, zeros for seasonal).
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], horizon_mean], dim=1)
        seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], horizon_zeros], dim=1)

        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)

        # dec
        dec_in = self.dec_embedding(seasonal_init, x_mark_dec)
        seasonal_part, trend_part = self.decoder(
            dec_in, enc_out, x_mask=None, cross_mask=None, trend=trend_init)

        # final
        return trend_part + seasonal_part

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Project the encoder output; ``x_dec``, ``x_mark_dec`` and ``mask`` are unused."""
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)
        return self.projection(enc_out)

    def anomaly_detection(self, x_enc):
        """Project the encoder output computed without time-mark features."""
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)
        return self.projection(enc_out)

    def classification(self, x_enc, x_mark_enc):
        """Return class scores; ``x_mark_enc`` multiplies the features as a padding mask."""
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)

        # The encoder output has no final non-linearity, so add one here.
        features = self.dropout(self.act(enc_out))
        # zero-out padding embeddings
        features = features * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        flat = features.reshape(features.shape[0], -1)
        return self.projection(flat)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unknown task."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/Chronos.py
================================================
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
from chronos import BaseChronosPipeline
class Model(nn.Module):
    """Zero-shot forecaster wrapping the pretrained ``amazon/chronos-bolt-base`` pipeline.

    Only the ``zero_shot_forecast`` task is supported; ``forward`` returns
    ``None`` for any other task.
    """

    def __init__(self, configs):
        """
        configs: object providing ``task_name``, ``seq_len`` and ``pred_len``.
        """
        super().__init__()
        # Use the GPU when one is present, otherwise fall back to CPU so that
        # loading does not fail on CPU-only machines.
        device_map = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = BaseChronosPipeline.from_pretrained(
            "amazon/chronos-bolt-base",
            device_map=device_map,
            torch_dtype=torch.bfloat16,
        )
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast every channel of ``x_enc`` independently.

        Each slice ``x_enc[..., i]`` is passed to the pipeline's ``predict``
        and the result is averaged over its dim 1 (presumably the
        quantile/sample axis of the pipeline output — confirm against the
        chronos API). Per-channel results are stacked on a new last axis.
        Only ``x_enc`` is used.
        """
        per_channel = []
        for channel in range(x_enc.shape[-1]):
            prediction = self.model.predict(x_enc[..., channel], prediction_length=self.pred_len)
            per_channel.append(prediction.mean(dim=1))
        return torch.stack(per_channel, dim=-1)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the forecast for ``zero_shot_forecast``; ``None`` otherwise."""
        if self.task_name == 'zero_shot_forecast':
            return self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return None
================================================
FILE: models/Chronos2.py
================================================
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
from chronos import BaseChronosPipeline
class Model(nn.Module):
    """Zero-shot forecaster wrapping the pretrained ``amazon/chronos-2`` pipeline.

    Inputs are standardised per series and channel before prediction and the
    forecast is mapped back to the original scale. Only the
    ``zero_shot_forecast`` task is supported.
    """

    def __init__(self, configs):
        """
        configs: object providing ``task_name``, ``seq_len`` and ``pred_len``.
        """
        super().__init__()
        # Use the GPU when one is present, otherwise fall back to CPU so that
        # loading does not fail on CPU-only machines.
        device_map = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = BaseChronosPipeline.from_pretrained("amazon/chronos-2", device_map=device_map)
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return the de-normalised forecast for ``x_enc`` ([B, L, C]).

        Only ``x_enc`` is used. The second value returned by
        ``predict_quantiles`` is stacked into the forecast (presumably the
        per-series mean prediction laid out [C, pred_len] — confirm against
        the chronos API).
        """
        # Standardise along the time axis (dim 1).
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        # [B, L, C] -> [B, C, L] for the pipeline.
        x_enc = x_enc.permute(0, 2, 1)
        # detach() so tensors that track gradients can still be converted.
        _, dec_out = self.model.predict_quantiles(
            x_enc.detach().cpu().numpy(),
            prediction_length=self.pred_len,
            quantile_levels=[0.1, 0.5, 0.9],
        )
        dec_out = torch.stack(dec_out, dim=0).to(x_enc.device)
        dec_out = dec_out.permute(0, 2, 1)

        # Undo the standardisation; stdev/means are [B, 1, C] and broadcast
        # over the prediction axis.
        dec_out = dec_out * stdev + means
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the forecast for ``zero_shot_forecast``; ``None`` otherwise."""
        if self.task_name == 'zero_shot_forecast':
            return self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return None
================================================
FILE: models/Crossformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, repeat
from layers.Crossformer_EncDec import scale_block, Encoder, Decoder, DecoderLayer
from layers.Embed import PatchEmbedding
from layers.SelfAttention_Family import AttentionLayer, FullAttention, TwoStageAttentionLayer
from models.PatchTST import FlattenHead
from math import ceil
class Model(nn.Module):
    """
    Crossformer.
    Paper link: https://openreview.net/pdf?id=vSVLM2j9eie

    The input series is cut into segments of ``seg_len`` steps, embedded,
    and passed through a stack of ``scale_block`` encoders. Forecasting uses
    the decoder; the other tasks use a head on the last encoder output.
    """

    def __init__(self, configs):
        super().__init__()
        self.enc_in = configs.enc_in
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.seg_len = 12
        self.win_size = 2
        self.task_name = configs.task_name

        # Pad so that lengths not divisible by the segment length are handled.
        self.pad_in_len = ceil(1.0 * configs.seq_len / self.seg_len) * self.seg_len
        self.pad_out_len = ceil(1.0 * configs.pred_len / self.seg_len) * self.seg_len
        self.in_seg_num = self.pad_in_len // self.seg_len
        self.out_seg_num = ceil(self.in_seg_num / (self.win_size ** (configs.e_layers - 1)))
        self.head_nf = configs.d_model * self.out_seg_num
        out_seg_count = self.pad_out_len // self.seg_len

        # Embedding
        self.enc_value_embedding = PatchEmbedding(
            configs.d_model, self.seg_len, self.seg_len, self.pad_in_len - configs.seq_len, 0)
        self.enc_pos_embedding = nn.Parameter(
            torch.randn(1, configs.enc_in, self.in_seg_num, configs.d_model))
        self.pre_norm = nn.LayerNorm(configs.d_model)

        # Encoder
        encoder_blocks = []
        for depth in range(configs.e_layers):
            first = depth == 0
            encoder_blocks.append(
                scale_block(
                    configs,
                    1 if first else self.win_size,
                    configs.d_model, configs.n_heads, configs.d_ff,
                    1, configs.dropout,
                    self.in_seg_num if first else ceil(self.in_seg_num / self.win_size ** depth),
                    configs.factor,
                )
            )
        self.encoder = Encoder(encoder_blocks)

        # Decoder
        self.dec_pos_embedding = nn.Parameter(
            torch.randn(1, configs.enc_in, out_seg_count, configs.d_model))

        decoder_layers = []
        for _ in range(configs.e_layers + 1):
            decoder_layers.append(
                DecoderLayer(
                    TwoStageAttentionLayer(configs, out_seg_count, configs.factor,
                                           configs.d_model, configs.n_heads,
                                           configs.d_ff, configs.dropout),
                    AttentionLayer(
                        FullAttention(False, configs.factor,
                                      attention_dropout=configs.dropout,
                                      output_attention=False),
                        configs.d_model, configs.n_heads),
                    self.seg_len,
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                )
            )
        self.decoder = Decoder(decoder_layers)

        if self.task_name in ('imputation', 'anomaly_detection'):
            self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len,
                                    head_dropout=configs.dropout)
        elif self.task_name == 'classification':
            self.flatten = nn.Flatten(start_dim=-2)
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                self.head_nf * configs.enc_in, configs.num_class)

    def _encode(self, x_enc):
        """Embed ``x_enc`` and run the encoder; returns ``(embedded, enc_out)``."""
        # embedding
        x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1))
        x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars)
        x_enc += self.enc_pos_embedding
        x_enc = self.pre_norm(x_enc)
        enc_out, _ = self.encoder(x_enc)
        return x_enc, enc_out

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Decode from the learned decoder position embedding; only ``x_enc`` is used."""
        embedded, enc_out = self._encode(x_enc)
        dec_in = repeat(self.dec_pos_embedding, 'b ts_d l d -> (repeat b) ts_d l d',
                        repeat=embedded.shape[0])
        return self.decoder(dec_in, enc_out)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Apply the head to the last encoder output; only ``x_enc`` is used."""
        _, enc_out = self._encode(x_enc)
        return self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1)

    def anomaly_detection(self, x_enc):
        """Apply the head to the last encoder output."""
        _, enc_out = self._encode(x_enc)
        return self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1)

    def classification(self, x_enc, x_mark_enc):
        """Return class scores from the flattened last encoder output."""
        _, enc_out = self._encode(x_enc)

        features = self.flatten(enc_out[-1].permute(0, 1, 3, 2))
        features = self.dropout(features)
        features = features.reshape(features.shape[0], -1)
        return self.projection(features)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unknown task."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/DLinear.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Autoformer_EncDec import series_decomp
class Model(nn.Module):
    """
    DLinear: series decomposition followed by one linear map per component.
    Paper link: https://arxiv.org/pdf/2205.13504.pdf
    """

    def __init__(self, configs, individual=False):
        """
        individual: Bool; when true every channel gets its own pair of linear
        layers, otherwise one pair is shared by all channels.
        """
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        # Non-forecasting tasks reconstruct the input window, so the output
        # length equals the input length.
        if self.task_name in ('classification', 'anomaly_detection', 'imputation'):
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len
        # Series decomposition block from Autoformer
        self.decompsition = series_decomp(configs.moving_avg)
        self.individual = individual
        self.channels = configs.enc_in

        def averaging_weight():
            # Every output step starts as the plain mean of the input window.
            return nn.Parameter(
                (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))

        if self.individual:
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            for _ in range(self.channels):
                seasonal = nn.Linear(self.seq_len, self.pred_len)
                trend = nn.Linear(self.seq_len, self.pred_len)
                seasonal.weight = averaging_weight()
                trend.weight = averaging_weight()
                self.Linear_Seasonal.append(seasonal)
                self.Linear_Trend.append(trend)
        else:
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Seasonal.weight = averaging_weight()
            self.Linear_Trend.weight = averaging_weight()

        if self.task_name == 'classification':
            self.projection = nn.Linear(
                configs.enc_in * configs.seq_len, configs.num_class)

    def encoder(self, x):
        """Decompose ``x`` and map both components along the time axis.

        The decomposition outputs are permuted so time is the last axis,
        passed through the seasonal/trend linear layers, summed and permuted
        back.
        """
        seasonal_init, trend_init = self.decompsition(x)
        seasonal_init = seasonal_init.permute(0, 2, 1)
        trend_init = trend_init.permute(0, 2, 1)

        if not self.individual:
            seasonal_output = self.Linear_Seasonal(seasonal_init)
            trend_output = self.Linear_Trend(trend_init)
        else:
            seasonal_output = torch.zeros(
                [seasonal_init.size(0), seasonal_init.size(1), self.pred_len],
                dtype=seasonal_init.dtype).to(seasonal_init.device)
            trend_output = torch.zeros(
                [trend_init.size(0), trend_init.size(1), self.pred_len],
                dtype=trend_init.dtype).to(trend_init.device)
            # Each channel has its own pair of linear layers.
            for channel in range(self.channels):
                seasonal_layer = self.Linear_Seasonal[channel]
                trend_layer = self.Linear_Trend[channel]
                seasonal_output[:, channel, :] = seasonal_layer(seasonal_init[:, channel, :])
                trend_output[:, channel, :] = trend_layer(trend_init[:, channel, :])

        combined = seasonal_output + trend_output
        return combined.permute(0, 2, 1)

    def forecast(self, x_enc):
        """Forecast is the encoder output."""
        return self.encoder(x_enc)

    def imputation(self, x_enc):
        """Imputation is the encoder output."""
        return self.encoder(x_enc)

    def anomaly_detection(self, x_enc):
        """Reconstruction used for anomaly detection is the encoder output."""
        return self.encoder(x_enc)

    def classification(self, x_enc):
        """Flatten the encoder output and project it to class scores."""
        enc_out = self.encoder(x_enc)
        # (batch_size, seq_length * d_model)
        flat = enc_out.reshape(enc_out.shape[0], -1)
        # (batch_size, num_classes)
        return self.projection(flat)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; only ``x_enc`` is used. Unknown task -> ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc)  # [B, N]
        return None
================================================
FILE: models/ETSformer.py
================================================
import torch
import torch.nn as nn
from layers.Embed import DataEmbedding
from layers.ETSformer_EncDec import EncoderLayer, Encoder, DecoderLayer, Decoder, Transform
class Model(nn.Module):
    """
    ETSformer.
    Paper link: https://arxiv.org/abs/2202.01381

    The encoder produces level, growth and season components; the decoder
    turns growths and seasons into the output window, which is added to the
    last level value.
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        # Non-forecasting tasks reconstruct the input window.
        if self.task_name in ('classification', 'anomaly_detection', 'imputation'):
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len

        assert configs.e_layers == configs.d_layers, "Encoder and decoder layers must be equal"

        # Embedding
        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout)

        # Encoder
        encoder_layers = []
        for _ in range(configs.e_layers):
            encoder_layers.append(
                EncoderLayer(
                    configs.d_model, configs.n_heads, configs.enc_in,
                    configs.seq_len, self.pred_len, configs.top_k,
                    dim_feedforward=configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(encoder_layers)

        # Decoder
        decoder_layers = []
        for _ in range(configs.d_layers):
            decoder_layers.append(
                DecoderLayer(
                    configs.d_model, configs.n_heads, configs.c_out, self.pred_len,
                    dropout=configs.dropout,
                )
            )
        self.decoder = Decoder(decoder_layers)
        self.transform = Transform(sigma=0.2)

        if self.task_name == 'classification':
            self.act = torch.nn.functional.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

    def _reconstruct(self, x_enc, x_mark_enc):
        """Embed, encode and decode ``x_enc``; return last level + growth + season."""
        res = self.enc_embedding(x_enc, x_mark_enc)
        level, growths, seasons = self.encoder(res, x_enc, attn_mask=None)
        growth, season = self.decoder(growths, seasons)
        return level[:, -1:] + growth + season

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast from ``x_enc``; the input is transformed first while training."""
        # The transform is applied without tracking gradients and only in
        # training mode.
        with torch.no_grad():
            if self.training:
                x_enc = self.transform.transform(x_enc)
        return self._reconstruct(x_enc, x_mark_enc)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct the window; ``x_dec``, ``x_mark_dec`` and ``mask`` are unused."""
        return self._reconstruct(x_enc, x_mark_enc)

    def anomaly_detection(self, x_enc):
        """Reconstruct the window without time-mark features."""
        return self._reconstruct(x_enc, None)

    def classification(self, x_enc, x_mark_enc):
        """Return class scores; ``x_mark_enc`` multiplies the features as a padding mask."""
        res = self.enc_embedding(x_enc, None)
        _, growths, seasons = self.encoder(res, x_enc, attn_mask=None)

        # Sum each component over the layers and keep the first seq_len steps.
        growth_sum = torch.sum(torch.stack(growths, 0), 0)[:, :self.seq_len, :]
        season_sum = torch.sum(torch.stack(seasons, 0), 0)[:, :self.seq_len, :]
        enc_out = growth_sum + season_sum

        # The encoder output has no final non-linearity, so add one here.
        features = self.dropout(self.act(enc_out))

        # Output
        features = features * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        flat = features.reshape(features.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(flat)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unknown task."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/FEDformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Embed import DataEmbedding
from layers.AutoCorrelation import AutoCorrelationLayer
from layers.FourierCorrelation import FourierBlock, FourierCrossAttention
from layers.MultiWaveletCorrelation import MultiWaveletCross, MultiWaveletTransform
from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
class Model(nn.Module):
    """
    FEDformer performs the attention mechanism on frequency domain and achieved O(N) complexity
    Paper link: https://proceedings.mlr.press/v162/zhou22g.html

    The frequency blocks are wrapped in ``AutoCorrelationLayer`` and plugged
    into the Autoformer-style ``Encoder`` / ``Decoder``. Task heads are
    created according to ``configs.task_name``.
    """

    def __init__(self, configs, version='fourier', mode_select='random', modes=32):
        """
        version: str, 'Wavelets' builds the multi-wavelet blocks; any other
            value builds the Fourier blocks.
        mode_select: str, passed to the Fourier blocks as ``mode_select_method``
            (documented options: [random, low]).
        modes: int, modes to be selected.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        self.version = version
        self.mode_select = mode_select
        self.modes = modes

        d_model = configs.d_model
        n_heads = configs.n_heads
        # Query length handed to the decoder-side frequency blocks.
        dec_len = self.seq_len // 2 + self.pred_len

        # Decomp
        self.decomp = series_decomp(configs.moving_avg)
        self.enc_embedding = DataEmbedding(
            configs.enc_in, d_model, configs.embed, configs.freq, configs.dropout)
        self.dec_embedding = DataEmbedding(
            configs.dec_in, d_model, configs.embed, configs.freq, configs.dropout)

        if self.version == 'Wavelets':
            encoder_self_att = MultiWaveletTransform(ich=d_model, L=1, base='legendre')
            decoder_self_att = MultiWaveletTransform(ich=d_model, L=1, base='legendre')
            decoder_cross_att = MultiWaveletCross(
                in_channels=d_model,
                out_channels=d_model,
                seq_len_q=dec_len,
                seq_len_kv=self.seq_len,
                modes=self.modes,
                ich=d_model,
                base='legendre',
                activation='tanh',
            )
        else:
            encoder_self_att = FourierBlock(
                in_channels=d_model,
                out_channels=d_model,
                n_heads=n_heads,
                seq_len=self.seq_len,
                modes=self.modes,
                mode_select_method=self.mode_select,
            )
            decoder_self_att = FourierBlock(
                in_channels=d_model,
                out_channels=d_model,
                n_heads=n_heads,
                seq_len=dec_len,
                modes=self.modes,
                mode_select_method=self.mode_select,
            )
            decoder_cross_att = FourierCrossAttention(
                in_channels=d_model,
                out_channels=d_model,
                seq_len_q=dec_len,
                seq_len_kv=self.seq_len,
                modes=self.modes,
                mode_select_method=self.mode_select,
                num_heads=n_heads,
            )

        # Encoder: every layer wraps the same ``encoder_self_att`` instance
        # (used instead of multi-head attention in transformer).
        encoder_layers = [
            EncoderLayer(
                AutoCorrelationLayer(encoder_self_att, d_model, n_heads),
                d_model,
                configs.d_ff,
                moving_avg=configs.moving_avg,
                dropout=configs.dropout,
                activation=configs.activation,
            )
            for _ in range(configs.e_layers)
        ]
        self.encoder = Encoder(encoder_layers, norm_layer=my_Layernorm(d_model))

        # Decoder: the self/cross attention blocks are likewise shared by all layers.
        decoder_layers = [
            DecoderLayer(
                AutoCorrelationLayer(decoder_self_att, d_model, n_heads),
                AutoCorrelationLayer(decoder_cross_att, d_model, n_heads),
                d_model,
                configs.c_out,
                configs.d_ff,
                moving_avg=configs.moving_avg,
                dropout=configs.dropout,
                activation=configs.activation,
            )
            for _ in range(configs.d_layers)
        ]
        self.decoder = Decoder(
            decoder_layers,
            norm_layer=my_Layernorm(d_model),
            projection=nn.Linear(d_model, configs.c_out, bias=True),
        )

        # Task-specific heads
        if self.task_name in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(d_model * configs.seq_len, configs.num_class)

    def _encode(self, x_enc, x_mark_enc):
        """Embed the input and run it through the encoder; returns the encoder output only."""
        embedded = self.enc_embedding(x_enc, x_mark_enc)
        encoded, _ = self.encoder(embedded, attn_mask=None)
        return encoded

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encoder-decoder forecast; returns trend + seasonal decoder outputs."""
        # decomp init
        seasonal_init, trend_init = self.decomp(x_enc)  # x - moving_avg, moving_avg
        mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        # decoder input: last label_len steps, then the mean (trend) / zeros (seasonal)
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
        seasonal_init = F.pad(seasonal_init[:, -self.label_len:, :], (0, 0, 0, self.pred_len))
        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        # dec
        seasonal_part, trend_part = self.decoder(
            dec_out, enc_out, x_mask=None, cross_mask=None, trend=trend_init)
        # final
        return trend_part + seasonal_part

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Encoder-only path followed by the linear projection head."""
        return self.projection(self._encode(x_enc, x_mark_enc))

    def anomaly_detection(self, x_enc):
        """Encoder-only path (no time marks) followed by the linear projection head."""
        return self.projection(self._encode(x_enc, None))

    def classification(self, x_enc, x_mark_enc):
        """Encoder-only path; ``x_mark_enc`` multiplies the activations before flattening."""
        encoded = self._encode(x_enc, None)
        # Output
        hidden = self.dropout(self.act(encoded))
        hidden = hidden * x_mark_enc.unsqueeze(-1)
        flat = hidden.reshape(hidden.shape[0], -1)
        return self.projection(flat)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; unknown tasks return ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/FiLM.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from scipy import signal
from scipy import special as ss
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def transition(N):
    """Return the continuous-time ``(A, B)`` matrices consumed by ``HiPPO_LegT``.

    N: order of the projection; ``A`` has shape (N, N) and ``B`` shape (N, 1).
    Row ``i`` of both matrices is scaled by ``2 * i + 1``. Entries of ``A``
    above the diagonal are ``-1`` before scaling; the rest alternate in sign
    as ``(-1) ** (i - j + 1)``.
    """
    orders = np.arange(N, dtype=np.float64)
    row_scale = (2 * orders + 1)[:, None]  # / theta
    col, row = np.meshgrid(orders, orders)
    signs = np.where(row < col, -1, (-1.) ** (row - col + 1))
    A = signs * row_scale
    B = (-1.) ** orders[:, None] * row_scale
    return A, B
class HiPPO_LegT(nn.Module):
    """Discretised linear recurrence built from ``transition(N)``.

    The matrices are discretised with ``scipy.signal.cont2discrete`` and kept
    as buffers (``A``, ``B``) together with ``eval_matrix``, a table of
    Legendre polynomial values used by ``reconstruct``.
    """

    def __init__(self, N, dt=1.0, discretization='bilinear'):
        """
        N: the order of the HiPPO projection
        dt: discretization step size - should be roughly inverse to the length of the sequence
        discretization: method name forwarded to ``signal.cont2discrete``
        """
        super(HiPPO_LegT, self).__init__()
        self.N = N
        A, B = transition(N)
        C = np.ones((1, N))
        D = np.zeros((1,))
        A, B, _, _, _ = signal.cont2discrete((A, B, C, D), dt=dt, method=discretization)

        B = B.squeeze(-1)

        # Buffers follow the module on ``.to(...)``; the initial placement on the
        # module-level ``device`` is kept for backward compatibility.
        self.register_buffer('A', torch.Tensor(A).to(device))
        self.register_buffer('B', torch.Tensor(B).to(device))
        vals = np.arange(0.0, 1.0, dt)
        self.register_buffer('eval_matrix', torch.Tensor(
            ss.eval_legendre(np.arange(N)[:, None], 1 - 2 * vals).T).to(device))

    def forward(self, inputs):
        """
        inputs : 3-D tensor (d0, d1, length); the recurrence runs over the last axis
        output : (length, d0, d1, N) where N is the order of the HiPPO projection
        """
        # Allocate the state next to the input rather than on the global
        # ``device``: the module may have been moved to the CPU or to a GPU
        # other than cuda:0 after construction.
        c = torch.zeros(inputs.shape[:-1] + (self.N,), device=inputs.device)
        input_gain = self.B.unsqueeze(0)  # (1, N), loop-invariant
        cs = []
        for f in inputs.permute([-1, 0, 1]):
            new = f.unsqueeze(-1) @ input_gain
            c = F.linear(c, self.A) + new
            cs.append(c)
        return torch.stack(cs, dim=0)

    def reconstruct(self, c):
        """Multiply ``eval_matrix`` with coefficients ``c`` (last axis of size N)."""
        return (self.eval_matrix @ c.unsqueeze(-1)).squeeze(-1)
class SpectralConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, seq_len, ratio=0.5):
        """
        1D Fourier layer. It does FFT, linear transform, and Inverse FFT.

        Only the lowest ``min(32, seq_len // 2)`` frequency bins are
        transformed; ``ratio`` is stored but not otherwise used here.
        """
        super(SpectralConv1d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.ratio = ratio
        self.modes = min(32, seq_len // 2)
        self.index = list(range(0, self.modes))

        self.scale = (1 / (in_channels * out_channels))
        weight_shape = (in_channels, out_channels, len(self.index))
        # Real part is drawn first, then the imaginary part.
        self.weights_real = nn.Parameter(
            self.scale * torch.rand(*weight_shape, dtype=torch.float))
        self.weights_imag = nn.Parameter(
            self.scale * torch.rand(*weight_shape, dtype=torch.float))

    def compl_mul1d(self, order, x, weights_real, weights_imag):
        """Complex multiply of ``x`` by (weights_real + i * weights_imag) via einsum ``order``."""
        real_part = torch.einsum(order, x.real, weights_real) - torch.einsum(order, x.imag, weights_imag)
        imag_part = torch.einsum(order, x.real, weights_imag) + torch.einsum(order, x.imag, weights_real)
        return torch.complex(real_part, imag_part)

    def forward(self, x):
        """x: (B, H, E, N); FFT over the last axis, mix the low modes, inverse FFT back to N."""
        batch, heads, _, length = x.shape
        spectrum = torch.fft.rfft(x)
        out_ft = torch.zeros(batch, heads, self.out_channels, length // 2 + 1,
                             device=x.device, dtype=torch.cfloat)
        low_modes = spectrum[:, :, :, :self.modes]
        out_ft[:, :, :, :self.modes] = self.compl_mul1d(
            "bjix,iox->bjox", low_modes, self.weights_real, self.weights_imag)
        return torch.fft.irfft(out_ft, n=length)
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2205.08897

    Each (window size, scale) pair owns one ``HiPPO_LegT`` projection and one
    ``SpectralConv1d``; their per-scale outputs are merged by ``self.mlp``.
    The forecast, imputation and anomaly-detection tasks share the same
    normalise -> predict -> de-normalise path; classification skips the
    normalisation and adds a linear head.
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.configs = configs
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        # Tasks configured with pred_len == 0 produce seq_len steps instead.
        self.pred_len = configs.seq_len if configs.pred_len == 0 else configs.pred_len

        self.seq_len_all = self.seq_len + self.label_len

        self.layers = configs.e_layers
        self.enc_in = configs.enc_in
        self.e_layers = configs.e_layers
        # b, s, f means b, f
        self.affine_weight = nn.Parameter(torch.ones(1, 1, configs.enc_in))
        self.affine_bias = nn.Parameter(torch.zeros(1, 1, configs.enc_in))

        self.multiscale = [1, 2, 4]
        self.window_size = [256]
        configs.ratio = 0.5
        self.legts = nn.ModuleList(
            [HiPPO_LegT(N=n, dt=1. / self.pred_len / i) for n in self.window_size for i in self.multiscale])
        self.spec_conv_1 = nn.ModuleList([SpectralConv1d(in_channels=n, out_channels=n,
                                                         seq_len=min(self.pred_len, self.seq_len),
                                                         ratio=configs.ratio) for n in
                                          self.window_size for _ in range(len(self.multiscale))])
        self.mlp = nn.Linear(len(self.multiscale) * len(self.window_size), 1)

        if self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
            self.projection = nn.Linear(
                configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.enc_in * configs.seq_len, configs.num_class)

    def _instance_normalize(self, x_enc):
        """Standardise ``x_enc`` along dim 1; returns (normalised, means, stdev)."""
        # Normalization from Non-stationary Transformer
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        return centered / stdev, means, stdev

    def _denormalize(self, x_dec, means, stdev):
        """Undo the learnable affine map, then the standardisation."""
        # De-Normalization from Non-stationary Transformer
        x_dec = x_dec - self.affine_bias
        x_dec = x_dec / (self.affine_weight + 1e-10)
        x_dec = x_dec * stdev
        return x_dec + means

    def _multiscale_predict(self, x_enc):
        """Apply the affine map, run every scale branch and merge them with ``self.mlp``."""
        x_enc = x_enc * self.affine_weight + self.affine_bias
        x_decs = []
        jump_dist = 0
        for i in range(0, len(self.multiscale) * len(self.window_size)):
            # Branch i looks at the last (scale * pred_len) input steps.
            x_in_len = self.multiscale[i % len(self.multiscale)] * self.pred_len
            x_in = x_enc[:, -x_in_len:]
            legt = self.legts[i]
            x_in_c = legt(x_in.transpose(1, 2)).permute([1, 2, 3, 0])[:, :, :, jump_dist:]
            out1 = self.spec_conv_1[i](x_in_c)
            if self.seq_len >= self.pred_len:
                x_dec_c = out1.transpose(2, 3)[:, :, self.pred_len - 1 - jump_dist, :]
            else:
                x_dec_c = out1.transpose(2, 3)[:, :, -1, :]
            x_dec = x_dec_c @ legt.eval_matrix[-self.pred_len:, :].T
            x_decs.append(x_dec)
        x_dec = torch.stack(x_decs, dim=-1)
        return self.mlp(x_dec).squeeze(-1).permute(0, 2, 1)

    def _reconstruct(self, x_enc):
        """Shared normalise -> predict -> de-normalise path."""
        x_norm, means, stdev = self._instance_normalize(x_enc)
        return self._denormalize(self._multiscale_predict(x_norm), means, stdev)

    def forecast(self, x_enc, x_mark_enc, x_dec_true, x_mark_dec):
        """Forecast from ``x_enc``; the mark / decoder arguments are not used."""
        return self._reconstruct(x_enc)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Same computation as ``forecast``; ``mask`` is not used."""
        return self._reconstruct(x_enc)

    def anomaly_detection(self, x_enc):
        """Same computation as ``forecast``."""
        return self._reconstruct(x_enc)

    def classification(self, x_enc, x_mark_enc):
        """Multiscale prediction without normalisation, followed by the linear head."""
        x_dec = self._multiscale_predict(x_enc)

        # Output from Non-stationary Transformer
        output = self.act(x_dec)
        output = self.dropout(output)
        output = output * x_mark_enc.unsqueeze(-1)
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; unknown tasks return ``None``."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None
================================================
FILE: models/FreTS.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/pdf/2311.06184.pdf

    Frequency-domain MLP forecaster: every scalar input is lifted to an
    ``embed_size`` vector, optionally mixed across channels and then across
    time with MLPs applied to rFFT coefficients, and finally mapped to
    ``pred_len`` steps by a two-layer head. Only the forecast tasks are
    implemented in ``forward``.
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        # Non-forecast tasks produce one value per input step. This choice
        # used to be overwritten unconditionally with configs.pred_len.
        if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len
        self.embed_size = 128  # embed_size
        self.hidden_size = 256  # hidden_size
        self.feature_size = configs.enc_in  # channels
        self.seq_len = configs.seq_len
        self.channel_independence = configs.channel_independence
        self.sparsity_threshold = 0.01
        self.scale = 0.02
        self.embeddings = nn.Parameter(torch.randn(1, self.embed_size))
        self.r1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.i1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.rb1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.ib1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.r2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.i2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.rb2 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.ib2 = nn.Parameter(self.scale * torch.randn(self.embed_size))

        self.fc = nn.Sequential(
            nn.Linear(self.seq_len * self.embed_size, self.hidden_size),
            nn.LeakyReLU(),
            nn.Linear(self.hidden_size, self.pred_len)
        )

    # dimension extension
    def tokenEmb(self, x):
        """Lift x [Batch, Input length, Channel] to [B, N, T, D] by scaling ``self.embeddings``."""
        # x: [Batch, Input length, Channel]
        x = x.permute(0, 2, 1)
        x = x.unsqueeze(3)
        # N*T*1 x 1*D = N*T*D
        y = self.embeddings
        return x * y

    # frequency temporal learner
    def MLP_temporal(self, x, B, N, L):
        """Apply the frequency MLP along the time axis of x [B, N, T, D]."""
        # [B, N, T, D]
        x = torch.fft.rfft(x, dim=2, norm='ortho')  # FFT on L dimension
        y = self.FreMLP(B, N, L, x, self.r2, self.i2, self.rb2, self.ib2)
        x = torch.fft.irfft(y, n=self.seq_len, dim=2, norm="ortho")
        return x

    # frequency channel learner
    def MLP_channel(self, x, B, N, L):
        """Apply the frequency MLP along the channel axis of x [B, N, T, D]."""
        # [B, N, T, D]
        x = x.permute(0, 2, 1, 3)
        # [B, T, N, D]
        x = torch.fft.rfft(x, dim=2, norm='ortho')  # FFT on N dimension
        y = self.FreMLP(B, L, N, x, self.r1, self.i1, self.rb1, self.ib1)
        x = torch.fft.irfft(y, n=self.feature_size, dim=2, norm="ortho")
        x = x.permute(0, 2, 1, 3)
        # [B, N, T, D]
        return x

    # frequency-domain MLPs
    # dimension: FFT along the dimension, r: the real part of weights, i: the imaginary part of weights
    # rb: the real part of bias, ib: the imaginary part of bias
    def FreMLP(self, B, nd, dimension, x, r, i, rb, ib):
        """Complex-valued layer on spectrum ``x``: ReLU on both parts, then soft-shrinkage.

        ``B``, ``nd`` and ``dimension`` are kept for interface compatibility;
        the output shape follows ``x``.
        """
        o1_real = F.relu(
            torch.einsum('bijd,dd->bijd', x.real, r)
            - torch.einsum('bijd,dd->bijd', x.imag, i)
            + rb
        )
        o1_imag = F.relu(
            torch.einsum('bijd,dd->bijd', x.imag, r)
            + torch.einsum('bijd,dd->bijd', x.real, i)
            + ib
        )

        y = torch.stack([o1_real, o1_imag], dim=-1)
        y = F.softshrink(y, lambd=self.sparsity_threshold)
        y = torch.view_as_complex(y)
        return y

    def forecast(self, x_enc):
        """Map x_enc [Batch, Input length, Channel] to [Batch, pred_len, Channel]."""
        # x: [Batch, Input length, Channel]
        B, T, N = x_enc.shape
        # embedding x: [B, N, T, D]
        x = self.tokenEmb(x_enc)
        bias = x
        # [B, N, T, D]
        # The flag may arrive as an int or as a string; comparing the raw
        # value with '0' never matched an integer 0, which left the channel
        # learner unreachable.
        if str(self.channel_independence) == '0':
            x = self.MLP_channel(x, B, N, T)
        # [B, N, T, D]
        x = self.MLP_temporal(x, B, N, T)
        x = x + bias
        x = self.fc(x.reshape(B, N, -1)).permute(0, 2, 1)
        return x

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Forecast entry point; ``mask`` is accepted for signature parity and ignored.

        Raises:
            ValueError: for any task other than long/short term forecasting.
        """
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        else:
            raise ValueError('Only forecast tasks implemented yet')
================================================
FILE: models/Informer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
from layers.SelfAttention_Family import ProbAttention, AttentionLayer
from layers.Embed import DataEmbedding
class Model(nn.Module):
    """
    Informer with ProbSparse attention in O(LlogL) complexity
    Paper link: https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.label_len = configs.label_len

        d_model = configs.d_model
        n_heads = configs.n_heads

        # Embedding
        self.enc_embedding = DataEmbedding(
            configs.enc_in, d_model, configs.embed, configs.freq, configs.dropout)
        self.dec_embedding = DataEmbedding(
            configs.dec_in, d_model, configs.embed, configs.freq, configs.dropout)

        # Encoder
        encoder_layers = [
            EncoderLayer(
                AttentionLayer(
                    ProbAttention(False, configs.factor, attention_dropout=configs.dropout,
                                  output_attention=False),
                    d_model, n_heads),
                d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation,
            )
            for _ in range(configs.e_layers)
        ]
        # Distilling conv layers are only built for forecast tasks with distil enabled.
        if configs.distil and ('forecast' in configs.task_name):
            conv_layers = [ConvLayer(d_model) for _ in range(configs.e_layers - 1)]
        else:
            conv_layers = None
        self.encoder = Encoder(
            encoder_layers,
            conv_layers,
            norm_layer=torch.nn.LayerNorm(d_model),
        )

        # Decoder
        decoder_layers = [
            DecoderLayer(
                AttentionLayer(
                    ProbAttention(True, configs.factor, attention_dropout=configs.dropout,
                                  output_attention=False),
                    d_model, n_heads),
                AttentionLayer(
                    ProbAttention(False, configs.factor, attention_dropout=configs.dropout,
                                  output_attention=False),
                    d_model, n_heads),
                d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation,
            )
            for _ in range(configs.d_layers)
        ]
        self.decoder = Decoder(
            decoder_layers,
            norm_layer=torch.nn.LayerNorm(d_model),
            projection=nn.Linear(d_model, configs.c_out, bias=True),
        )

        # Task-specific heads
        if self.task_name in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(d_model * configs.seq_len, configs.num_class)

    def _encode(self, x_enc, x_mark_enc):
        """Embed the input and return the encoder output (attention maps dropped)."""
        embedded = self.enc_embedding(x_enc, x_mark_enc)
        encoded, _ = self.encoder(embedded, attn_mask=None)
        return encoded

    def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encoder-decoder forecast on the raw (un-normalised) input."""
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        return self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None)  # [B, L, D]

    def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Same as ``long_forecast`` but with the encoder input standardised along time.

        The decoder output is rescaled with the encoder statistics; ``x_dec``
        itself is fed in unchanged.
        """
        # Normalization
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        centered = x_enc - mean_enc
        std_enc = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
        normalized = centered / std_enc

        enc_out = self.enc_embedding(normalized, x_mark_enc)
        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        enc_out, attns = self.encoder(enc_out, attn_mask=None)
        dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None)

        return dec_out * std_enc + mean_enc  # [B, L, D]

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Encoder-only path followed by the linear projection head."""
        return self.projection(self._encode(x_enc, x_mark_enc))

    def anomaly_detection(self, x_enc):
        """Encoder-only path (no time marks) followed by the linear projection head."""
        return self.projection(self._encode(x_enc, None))

    def classification(self, x_enc, x_mark_enc):
        """Encoder-only path; flattens the masked activations into class logits."""
        encoded = self._encode(x_enc, None)

        # Output
        hidden = self.act(encoded)  # the output transformer encoder/decoder embeddings don't include non-linearity
        hidden = self.dropout(hidden)
        hidden = hidden * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        flat = hidden.reshape(hidden.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(flat)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; unknown tasks return ``None``."""
        task = self.task_name
        if task == 'long_term_forecast':
            dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'short_term_forecast':
            dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/KANAD.py
================================================
import numpy as np
import torch
import torch.nn as nn
from einops import rearrange
class KANADModel(nn.Module):
    """Convolutional model over cosine features of a univariate window.

    The input window is expanded into ``2 * order + 1`` channels: ``order``
    fixed cosine curves (buffer ``orders``), ``order`` cosines of the scaled
    input, and the input itself. Two residual convolution stages reduce the
    stack back to one channel, followed by a linear map over the window axis.
    """

    def __init__(self, window: int, order: int, *args, **kwargs) -> None:
        super().__init__()
        self.order = order
        self.window = window
        self.channels = 2 * self.order + 1
        self.register_buffer(
            "orders",
            self._create_custom_periodic_cosine(self.window, self.order).unsqueeze(
                0
            ),  # (1, order, window)
        )
        self.out_conv = nn.Conv1d(self.channels, 1, 1, bias=False)
        self.act = nn.GELU()
        self.bn1 = nn.BatchNorm1d(self.channels)
        self.bn3 = nn.BatchNorm1d(1)
        self.bn2 = nn.BatchNorm1d(self.channels)
        self.init_conv = nn.Conv1d(self.channels, self.channels, 3, 1, 1, bias=False)
        self.inner_conv = nn.Conv1d(self.channels, self.channels, 3, 1, 1, bias=False)
        self.final_conv = nn.Linear(window, window)

    def forward(self, x: torch.Tensor, return_last: bool = False, *args, **kwargs):
        """Run the model on ``x`` of shape (batch, window).

        Returns a (batch, window) tensor; with ``return_last=True`` returns a
        tuple of that tensor and the un-squeezed (batch, 1, window) tensor.
        """
        x_1ch = x.unsqueeze(1)  # (batch, 1, window)
        features = torch.concat(
            [self.orders.repeat(x.size(0), 1, 1)]  # type: ignore
            + [torch.cos(order * x_1ch) for order in range(1, self.order + 1)]
            + [x_1ch],
            dim=1,
        )  # batch,self.channel,window

        hidden = self.init_conv(features)
        hidden = self.bn1(hidden)
        hidden = self.act(hidden)
        hidden = self.inner_conv(hidden) + features  # residual on the feature stack
        hidden = self.bn2(hidden)
        hidden = self.act(hidden)
        out = self.out_conv(hidden) + x_1ch  # residual on the raw input
        out = self.bn3(out)
        out = self.act(out)
        out = self.final_conv(out)
        if return_last:
            return out.squeeze(1), out
        return out.squeeze(1)

    def _create_custom_periodic_cosine(self, window: int, period) -> torch.Tensor:
        """Return a (num_periods, window) table of ``cos(2 * pi * t / p)``.

        ``period`` is either a list of periods or an int ``k`` meaning the
        periods ``1..k``; ``t`` takes the ``window`` values ``0, 1/window, ...``.
        """
        periods = period if isinstance(period, list) else list(range(1, period + 1))
        # Build the sample points from an integer arange: a float step of
        # 1 / window makes the element count depend on rounding, which can
        # yield window + 1 samples and break the row assignment below.
        phase = torch.arange(window, dtype=torch.float32) / window
        result = torch.empty(len(periods), window, dtype=torch.float32)
        for i, p in enumerate(periods):
            result[i, :] = torch.cos(phase / p * 2 * np.pi)
        return result
class Model(nn.Module):
    """Task wrapper around ``KANADModel``; only anomaly detection is implemented."""

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.order = configs.d_model
        # Encoder: configs.d_model is used as the cosine order.
        self.enc = KANADModel(window=self.seq_len, order=configs.d_model)

    def anomaly_detection(self, x_enc):
        """Process each channel independently and restore the [B, L, D] layout."""
        batch_size = x_enc.size(0)
        # [B, L, D] -> [B * D, L]
        per_channel = rearrange(x_enc, "B L D -> (B D) L")
        encoded = self.enc(per_channel)
        # [B * D, L] -> [B, L, D]
        return rearrange(encoded, "(B D) L -> B L D", B=batch_size)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``.

        Raises:
            NotImplementedError: for forecasting, imputation and classification.
        Returns ``None`` for an unrecognised task name.
        """
        task = self.task_name
        if task == "long_term_forecast" or task == "short_term_forecast":
            raise NotImplementedError(
                "Task forecasting for KANAD is temporarily not supported"
            )
        if task == "imputation":
            raise NotImplementedError(
                "Task imputation for KANAD is temporarily not supported"
            )
        if task == "anomaly_detection":
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == "classification":
            raise NotImplementedError(
                "Task classification for KANAD is temporarily not supported"
            )
        return None
================================================
FILE: models/Koopa.py
================================================
import math
import torch
import torch.nn as nn
from data_provider.data_factory import data_provider
class FourierFilter(nn.Module):
    """
    Fourier Filter: to time-variant and time-invariant term

    ``mask_spectrum`` holds the rFFT bin indices (along dim 1) that are zeroed
    to obtain the time-variant part; the time-invariant part is the remainder.
    """
    def __init__(self, mask_spectrum):
        super(FourierFilter, self).__init__()
        self.mask_spectrum = mask_spectrum

    def forward(self, x):
        """Split ``x`` (FFT taken along dim 1) into ``(x_var, x_inv)`` with ``x_var + x_inv == x``."""
        xf = torch.fft.rfft(x, dim=1)
        mask = torch.ones_like(xf)
        mask[:, self.mask_spectrum, :] = 0
        # Pass the original length explicitly: without ``n`` the inverse
        # transform returns 2 * (bins - 1) samples, one short for odd lengths,
        # and the subtraction below would fail on the shape mismatch.
        x_var = torch.fft.irfft(xf * mask, n=x.shape[1], dim=1)
        x_inv = x - x_var

        return x_var, x_inv
class MLP(nn.Module):
    '''
    Multilayer perceptron to encode/decode high dimension representation of sequential data

    The stack is Linear -> activation -> Dropout, repeated once plus
    ``hidden_layers - 2`` more times, and closed by a Linear to ``f_out``.
    The same activation module instance is reused at every position.
    '''
    def __init__(self,
                 f_in,
                 f_out,
                 hidden_dim=128,
                 hidden_layers=2,
                 dropout=0.05,
                 activation='tanh'):
        super(MLP, self).__init__()
        self.f_in = f_in
        self.f_out = f_out
        self.hidden_dim = hidden_dim
        self.hidden_layers = hidden_layers
        self.dropout = dropout

        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        else:
            raise NotImplementedError

        # Input block
        stack = [nn.Linear(self.f_in, self.hidden_dim), self.activation, nn.Dropout(self.dropout)]
        # Extra hidden blocks (none when hidden_layers <= 2)
        for _ in range(self.hidden_layers - 2):
            stack.append(nn.Linear(self.hidden_dim, self.hidden_dim))
            stack.append(self.activation)
            stack.append(nn.Dropout(dropout))
        # Output projection
        stack.append(nn.Linear(hidden_dim, f_out))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        # x: B x S x f_in
        # y: B x S x f_out
        return self.layers(x)
class KPLayer(nn.Module):
    """
    A demonstration of finding one step transition of linear system by DMD iteratively

    The operator ``K`` is re-estimated on every call by least squares between
    consecutive snapshots and cached on the instance.
    """
    def __init__(self):
        super(KPLayer, self).__init__()

        self.K = None  # B E E

    def one_step_forward(self, z, return_rec=False, return_K=False):
        """Fit ``K`` on ``z`` (B, L, E) and predict one step ahead.

        Returns ``z_pred`` (B, 1, E), or ``(z_rec, z_pred)`` when
        ``return_rec`` is set. ``return_K`` is accepted but not used.
        """
        batch, n_snapshots, _ = z.shape
        assert n_snapshots > 1, 'snapshots number should be larger than 1'
        past, future = z[:, :-1], z[:, 1:]

        # solve linear system
        self.K = torch.linalg.lstsq(past, future).solution  # B E E
        if torch.isnan(self.K).any():
            print('Encounter K with nan, replace K by identity matrix')
            self.K = torch.eye(self.K.shape[1]).to(self.K.device).unsqueeze(0).repeat(batch, 1, 1)

        z_pred = torch.bmm(z[:, -1:], self.K)
        if not return_rec:
            return z_pred
        # Reconstruction keeps the first snapshot and advances the others by K.
        z_rec = torch.cat((z[:, :1], torch.bmm(past, self.K)), dim=1)
        return z_rec, z_pred

    def forward(self, z, pred_len=1):
        """Return ``(z_rec, z_preds)`` where ``z_preds`` rolls ``K`` forward ``pred_len`` steps."""
        assert pred_len >= 1, 'prediction length should not be less than 1'
        z_rec, latest = self.one_step_forward(z, return_rec=True)
        rollout = [latest]
        for _ in range(pred_len - 1):
            latest = torch.bmm(latest, self.K)
            rollout.append(latest)
        return z_rec, torch.cat(rollout, dim=1)
class KPLayerApprox(nn.Module):
    """
    Find koopman transition of linear system by DMD with multistep K approximation

    Instead of rolling ``K`` forward one step at a time, a single matrix
    power ``K_step`` advances a whole block of snapshots at once.
    """
    def __init__(self):
        super(KPLayerApprox, self).__init__()

        self.K = None  # B E E
        self.K_step = None  # B E E

    def forward(self, z, pred_len=1):
        # z:       B L E, koopman invariance space representation
        # z_rec:   B L E, reconstructed representation
        # z_pred:  B S E, forecasting representation
        batch, input_len, _ = z.shape
        assert input_len > 1, 'snapshots number should be larger than 1'
        past, future = z[:, :-1], z[:, 1:]

        # solve linear system
        self.K = torch.linalg.lstsq(past, future).solution  # B E E

        if torch.isnan(self.K).any():
            print('Encounter K with nan, replace K by identity matrix')
            self.K = torch.eye(self.K.shape[1]).to(self.K.device).unsqueeze(0).repeat(batch, 1, 1)

        z_rec = torch.cat((z[:, :1], torch.bmm(past, self.K)), dim=1)  # B L E

        fits_in_window = pred_len <= input_len
        # The block operator advances by pred_len steps when that fits in the
        # lookback window, otherwise by input_len steps.
        self.K_step = torch.linalg.matrix_power(self.K, pred_len if fits_in_window else input_len)
        if torch.isnan(self.K_step).any():
            print('Encounter multistep K with nan, replace it by identity matrix')
            self.K_step = torch.eye(self.K_step.shape[1]).to(self.K_step.device).unsqueeze(0).repeat(batch, 1, 1)

        if fits_in_window:
            z_pred = torch.bmm(z[:, -pred_len:, :], self.K_step)
        else:
            # Advance the whole window repeatedly, then trim to pred_len.
            block, blocks = z, []
            for _ in range(math.ceil(pred_len / input_len)):
                block = torch.bmm(block, self.K_step)
                blocks.append(block)
            z_pred = torch.cat(blocks, dim=1)[:, :pred_len, :]

        return z_rec, z_pred
class TimeVarKP(nn.Module):
    """
    Koopman Predictor with DMD (analytical solution of Koopman operator)
    Utilize local variations within individual sliding window to predict the future of time-variant term
    """
    def __init__(self,
                 enc_in=8,
                 input_len=96,
                 pred_len=96,
                 seg_len=24,
                 dynamic_dim=128,
                 encoder=None,
                 decoder=None,
                 multistep=False,
                 ):
        super(TimeVarKP, self).__init__()
        self.input_len = input_len
        self.pred_len = pred_len
        self.enc_in = enc_in
        self.seg_len = seg_len
        self.dynamic_dim = dynamic_dim
        self.multistep = multistep
        self.encoder, self.decoder = encoder, decoder
        self.freq = math.ceil(self.input_len / self.seg_len)  # segment number of input
        self.step = math.ceil(self.pred_len / self.seg_len)  # segment number of output
        # Steps needed in front of the input so that it splits into whole segments.
        self.padding_len = self.seg_len * self.freq - self.input_len
        # Approximate multistep K by KPLayerApprox when pred_len is large
        self.dynamics = KPLayerApprox() if self.multistep else KPLayer()

    def _to_series(self, flat, batch, n_segments, keep):
        """Unfold (B, n_segments, P*C) into (B, n_segments*P, C) and keep the first ``keep`` steps."""
        folded = flat.reshape(batch, n_segments, self.seg_len, self.enc_in)
        return folded.reshape(batch, -1, self.enc_in)[:, :keep, :]

    def forward(self, x):
        # x: B L C
        batch, length, _ = x.shape

        # Front-pad with the last padding_len steps, then cut into freq segments.
        padded = torch.cat((x[:, length - self.padding_len:, :], x), dim=1)
        segments = torch.stack(padded.chunk(self.freq, dim=1), dim=1)  # B F P C, P means seg_len
        flat_segments = segments.reshape(batch, self.freq, -1)  # B F PC

        latent = self.encoder(flat_segments)  # B F H
        rec_latent, pred_latent = self.dynamics(latent, self.step)  # B F H, B S H

        # NOTE(review): x_rec keeps the first input_len steps of the padded
        # series, so with padding_len > 0 it begins with the copied tail steps
        # rather than x[:, 0] - confirm this alignment is intended.
        x_rec = self._to_series(self.decoder(rec_latent), batch, self.freq, self.input_len)  # B L C
        x_pred = self._to_series(self.decoder(pred_latent), batch, self.step, self.pred_len)  # B S C

        return x_rec, x_pred
class TimeInvKP(nn.Module):
    """
    Koopman Predictor with a learnable Koopman operator.

    Uses lookback and forecast window snapshots to predict the future of the
    time-invariant term: the series is encoded along the time axis, advanced
    by the learnable linear operator ``K`` and decoded to the forecast window.
    """

    def __init__(self,
                 input_len=96,
                 pred_len=96,
                 dynamic_dim=128,
                 encoder=None,
                 decoder=None):
        """
        input_len: int, length of the lookback window
        pred_len: int, length of the forecast window
        dynamic_dim: int, latent dimension of the Koopman embedding
        encoder: module applied to the (B, C, L) input
        decoder: module applied to the output of ``K``
        """
        super(TimeInvKP, self).__init__()
        self.dynamic_dim = dynamic_dim
        self.input_len = input_len
        self.pred_len = pred_len
        self.encoder = encoder
        self.decoder = decoder

        # Stable initialization: start K from the orthogonal factor U @ Vh of a
        # random matrix. torch.linalg.svd replaces the deprecated torch.svd; it
        # returns Vh directly, so no extra transpose is needed.
        K_init = torch.randn(self.dynamic_dim, self.dynamic_dim)
        U, _, Vh = torch.linalg.svd(K_init, full_matrices=False)
        # The Linear layer is created after the SVD, same order as before, so
        # the random stream seen by later modules is unchanged.
        self.K = nn.Linear(self.dynamic_dim, self.dynamic_dim, bias=False)
        with torch.no_grad():
            self.K.weight.copy_(torch.mm(U, Vh))

    def forward(self, x):
        # x: B L C
        res = x.transpose(1, 2)  # B C L
        res = self.encoder(res)  # B C H
        res = self.K(res)  # B C H
        res = self.decoder(res)  # B C S
        res = res.transpose(1, 2)  # B S C
        return res
class Model(nn.Module):
    '''
    Koopa forecaster: a stack of blocks, each combining a time-invariant and
    a time-variant Koopman predictor applied to a Fourier-disentangled series.

    Paper link: https://arxiv.org/pdf/2305.18803.pdf
    '''

    def __init__(self, configs, dynamic_dim=128, hidden_dim=64, hidden_layers=2, num_blocks=3, multistep=False):
        """
        configs: experiment configuration; task_name, enc_in, seq_len and
                 pred_len are read here and the object is forwarded to
                 data_provider to build the training loader
        dynamic_dim: int, latent dimension of koopman embedding
        hidden_dim: int, hidden dimension of en/decoder
        hidden_layers: int, number of hidden layers of en/decoder
        num_blocks: int, number of Koopa blocks
        multistep: bool, whether to use approximation for multistep K

        Derived attributes:
        seg_len: int, segment length of time series (set to pred_len)
        alpha: float, spectrum filter ratio (fixed to 0.2)
        mask_spectrum: shared frequency spectrums computed on the train set
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.enc_in = configs.enc_in
        self.input_len = configs.seq_len
        self.pred_len = configs.pred_len

        self.seg_len = self.pred_len
        self.num_blocks = num_blocks
        self.dynamic_dim = dynamic_dim
        self.hidden_dim = hidden_dim
        self.hidden_layers = hidden_layers
        self.multistep = multistep
        # must be set before _get_mask_spectrum, which reads it
        self.alpha = 0.2

        self.mask_spectrum = self._get_mask_spectrum(configs)
        self.disentanglement = FourierFilter(self.mask_spectrum)

        # shared encoder/decoder to make koopman embedding consistent
        self.time_inv_encoder = MLP(f_in=self.input_len, f_out=self.dynamic_dim, activation='relu',
                                    hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers)
        self.time_inv_decoder = MLP(f_in=self.dynamic_dim, f_out=self.pred_len, activation='relu',
                                    hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers)
        # Only the time-invariant list is assigned here. Previously the same
        # ModuleList was also bound to `time_var_kps` and then overwritten.
        self.time_inv_kps = nn.ModuleList([
            TimeInvKP(input_len=self.input_len,
                      pred_len=self.pred_len,
                      dynamic_dim=self.dynamic_dim,
                      encoder=self.time_inv_encoder,
                      decoder=self.time_inv_decoder)
            for _ in range(self.num_blocks)])

        # shared encoder/decoder to make koopman embedding consistent
        self.time_var_encoder = MLP(f_in=self.seg_len * self.enc_in, f_out=self.dynamic_dim, activation='tanh',
                                    hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers)
        self.time_var_decoder = MLP(f_in=self.dynamic_dim, f_out=self.seg_len * self.enc_in, activation='tanh',
                                    hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers)
        self.time_var_kps = nn.ModuleList([
            TimeVarKP(enc_in=configs.enc_in,
                      input_len=self.input_len,
                      pred_len=self.pred_len,
                      seg_len=self.seg_len,
                      dynamic_dim=self.dynamic_dim,
                      encoder=self.time_var_encoder,
                      decoder=self.time_var_decoder,
                      multistep=self.multistep)
            for _ in range(self.num_blocks)])

    def _get_mask_spectrum(self, configs):
        """
        get shared frequency spectrums

        Accumulates the rFFT amplitude of every training lookback window
        (averaged over dims 0 and 1 of the per-batch spectrum) and returns the
        indices of the top ``alpha`` fraction of frequencies.
        """
        train_data, train_loader = data_provider(configs, 'train')
        amps = 0.0
        for data in train_loader:
            lookback_window = data[0]
            amps += abs(torch.fft.rfft(lookback_window, dim=1)).mean(dim=0).mean(dim=1)
        mask_spectrum = amps.topk(int(amps.shape[0] * self.alpha)).indices
        return mask_spectrum  # as the spectrums of time-invariant component

    def forecast(self, x_enc):
        """Run the stacked Koopa blocks on x_enc and return the de-normalized forecast."""
        # Series Stationarization adopted from NSformer
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        x_enc = x_enc - mean_enc
        std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        x_enc = x_enc / std_enc

        # Koopman Forecasting
        residual, forecast = x_enc, None
        for i in range(self.num_blocks):
            time_var_input, time_inv_input = self.disentanglement(residual)
            time_inv_output = self.time_inv_kps[i](time_inv_input)
            time_var_backcast, time_var_output = self.time_var_kps[i](time_var_input)
            # the next block only sees what this block's backcast did not explain
            residual = residual - time_var_backcast
            if forecast is None:
                forecast = (time_inv_output + time_var_output)
            else:
                forecast += (time_inv_output + time_var_output)

        # Series Stationarization adopted from NSformer
        res = forecast * std_enc + mean_enc

        return res

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return the last pred_len steps of the forecast, or None for other tasks."""
        if self.task_name == 'long_term_forecast':
            dec_out = self.forecast(x_enc)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        # only long-term forecasting is implemented
        return None
================================================
FILE: models/LightTS.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class IEBlock(nn.Module):
    """
    Information-exchange block: a two-layer projection over the feature axis,
    a residual linear mixing over the node axis, and an output projection.
    """

    def __init__(self, input_dim, hid_dim, output_dim, num_node):
        super(IEBlock, self).__init__()

        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.output_dim = output_dim
        self.num_node = num_node

        self._build()

    def _build(self):
        bottleneck_dim = self.hid_dim // 4

        self.spatial_proj = nn.Sequential(
            nn.Linear(self.input_dim, self.hid_dim),
            nn.LeakyReLU(),
            nn.Linear(self.hid_dim, bottleneck_dim),
        )

        # node mixing starts with an identity weight matrix
        self.channel_proj = nn.Linear(self.num_node, self.num_node)
        torch.nn.init.eye_(self.channel_proj.weight)

        self.output_proj = nn.Linear(bottleneck_dim, self.output_dim)

    def forward(self, x):
        # project the feature axis (dim 1 of the input) down to hid_dim // 4
        feats = self.spatial_proj(x.permute(0, 2, 1))
        feats = feats.permute(0, 2, 1)

        # residual mixing across the node axis
        mixed = feats + self.channel_proj(feats)

        # project features to output_dim and restore the input layout
        out = self.output_proj(mixed.permute(0, 2, 1))
        return out.permute(0, 2, 1)
class Model(nn.Module):
    """
    LightTS: two IEBlocks over continuous and interval chunk samplings, merged
    by a third IEBlock, plus a linear highway over the padded input.

    Paper link: https://arxiv.org/abs/2207.01186
    """

    def __init__(self, configs, chunk_size=24):
        """
        chunk_size: int, reshape T into [num_chunks, chunk_size]
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len

        # non-forecasting tasks produce an output as long as the input
        if self.task_name in ('classification', 'anomaly_detection', 'imputation'):
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len

        if configs.task_name in ('long_term_forecast', 'short_term_forecast'):
            self.chunk_size = min(configs.pred_len, configs.seq_len, chunk_size)
        else:
            self.chunk_size = min(configs.seq_len, chunk_size)

        # padding in order to ensure complete division
        remainder = self.seq_len % self.chunk_size
        if remainder != 0:
            self.seq_len += self.chunk_size - remainder
        self.num_chunks = self.seq_len // self.chunk_size

        self.d_model = configs.d_model
        self.enc_in = configs.enc_in
        self.dropout = configs.dropout
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.enc_in * configs.seq_len, configs.num_class)
        self._build()

    def _build(self):
        quarter = self.d_model // 4

        self.layer_1 = IEBlock(
            input_dim=self.chunk_size,
            hid_dim=quarter,
            output_dim=quarter,
            num_node=self.num_chunks
        )
        self.chunk_proj_1 = nn.Linear(self.num_chunks, 1)

        self.layer_2 = IEBlock(
            input_dim=self.chunk_size,
            hid_dim=quarter,
            output_dim=quarter,
            num_node=self.num_chunks
        )
        self.chunk_proj_2 = nn.Linear(self.num_chunks, 1)

        self.layer_3 = IEBlock(
            input_dim=self.d_model // 2,
            hid_dim=self.d_model // 2,
            output_dim=self.pred_len,
            num_node=self.enc_in
        )

        self.ar = nn.Linear(self.seq_len, self.pred_len)

    def encoder(self, x):
        batch, steps, channels = x.size()

        # padding: append zeros up to the padded sequence length
        pad = torch.zeros((batch, self.seq_len - steps, channels)).to(x.device)
        x = torch.cat([x, pad], dim=1)

        # linear highway over the time axis
        highway = self.ar(x.permute(0, 2, 1)).permute(0, 2, 1)

        # continuous sampling
        cont = x.reshape(batch, self.num_chunks, self.chunk_size, channels).permute(0, 3, 2, 1)
        cont = cont.reshape(-1, self.chunk_size, self.num_chunks)
        cont = self.chunk_proj_1(self.layer_1(cont)).squeeze(dim=-1)

        # interval sampling
        intv = x.reshape(batch, self.chunk_size, self.num_chunks, channels).permute(0, 3, 1, 2)
        intv = intv.reshape(-1, self.chunk_size, self.num_chunks)
        intv = self.chunk_proj_2(self.layer_2(intv)).squeeze(dim=-1)

        # concatenate both views per channel and mix across channels
        merged = torch.cat([cont, intv], dim=-1)
        merged = merged.reshape(batch, channels, -1).permute(0, 2, 1)

        return self.layer_3(merged) + highway

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        return self.encoder(x_enc)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        return self.encoder(x_enc)

    def anomaly_detection(self, x_enc):
        return self.encoder(x_enc)

    def classification(self, x_enc, x_mark_enc):
        enc_out = self.encoder(x_enc)

        # Output: flatten per sample, then project to class scores
        flat = enc_out.reshape(enc_out.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(flat)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/MICN.py
================================================
import torch
import torch.nn as nn
from layers.Embed import DataEmbedding
from layers.Autoformer_EncDec import series_decomp, series_decomp_multi
import torch.nn.functional as F
class MIC(nn.Module):
    """
    MIC layer to extract local and global features.

    For every scale in ``conv_kernel`` the input is decomposed, downsampled by
    a strided convolution, passed through an isometric convolution, upsampled
    by a transposed convolution and truncated to the input length. The
    per-scale results are merged by a 2D convolution and refined by a
    feed-forward network.
    """

    def __init__(self, feature_size=512, n_heads=8, dropout=0.05, decomp_kernel=[32], conv_kernel=[24],
                 isometric_kernel=[18, 6], device='cuda'):
        """
        feature_size: int, channel width of every convolution and norm
        n_heads: unused, kept for interface compatibility
        dropout: float, dropout probability applied after each activation
        decomp_kernel: list, kernel sizes of the series decomposition blocks
        conv_kernel: list, kernel sizes (and strides) of the down/up-sampling convolutions
        isometric_kernel: list, kernel sizes of the isometric convolutions
        device: initial value of ``self.device``; refreshed on every forward call
        """
        super(MIC, self).__init__()
        self.conv_kernel = conv_kernel
        self.device = device

        # isometric convolution
        self.isometric_conv = nn.ModuleList([nn.Conv1d(in_channels=feature_size, out_channels=feature_size,
                                                       kernel_size=i, padding=0, stride=1)
                                             for i in isometric_kernel])

        # downsampling convolution: padding=i//2, stride=i
        self.conv = nn.ModuleList([nn.Conv1d(in_channels=feature_size, out_channels=feature_size,
                                             kernel_size=i, padding=i // 2, stride=i)
                                   for i in conv_kernel])

        # upsampling convolution
        self.conv_trans = nn.ModuleList([nn.ConvTranspose1d(in_channels=feature_size, out_channels=feature_size,
                                                            kernel_size=i, padding=0, stride=i)
                                         for i in conv_kernel])

        self.decomp = nn.ModuleList([series_decomp(k) for k in decomp_kernel])
        self.merge = torch.nn.Conv2d(in_channels=feature_size, out_channels=feature_size,
                                     kernel_size=(len(self.conv_kernel), 1))

        # feedforward network
        self.conv1 = nn.Conv1d(in_channels=feature_size, out_channels=feature_size * 4, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=feature_size * 4, out_channels=feature_size, kernel_size=1)
        self.norm1 = nn.LayerNorm(feature_size)
        self.norm2 = nn.LayerNorm(feature_size)

        self.norm = torch.nn.LayerNorm(feature_size)
        self.act = torch.nn.Tanh()
        # honour the `dropout` argument (it was ignored before); the default
        # of 0.05 matches the previously hard-coded value
        self.drop = torch.nn.Dropout(dropout)

    def conv_trans_conv(self, input, conv1d, conv1d_trans, isometric):
        """Apply one down-sample / isometric / up-sample branch to a [B, L, D] tensor."""
        batch, seq_len, channel = input.shape
        x = input.permute(0, 2, 1)

        # downsampling convolution
        x1 = self.drop(self.act(conv1d(x)))
        x = x1

        # isometric convolution: left-pad with zeros. The padding is allocated
        # on the tensor's own device/dtype rather than on `self.device`, so the
        # method does not depend on state set by a previous forward().
        zeros = torch.zeros((x.shape[0], x.shape[1], x.shape[2] - 1), device=x.device, dtype=x.dtype)
        x = torch.cat((zeros, x), dim=-1)
        x = self.drop(self.act(isometric(x)))
        x = self.norm((x + x1).permute(0, 2, 1)).permute(0, 2, 1)

        # upsampling convolution
        x = self.drop(self.act(conv1d_trans(x)))
        x = x[:, :, :seq_len]  # truncate

        x = self.norm(x.permute(0, 2, 1) + input)
        return x

    def forward(self, src):
        # kept so code that reads `mic.device` keeps seeing the current device
        self.device = src.device

        # multi-scale: one branch per entry of conv_kernel
        multi = []
        for i in range(len(self.conv_kernel)):
            src_out, _ = self.decomp[i](src)  # the trend part is not used here
            src_out = self.conv_trans_conv(src_out, self.conv[i], self.conv_trans[i], self.isometric_conv[i])
            multi.append(src_out)

        # merge: stack the branches on a new dim 1 and collapse it with the 2D conv
        mg = torch.stack(multi, dim=1)
        mg = self.merge(mg.permute(0, 3, 1, 2)).squeeze(-2).permute(0, 2, 1)

        # feed-forward refinement with a residual connection
        y = self.norm1(mg)
        y = self.conv2(self.conv1(y.transpose(-1, 1))).transpose(-1, 1)

        return self.norm2(mg + y)
class SeasonalPrediction(nn.Module):
    """Stack of ``d_layers`` MIC layers followed by a linear projection to ``c_out``."""

    def __init__(self, embedding_size=512, n_heads=8, dropout=0.05, d_layers=1, decomp_kernel=[32], c_out=1,
                 conv_kernel=[2, 4], isometric_kernel=[18, 6], device='cuda'):
        super(SeasonalPrediction, self).__init__()

        mic_layers = []
        for _ in range(d_layers):
            mic_layers.append(MIC(feature_size=embedding_size, n_heads=n_heads,
                                  decomp_kernel=decomp_kernel, conv_kernel=conv_kernel,
                                  isometric_kernel=isometric_kernel, device=device))
        self.mic = nn.ModuleList(mic_layers)

        self.projection = nn.Linear(embedding_size, c_out)

    def forward(self, dec):
        hidden = dec
        for layer in self.mic:
            hidden = layer(hidden)
        return self.projection(hidden)
class Model(nn.Module):
    """
    MICN: multi-scale decomposition, a convolutional seasonal predictor and
    (for forecasting) a linear regression on the trend.

    Paper link: https://openreview.net/pdf?id=zt53IDUR1U
    """

    def __init__(self, configs, conv_kernel=[12, 16]):
        """
        conv_kernel: downsampling and upsampling convolution kernel_size
        """
        super(Model, self).__init__()

        total_len = configs.seq_len + configs.pred_len
        decomp_kernel = []  # kernel of decomposition operation
        isometric_kernel = []  # kernel of isometric convolution
        for kernel in conv_kernel:
            even = kernel % 2 == 0
            # the kernel of decomposition operation must be odd
            decomp_kernel.append(kernel + 1 if even else kernel)
            if even:
                isometric_kernel.append((total_len + kernel) // kernel)
            else:
                isometric_kernel.append((total_len + kernel - 1) // kernel)

        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len

        # Multiple Series decomposition block from FEDformer
        self.decomp_multi = series_decomp_multi(decomp_kernel)

        # embedding
        self.dec_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)

        self.conv_trans = SeasonalPrediction(embedding_size=configs.d_model, n_heads=configs.n_heads,
                                             dropout=configs.dropout,
                                             d_layers=configs.d_layers, decomp_kernel=decomp_kernel,
                                             c_out=configs.c_out, conv_kernel=conv_kernel,
                                             isometric_kernel=isometric_kernel, device=torch.device('cuda:0'))

        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            # refer to DLinear: trend regression initialised to constant weights
            self.regression = nn.Linear(configs.seq_len, configs.pred_len)
            self.regression.weight = nn.Parameter(
                (1 / configs.pred_len) * torch.ones([configs.pred_len, configs.seq_len]),
                requires_grad=True)
        if task in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if task == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.c_out * configs.seq_len, configs.num_class)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        # Multi-scale Hybrid Decomposition
        seasonal, trend = self.decomp_multi(x_enc)
        trend = self.regression(trend.permute(0, 2, 1)).permute(0, 2, 1)

        # embedding: the future part of the decoder input is zero-filled
        future = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device)
        decoder_input = torch.cat([seasonal[:, -self.seq_len:, :], future], dim=1)
        dec_out = self.dec_embedding(decoder_input, x_mark_dec)
        dec_out = self.conv_trans(dec_out)

        # seasonal forecast plus regressed trend over the horizon
        return dec_out[:, -self.pred_len:, :] + trend[:, -self.pred_len:, :]

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        # Multi-scale Hybrid Decomposition
        seasonal, trend = self.decomp_multi(x_enc)

        # embedding
        dec_out = self.conv_trans(self.dec_embedding(seasonal, x_mark_dec))
        return dec_out + trend

    def anomaly_detection(self, x_enc):
        # Multi-scale Hybrid Decomposition
        seasonal, trend = self.decomp_multi(x_enc)

        # embedding
        dec_out = self.conv_trans(self.dec_embedding(seasonal, None))
        return dec_out + trend

    def classification(self, x_enc, x_mark_enc):
        # Multi-scale Hybrid Decomposition
        seasonal, trend = self.decomp_multi(x_enc)
        # embedding
        dec_out = self.conv_trans(self.dec_embedding(seasonal, None))
        dec_out = dec_out + trend

        # Output from Non-stationary Transformer
        # the output transformer encoder/decoder embeddings don't include non-linearity
        output = self.dropout(self.act(dec_out))
        output = output * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        output = output.reshape(output.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(output)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/MSGNet.py
================================================
import numpy as np
# import pywt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.fft
from layers.Embed import DataEmbedding
from layers.MSGBlock import GraphBlock, simpleVIT, Attention_Block, Predict
def FFT_for_Period(x, k=2):
    """
    Find the k dominant periods of x ([B, T, C]) along the time axis.

    Returns a NumPy array with the k periods (T // frequency index) and a
    [B, k] tensor holding each sample's channel-averaged amplitude at those
    frequencies. The zero-frequency bin is excluded from the ranking.
    """
    # [B, T, C]
    amplitude = abs(torch.fft.rfft(x, dim=1))

    # rank frequencies by amplitude averaged over batch and channels
    mean_amplitude = amplitude.mean(0).mean(-1)
    mean_amplitude[0] = 0
    top_idx = torch.topk(mean_amplitude, k).indices
    top_idx = top_idx.detach().cpu().numpy()

    period = x.shape[1] // top_idx
    return period, amplitude.mean(-1)[:, top_idx]
class ScaleGraphBlock(nn.Module):
    """
    For each of the top-k FFT periods: apply a graph block, fold the sequence
    into period-length pieces, run attention on the pieces, and finally blend
    the per-scale results with softmax weights from the FFT amplitudes.
    """

    def __init__(self, configs):
        super(ScaleGraphBlock, self).__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.k = configs.top_k

        self.att0 = Attention_Block(configs.d_model, configs.d_ff,
                                    n_heads=configs.n_heads, dropout=configs.dropout, activation="gelu")
        self.norm = nn.LayerNorm(configs.d_model)
        self.gelu = nn.GELU()

        # one graph block per scale
        self.gconv = nn.ModuleList()
        for _ in range(self.k):
            self.gconv.append(
                GraphBlock(configs.c_out, configs.d_model, configs.conv_channel, configs.skip_channel,
                           configs.gcn_depth, configs.dropout, configs.propalpha, configs.seq_len,
                           configs.node_dim))

    def forward(self, x):
        B, T, N = x.size()
        scale_list, scale_weight = FFT_for_Period(x, self.k)

        per_scale = []
        for idx in range(self.k):
            scale = scale_list[idx]

            # Gconv: note x is replaced, so later scales see this output
            x = self.gconv[idx](x)

            # padding: extend to the next multiple of `scale` if needed
            if self.seq_len % scale != 0:
                length = ((self.seq_len // scale) + 1) * scale
                pad = torch.zeros([x.shape[0], length - self.seq_len, x.shape[2]]).to(x.device)
                folded = torch.cat([x, pad], dim=1)
            else:
                length = self.seq_len
                folded = x
            folded = folded.reshape(B, length // scale, scale, N)

            # for multi-head attention: treat every period as its own sequence
            folded = folded.reshape(-1, scale, N)
            folded = self.gelu(self.norm(self.att0(folded)))

            # unfold and drop the padded tail
            folded = folded.reshape(B, -1, scale, N).reshape(B, -1, N)
            per_scale.append(folded[:, :self.seq_len, :])

        stacked = torch.stack(per_scale, dim=-1)

        # adaptive aggregation
        weights = F.softmax(scale_weight, dim=1)
        weights = weights.unsqueeze(1).unsqueeze(1).repeat(1, T, N, 1)
        aggregated = torch.sum(stacked * weights, -1)

        # residual connection (with the output of the last graph block)
        return aggregated + x
class Model(nn.Module):
    """
    MSGNet: data embedding, a stack of ScaleGraphBlocks with layer norm, and a
    task-specific head (forecast, imputation, anomaly detection, classification).
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"

        # for graph
        # self.num_nodes = configs.c_out
        # self.subgraph_size = configs.subgraph_size
        # self.node_dim = configs.node_dim
        # to return adj (node , node)
        # self.graph = constructor_graph()

        blocks = [ScaleGraphBlock(configs) for _ in range(configs.e_layers)]
        self.model = nn.ModuleList(blocks)
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model,
                                           configs.embed, configs.freq, configs.dropout)
        self.layer = configs.e_layers
        self.layer_norm = nn.LayerNorm(configs.d_model)
        self.predict_linear = nn.Linear(
            self.seq_len, self.pred_len + self.seq_len)
        self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        self.seq2pred = Predict(configs.individual, configs.c_out,
                                configs.seq_len, configs.pred_len, configs.dropout)

        # task-specific heads (these re-create `projection`)
        if self.task_name in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def _normalize(self, x_enc):
        """Normalization from Non-stationary Transformer; returns (x, means, stdev)."""
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc /= stdev
        return x_enc, means, stdev

    def _encode(self, x_enc, x_mark):
        """Embed the input and pass it through every block followed by layer norm."""
        enc_out = self.enc_embedding(x_enc, x_mark)  # [B,T,C]
        # adp = self.graph(torch.arange(self.num_nodes).to(self.device))
        for i in range(self.layer):
            enc_out = self.layer_norm(self.model[i](enc_out))
        return enc_out

    @staticmethod
    def _denormalize(dec_out, means, stdev, length):
        """De-Normalization from Non-stationary Transformer over `length` steps."""
        scale = stdev[:, 0, :].unsqueeze(1).repeat(1, length, 1)
        shift = means[:, 0, :].unsqueeze(1).repeat(1, length, 1)
        dec_out = dec_out * scale
        dec_out = dec_out + shift
        return dec_out

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        x_enc, means, stdev = self._normalize(x_enc)
        enc_out = self._encode(x_enc, x_mark_enc)

        # project back, then map the lookback window onto the horizon
        dec_out = self.projection(enc_out)
        dec_out = self.seq2pred(dec_out.transpose(1, 2)).transpose(1, 2)

        dec_out = self._denormalize(dec_out, means, stdev, self.pred_len)
        return dec_out[:, -self.pred_len:, :]

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        x_enc, means, stdev = self._normalize(x_enc)
        _, L, N = x_enc.shape
        enc_out = self._encode(x_enc, x_mark_enc)

        # project back
        dec_out = self.projection(enc_out)
        return self._denormalize(dec_out, means, stdev, L)

    def anomaly_detection(self, x_enc):
        x_enc, means, stdev = self._normalize(x_enc)
        _, L, N = x_enc.shape
        enc_out = self._encode(x_enc, None)

        # project back
        dec_out = self.projection(enc_out)
        return self._denormalize(dec_out, means, stdev, L)

    def classification(self, x_enc, x_mark_enc):
        x_enc, means, stdev = self._normalize(x_enc)
        enc_out = self._encode(x_enc, None)

        output = self.dropout(self.act(enc_out))
        # zero-out padding embeddings
        output = output * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        return self.projection(output)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(
                x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/Mamba.py
================================================
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba_ssm import Mamba
from layers.Embed import DataEmbedding
class Model(nn.Module):
    """Forecaster built from a data embedding, one ``mamba_ssm.Mamba`` layer and a linear head."""

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len

        self.d_inner = configs.d_model * configs.expand
        self.dt_rank = math.ceil(configs.d_model / 16)  # TODO implement "auto"

        self.embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout)

        mamba_kwargs = dict(
            d_model=configs.d_model,
            d_state=configs.d_ff,
            d_conv=configs.d_conv,
            expand=configs.expand,
        )
        self.mamba = Mamba(**mamba_kwargs)

        self.out_layer = nn.Linear(configs.d_model, configs.c_out, bias=False)

    def forecast(self, x_enc, x_mark_enc):
        # normalise every series by its own mean / std over the time axis
        mean_enc = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - mean_enc
        std_enc = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        normed = centered / std_enc

        hidden = self.mamba(self.embedding(normed, x_mark_enc))
        x_out = self.out_layer(hidden)

        # undo the normalisation
        return x_out * std_enc + mean_enc

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        if self.task_name in ['short_term_forecast', 'long_term_forecast']:
            x_out = self.forecast(x_enc, x_mark_enc)
            return x_out[:, -self.pred_len:, :]

        # other tasks not implemented
        return None
================================================
FILE: models/MambaSimple.py
================================================
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, repeat, einsum
from layers.Embed import DataEmbedding
class Model(nn.Module):
    """
    Mamba, linear-time sequence modeling with selective state spaces O(L)
    Paper link: https://arxiv.org/abs/2312.00752
    Implementation reference: https://github.com/johnma2006/mamba-minimal/

    Stacks ``configs.e_layers`` residual Mamba blocks between a data embedding
    and a bias-free linear output layer.
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len

        self.d_inner = configs.d_model * configs.expand
        self.dt_rank = math.ceil(configs.d_model / 16)

        self.embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout)

        blocks = []
        for _ in range(configs.e_layers):
            blocks.append(ResidualBlock(configs, self.d_inner, self.dt_rank))
        self.layers = nn.ModuleList(blocks)
        self.norm = RMSNorm(configs.d_model)

        self.out_layer = nn.Linear(configs.d_model, configs.c_out, bias=False)

    def forecast(self, x_enc, x_mark_enc):
        # normalise every series by its own mean / std over the time axis
        mean_enc = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - mean_enc
        std_enc = torch.sqrt(torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()
        normed = centered / std_enc

        hidden = self.embedding(normed, x_mark_enc)
        for block in self.layers:
            hidden = block(hidden)

        x_out = self.out_layer(self.norm(hidden))

        # undo the normalisation
        return x_out * std_enc + mean_enc

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        if self.task_name in ['short_term_forecast', 'long_term_forecast']:
            x_out = self.forecast(x_enc, x_mark_enc)
            return x_out[:, -self.pred_len:, :]
        # other tasks are not handled
        return None
class ResidualBlock(nn.Module):
    """Pre-norm residual wrapper: ``x + MambaBlock(RMSNorm(x))``."""

    def __init__(self, configs, d_inner, dt_rank):
        super(ResidualBlock, self).__init__()

        self.mixer = MambaBlock(configs, d_inner, dt_rank)
        self.norm = RMSNorm(configs.d_model)

    def forward(self, x):
        mixed = self.mixer(self.norm(x))
        return mixed + x
class MambaBlock(nn.Module):
    """
    Single Mamba block: input projection into a signal and a gate branch,
    depthwise convolution, selective state-space scan, gating and output
    projection. ``configs.d_ff`` is used as the state dimension.
    """

    def __init__(self, configs, d_inner, dt_rank):
        super(MambaBlock, self).__init__()
        self.d_inner = d_inner
        self.dt_rank = dt_rank
        d_state = configs.d_ff

        # one projection produces both the signal branch and the gate branch
        self.in_proj = nn.Linear(configs.d_model, self.d_inner * 2, bias=False)

        # depthwise convolution over time (groups == channels)
        self.conv1d = nn.Conv1d(
            in_channels=self.d_inner,
            out_channels=self.d_inner,
            bias=True,
            kernel_size=configs.d_conv,
            padding=configs.d_conv - 1,
            groups=self.d_inner
        )

        # takes in x and outputs the input-specific delta, B, C
        self.x_proj = nn.Linear(self.d_inner, self.dt_rank + d_state * 2, bias=False)

        # projects delta
        self.dt_proj = nn.Linear(self.dt_rank, self.d_inner, bias=True)

        # A is stored as log(1..d_state), repeated for every inner channel
        state_index = torch.arange(1, d_state + 1)
        A = repeat(state_index, "n -> d n", d=self.d_inner).float()
        self.A_log = nn.Parameter(torch.log(A))
        self.D = nn.Parameter(torch.ones(self.d_inner))

        self.out_proj = nn.Linear(self.d_inner, configs.d_model, bias=False)

    def forward(self, x):
        """
        Figure 3 in Section 3.4 in the paper
        """
        _, seq_len, _ = x.shape

        projected = self.in_proj(x)  # [B, L, 2 * d_inner]
        signal, gate = projected.split(split_size=[self.d_inner, self.d_inner], dim=-1)

        # convolve along time and keep only the first L outputs
        signal = rearrange(signal, "b l d -> b d l")
        signal = self.conv1d(signal)[:, :, :seq_len]
        signal = rearrange(signal, "b d l -> b l d")

        signal = F.silu(signal)

        scanned = self.ssm(signal)
        gated = scanned * F.silu(gate)

        return self.out_proj(gated)

    def ssm(self, x):
        """
        Algorithm 2 in Section 3.2 in the paper
        """
        n_state = self.A_log.shape[1]

        A = -torch.exp(self.A_log.float())  # [d_in, n]
        D = self.D.float()  # [d_in]

        x_dbl = self.x_proj(x)  # [B, L, d_rank + 2 * d_ff]
        # delta: [B, L, d_rank]; B, C: [B, L, n]
        delta, B, C = x_dbl.split(split_size=[self.dt_rank, n_state, n_state], dim=-1)
        delta = F.softplus(self.dt_proj(delta))  # [B, L, d_in]

        return self.selective_scan(x, delta, A, B, C, D)

    def selective_scan(self, u, delta, A, B, C, D):
        batch, seq_len, d_in = u.shape
        n_state = A.shape[1]

        # A is discretized using zero-order hold (ZOH) discretization
        deltaA = torch.exp(einsum(delta, A, "b l d, d n -> b l d n"))
        # B is discretized using a simplified Euler discretization instead of ZOH.
        # From a discussion with authors: "A is the more important term and the
        # performance doesn't change much with the simplification on B"
        deltaB_u = einsum(delta, B, u, "b l d, b l n, b l d -> b l d n")

        # selective scan, sequential instead of parallel
        state = torch.zeros((batch, d_in, n_state), device=deltaA.device)
        outputs = []
        for t in range(seq_len):
            state = deltaA[:, t] * state + deltaB_u[:, t]
            outputs.append(einsum(state, C[:, t, :], "b d n, b n -> b d"))

        y = torch.stack(outputs, dim=1)  # [B, L, d_in]
        # skip connection scaled by D
        return y + u * D
class RMSNorm(nn.Module):
    """Root-mean-square normalisation over the last dimension with a learnable scale."""

    def __init__(self, d_model, eps=1e-5):
        super(RMSNorm, self).__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(d_model))

    def forward(self, x):
        mean_square = x.pow(2).mean(-1, keepdim=True)
        inv_rms = torch.rsqrt(mean_square + self.eps)
        return x * inv_rms * self.weight
================================================
FILE: models/MambaSingleLayer.py
================================================
import torch.nn as nn
from layers.Embed import PositionalEmbedding
from layers.MambaBlock import Mamba_TimeVariant
class TokenEmbedding_cls(nn.Module):
    """TokenEmbedding with configurable kernel size (`d_kernel`).

    A bias-free 1-D convolution over the time axis with 'same' replicate
    padding, so the sequence length is unchanged.
    """

    def __init__(self, c_in, d_model, d_kernel=3):
        """
        Args:
            c_in: number of input channels.
            d_model: number of output channels.
            d_kernel: convolution kernel size.
        """
        super().__init__()
        self.tokenConv = nn.Conv1d(
            in_channels=c_in,
            out_channels=d_model,
            kernel_size=d_kernel,
            padding='same',
            padding_mode='replicate',
            bias=False,
        )
        # Re-initialise every Conv1d in this module (only tokenConv here).
        conv_layers = [m for m in self.modules() if isinstance(m, nn.Conv1d)]
        for conv in conv_layers:
            nn.init.kaiming_normal_(
                conv.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        """Convolve along dim 1 of ``x``; dims 1 and 2 are swapped around the conv."""
        channels_first = x.permute(0, 2, 1)
        embedded = self.tokenConv(channels_first)
        return embedded.transpose(1, 2)
class DataEmbedding_cls(nn.Module):
    """DataEmbedding with configurable kernel size (`d_kernel`) and sequence length (`seq_len`).

    To avoid the warning for the EigenWorms dataset (seq_len=17984) while
    staying consistent with other models, the positional table is built
    with max_len=max(5000, seq_len).
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1, d_kernel=3, seq_len=5000):
        """
        Args:
            c_in: number of input channels.
            d_model: embedding width.
            embed_type: accepted for signature compatibility; not used here.
            freq: accepted for signature compatibility; not used here.
            dropout: dropout probability; values <= 0 disable dropout entirely.
            d_kernel: kernel size of the value-embedding convolution.
            seq_len: sequence length used to size the positional table.
        """
        super().__init__()
        self.value_embedding = TokenEmbedding_cls(c_in=c_in, d_model=d_model, d_kernel=d_kernel)
        longest = max(5000, seq_len)
        self.position_embedding = PositionalEmbedding(d_model=d_model, max_len=longest)
        if dropout > 0:
            self.dropout = nn.Dropout(p=dropout)
        else:
            self.dropout = nn.Identity()

    def forward(self, x):
        """Return dropout(value embedding + positional embedding) of ``x``."""
        values = self.value_embedding(x)
        positions = self.position_embedding(x)
        return self.dropout(values + positions)
class Model(nn.Module):
    """MambaSL: Exploring Single-Layer Mamba for Time Series Classification

    - Paper Link: https://openreview.net/pdf?id=YDl4vqQqGP
    - Original Repo: https://github.com/yoom618/MambaSL. removed all extra codes for ablation study and further analysis.

    Only the ``classification`` task is handled; any other ``task_name``
    raises ``ValueError`` both at construction and in ``forward``.
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.c_out = configs.c_out
        self.dropout = configs.dropout
        self.num_kernels = configs.num_kernels

        mamba_block = Mamba_TimeVariant(
            d_model=configs.d_model,
            d_state=configs.d_ff,
            d_conv=configs.d_conv,
            expand=configs.expand,
            timevariant_dt=bool(configs.tv_dt),  # only available in Mamba_TimeVariant
            timevariant_B=bool(configs.tv_B),  # only available in Mamba_TimeVariant
            timevariant_C=bool(configs.tv_C),  # only available in Mamba_TimeVariant
            use_D=bool(configs.use_D),  # use D (skip connection) or not
            device=configs.device,
        )
        self.mamba = nn.Sequential(
            mamba_block,
            nn.LayerNorm(configs.d_model),
            nn.SiLU(),  # simply choose the same activation fn as the Mamba block
        )

        if self.task_name not in ['classification']:
            raise ValueError(f"task_name: {configs.task_name} is not valid.")

        # Classification: one class per sequence sample.
        self.embedding = DataEmbedding_cls(
            configs.enc_in,
            configs.d_model,
            configs.embed,
            configs.freq,
            configs.dropout,
            configs.num_kernels,
            configs.seq_len,
        )

        classifier = nn.Linear(configs.d_model, configs.num_class, bias=False)
        self.out_layer = nn.Sequential(nn.Dropout(configs.dropout), classifier)
        nn.init.xavier_uniform_(self.out_layer[1].weight)

        # Linear -> max over heads -> softmax over the time axis (dim=1).
        self.attn_weight = nn.Sequential(
            nn.Linear(configs.d_model, configs.n_heads, bias=True),
            nn.AdaptiveMaxPool1d(1),
            nn.Softmax(dim=1),
        )
        # Zero weights with constant bias: the Linear output starts input-independent.
        for layer in self.attn_weight.modules():
            if not isinstance(layer, nn.Linear):
                continue
            nn.init.zeros_(layer.weight)
            if layer.bias is not None:
                layer.bias.data.fill_(1.0)

    def forward(self, x_enc, x_mark_enc, x_dec=None, x_mark_dec=None, mask=None):
        """Classify each sequence from a weighted sum of per-step logits.

        ``x_mark_enc`` is multiplied into the per-step logits to mask out
        padded steps of variable-length data (e.g. JapaneseVowels).
        """
        if self.task_name not in ['classification']:
            raise ValueError(f"task_name: {self.task_name} is not valid.")

        hidden = self.mamba(self.embedding(x_enc))  # (B, L_in, D)

        # [proposed] use the gating value to make the final prediction
        step_logits = self.out_layer(hidden)  # (B, L_in, D) -> (B, L_in, C_out)
        step_logits *= x_mark_enc.unsqueeze(2)  # zero the padded steps

        # Attention weights: (B, L_in, D) -> (B, L_in, n_head) -> (B, L_in, 1)
        step_weights = self.attn_weight(hidden)

        # Weighted sum over the time axis gives the final prediction.
        weighted = step_logits * step_weights  # (B, L_in, C_out)
        return weighted.sum(1)  # (B, C_out)
================================================
FILE: models/Moirai.py
================================================
import numpy as np
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
from uni2ts.eval_util.plot import plot_single
from uni2ts.model.moirai import MoiraiForecast, MoiraiModule
from uni2ts.model.moirai_moe import MoiraiMoEForecast, MoiraiMoEModule
from uni2ts.model.moirai2 import Moirai2Forecast, Moirai2Module
class Model(nn.Module):
    """Zero-shot forecasting wrapper around the pretrained Moirai 2.0 (small) model.

    Each input channel is forecast independently with a univariate
    (``target_dim=1``) predictor and the results are stacked back together.
    """

    def __init__(self, configs):
        """
        Args:
            configs: experiment configuration; reads ``pred_len``,
                ``seq_len`` and ``task_name``.
        """
        super().__init__()
        # Prefer CUDA (the original hard-coded target) but fall back to CPU
        # so the model can still be constructed on hosts without a GPU.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = Moirai2Forecast(
            module=Moirai2Module.from_pretrained(
                "Salesforce/moirai-2.0-R-small",
            ),
            prediction_length=configs.pred_len,
            context_length=configs.seq_len,
            target_dim=1,
            feat_dynamic_real_dim=0,
            past_feat_dynamic_real_dim=0,
        ).to(device)
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast every channel of ``x_enc`` independently.

        Only ``x_enc`` is used; the remaining arguments are accepted for
        interface compatibility. Returns the per-channel predictions stacked
        on a new last dimension, on the same device as ``x_enc``.
        """
        outputs = []
        for i in range(x_enc.shape[-1]):
            # The predictor is fed NumPy input, so move the channel to host memory.
            output = self.model.predict(x_enc[..., i].cpu().numpy())
            # Average over axis 1 -- presumably the sample/quantile axis of
            # the uni2ts prediction; TODO confirm against Moirai2Forecast.predict.
            output = np.mean(output, axis=1)
            outputs.append(
                torch.as_tensor(output, dtype=torch.float32).to(x_enc.device)
            )
        dec_out = torch.stack(outputs, dim=-1)
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the forecast for ``zero_shot_forecast``; ``None`` for any other task."""
        if self.task_name == 'zero_shot_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out
        return None
================================================
FILE: models/MultiPatchFormer.py
================================================
import torch
import torch.nn as nn
import math
from einops import rearrange
from layers.SelfAttention_Family import AttentionLayer, FullAttention
class FeedForward(nn.Module):
    """Two-layer position-wise MLP: Linear -> GELU -> Linear."""

    def __init__(self, d_model: int, d_hidden: int = 512):
        """
        Args:
            d_model: input and output width.
            d_hidden: width of the hidden layer.
        """
        super().__init__()
        self.linear_1 = torch.nn.Linear(d_model, d_hidden)
        self.linear_2 = torch.nn.Linear(d_hidden, d_model)
        self.activation = torch.nn.GELU()

    def forward(self, x):
        """Apply the MLP to the last dimension of ``x``."""
        hidden = self.activation(self.linear_1(x))
        return self.linear_2(hidden)
class Encoder(nn.Module):
    """Post-norm attention block: attention + residual, then feed-forward + residual.

    With ``channel_wise=True`` the keys and values are first passed through
    a kernel-size-1 convolution; queries always come from the raw input.
    """

    def __init__(
        self,
        d_model: int,
        mha: AttentionLayer,
        d_hidden: int,
        dropout: float = 0,
        channel_wise=False,
    ):
        """
        Args:
            d_model: feature width of the block.
            mha: attention layer called as ``mha(q, k, v, attn_mask=None)``.
            d_hidden: hidden width of the feed-forward sub-layer.
            dropout: dropout probability applied after each sub-layer.
            channel_wise: whether to convolve keys/values before attention.
        """
        super().__init__()
        self.channel_wise = channel_wise
        if channel_wise:
            self.conv = torch.nn.Conv1d(
                in_channels=d_model,
                out_channels=d_model,
                kernel_size=1,
                stride=1,
                padding=0,
                padding_mode="reflect",
            )
        self.MHA = mha
        self.feedforward = FeedForward(d_model=d_model, d_hidden=d_hidden)
        self.dropout = torch.nn.Dropout(p=dropout)
        self.layerNormal_1 = torch.nn.LayerNorm(d_model)
        self.layerNormal_2 = torch.nn.LayerNorm(d_model)

    def forward(self, x):
        """Return ``(output, score)`` where ``score`` is whatever the attention layer reports."""
        query = x
        if self.channel_wise:
            # The conv mixes along dim 2 of x, hence the swap in and out.
            key = value = self.conv(x.permute(0, 2, 1)).transpose(1, 2)
        else:
            key = value = x

        attended, score = self.MHA(query, key, value, attn_mask=None)
        after_attn = self.layerNormal_1(self.dropout(attended) + query)

        fed = self.dropout(self.feedforward(after_attn))
        out = self.layerNormal_2(fed + after_attn)
        return out, score
class Model(nn.Module):
    """MultiPatchFormer forecaster.

    The input is embedded at four patch scales, encoded along the patch
    axis, mixed across channels, and decoded by eight chained linear heads
    that each predict one chunk of the horizon (semi auto-regressive).

    Only ``long_term_forecast`` and ``short_term_forecast`` are implemented.

    NOTE(review): the four patch embeddings are concatenated on the feature
    axis, so they must all yield the same number of patches (this holds for
    seq_len=96); other sequence lengths may fail at the concatenation.
    """

    # Number of chained output heads the horizon is split into.
    _N_HEADS_OUT = 8

    def __init__(self, configs):
        """
        Args:
            configs: experiment configuration; reads ``task_name``,
                ``seq_len``, ``pred_len``, ``enc_in``, ``e_layers``,
                ``d_model``, ``d_ff``, ``n_heads`` and ``dropout``.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.d_channel = configs.enc_in
        self.N = configs.e_layers
        # Embedding
        self.d_model = configs.d_model
        self.d_hidden = configs.d_ff
        self.n_heads = configs.n_heads
        self.mask = True
        self.dropout = configs.dropout

        self.stride1 = 8
        self.patch_len1 = 8
        self.stride2 = 8
        self.patch_len2 = 16
        self.stride3 = 7
        self.patch_len3 = 24
        self.stride4 = 6
        self.patch_len4 = 32
        self.patch_num1 = int((self.seq_len - self.patch_len2) // self.stride2) + 2
        self.padding_patch_layer1 = nn.ReplicationPad1d((0, self.stride1))
        self.padding_patch_layer2 = nn.ReplicationPad1d((0, self.stride2))
        self.padding_patch_layer3 = nn.ReplicationPad1d((0, self.stride3))
        self.padding_patch_layer4 = nn.ReplicationPad1d((0, self.stride4))

        self.shared_MHA = nn.ModuleList(
            [
                AttentionLayer(
                    FullAttention(mask_flag=self.mask),
                    d_model=self.d_model,
                    n_heads=self.n_heads,
                )
                for _ in range(self.N)
            ]
        )
        self.shared_MHA_ch = nn.ModuleList(
            [
                AttentionLayer(
                    FullAttention(mask_flag=self.mask),
                    d_model=self.d_model,
                    n_heads=self.n_heads,
                )
                for _ in range(self.N)
            ]
        )

        self.encoder_list = nn.ModuleList(
            [
                Encoder(
                    d_model=self.d_model,
                    mha=self.shared_MHA[ll],
                    d_hidden=self.d_hidden,
                    dropout=self.dropout,
                    channel_wise=False,
                )
                for ll in range(self.N)
            ]
        )
        # Every channel-wise encoder reuses the first channel attention layer.
        self.encoder_list_ch = nn.ModuleList(
            [
                Encoder(
                    d_model=self.d_model,
                    mha=self.shared_MHA_ch[0],
                    d_hidden=self.d_hidden,
                    dropout=self.dropout,
                    channel_wise=True,
                )
                for _ in range(self.N)
            ]
        )

        # Sinusoidal positional table over the patch axis (kept exactly as
        # originally defined so existing checkpoints stay compatible).
        pe = torch.zeros(self.patch_num1, self.d_model)
        for pos in range(self.patch_num1):
            for i in range(0, self.d_model, 2):
                wavelength = 10000 ** ((2 * i) / self.d_model)
                pe[pos, i] = math.sin(pos / wavelength)
                pe[pos, i + 1] = math.cos(pos / wavelength)
        pe = pe.unsqueeze(0)  # add a batch dimension to the pe matrix
        self.register_buffer("pe", pe)

        self.embedding_channel = nn.Conv1d(
            in_channels=self.d_model * self.patch_num1,
            out_channels=self.d_model,
            kernel_size=1,
        )

        self.embedding_patch_1 = torch.nn.Conv1d(
            in_channels=1,
            out_channels=self.d_model // 4,
            kernel_size=self.patch_len1,
            stride=self.stride1,
        )
        self.embedding_patch_2 = torch.nn.Conv1d(
            in_channels=1,
            out_channels=self.d_model // 4,
            kernel_size=self.patch_len2,
            stride=self.stride2,
        )
        self.embedding_patch_3 = torch.nn.Conv1d(
            in_channels=1,
            out_channels=self.d_model // 4,
            kernel_size=self.patch_len3,
            stride=self.stride3,
        )
        self.embedding_patch_4 = torch.nn.Conv1d(
            in_channels=1,
            out_channels=self.d_model // 4,
            kernel_size=self.patch_len4,
            stride=self.stride4,
        )

        # Head k consumes the encoding plus the k-1 chunks already predicted.
        # Each earlier chunk has width pred_len // 8, so the input width must
        # be d_model + (k - 1) * (pred_len // 8). Writing it as
        # (k - 1) * pred_len // 8 only agrees when pred_len is a multiple of 8
        # and otherwise produces a shape mismatch in forecast().
        chunk = self.pred_len // self._N_HEADS_OUT
        for k in range(1, self._N_HEADS_OUT):
            setattr(
                self,
                f"out_linear_{k}",
                torch.nn.Linear(self.d_model + (k - 1) * chunk, chunk),
            )
        # The last head absorbs the remainder of the horizon.
        self.out_linear_8 = torch.nn.Linear(
            self.d_model + 7 * chunk,
            self.pred_len - 7 * chunk,
        )

        self.remap = torch.nn.Linear(self.d_model, self.seq_len)

    @staticmethod
    def _embed_scale(conv, series):
        """Apply one patch-embedding conv to every channel of ``series`` independently."""
        flat = rearrange(series, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 1)
        return conv(flat).permute(0, 2, 1)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Predict ``pred_len`` steps for every channel of ``x_enc``.

        Only ``x_enc`` is used; the other arguments are accepted for
        interface compatibility.
        """
        # Normalization
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc /= stdev

        # Multi-scale embedding (scale 1 is used without right-padding).
        x_i = x_enc.permute(0, 2, 1)
        encoding_patch1 = self._embed_scale(self.embedding_patch_1, x_i)
        encoding_patch2 = self._embed_scale(
            self.embedding_patch_2, self.padding_patch_layer2(x_i)
        )
        encoding_patch3 = self._embed_scale(
            self.embedding_patch_3, self.padding_patch_layer3(x_i)
        )
        encoding_patch4 = self._embed_scale(
            self.embedding_patch_4, self.padding_patch_layer4(x_i)
        )

        encoding_patch = (
            torch.cat(
                (encoding_patch1, encoding_patch2, encoding_patch3, encoding_patch4),
                dim=-1,
            )
            + self.pe
        )

        # Temporal encoding
        for i in range(self.N):
            encoding_patch = self.encoder_list[i](encoding_patch)[0]

        # Channel-wise encoding
        x_patch_c = rearrange(
            encoding_patch, "(b c) p d -> b c (p d)", b=x_enc.shape[0], c=self.d_channel
        )
        x_ch = self.embedding_channel(x_patch_c.permute(0, 2, 1)).transpose(
            1, 2
        )  # [b c d]
        encoding_1_ch = self.encoder_list_ch[0](x_ch)[0]

        # Semi auto-regressive decoding: each head sees the encoding plus
        # every chunk predicted so far.
        chunks = []
        for k in range(1, self._N_HEADS_OUT + 1):
            head = getattr(self, f"out_linear_{k}")
            if chunks:
                head_in = torch.cat((encoding_1_ch, *chunks), dim=-1)
            else:
                head_in = encoding_1_ch
            chunks.append(head(head_in))

        final_forecast = torch.cat(chunks, dim=-1).permute(0, 2, 1)

        # De-Normalization
        dec_out = final_forecast * (
            stdev[:, 0].unsqueeze(1).repeat(1, self.pred_len, 1)
        )
        dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, self.pred_len, 1))
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``.

        Raises:
            NotImplementedError: for imputation, anomaly detection and
                classification.
        """
        if (
            self.task_name == "long_term_forecast"
            or self.task_name == "short_term_forecast"
        ):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == "imputation":
            raise NotImplementedError(
                "Task imputation for MultiPatchFormer is temporarily not supported"
            )
        if self.task_name == "anomaly_detection":
            raise NotImplementedError(
                "Task anomaly_detection for MultiPatchFormer is temporarily not supported"
            )
        if self.task_name == "classification":
            raise NotImplementedError(
                "Task classification for MultiPatchFormer is temporarily not supported"
            )
        return None
================================================
FILE: models/Nonstationary_Transformer.py
================================================
import torch
import torch.nn as nn
from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer
from layers.SelfAttention_Family import DSAttention, AttentionLayer
from layers.Embed import DataEmbedding
import torch.nn.functional as F
class Projector(nn.Module):
    '''
    MLP to learn the De-stationary factors
    Paper link: https://openreview.net/pdf?id=ucNDIDRNjjv

    A circular 1-D convolution collapses the sequence axis to a single row,
    which is concatenated with the supplied statistics and fed to an MLP.
    '''

    def __init__(self, enc_in, seq_len, hidden_dims, hidden_layers, output_dim, kernel_size=3):
        super().__init__()

        # Padding amount depends on the installed torch version.
        if torch.__version__ >= '1.5.0':
            padding = 1
        else:
            padding = 2
        self.series_conv = nn.Conv1d(
            in_channels=seq_len,
            out_channels=1,
            kernel_size=kernel_size,
            padding=padding,
            padding_mode='circular',
            bias=False,
        )

        # Hidden widths actually used: the first `hidden_layers` entries.
        widths = [hidden_dims[0]] + [hidden_dims[i + 1] for i in range(hidden_layers - 1)]
        stack = []
        fan_in = 2 * enc_in
        for width in widths:
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.ReLU())
            fan_in = width
        stack.append(nn.Linear(hidden_dims[-1], output_dim, bias=False))
        self.backbone = nn.Sequential(*stack)

    def forward(self, x, stats):
        # x:     B x S x E
        # stats: B x 1 x E
        # y:     B x O
        n_batch = x.shape[0]
        summary = self.series_conv(x)  # B x 1 x E
        joined = torch.cat([summary, stats], dim=1)  # B x 2 x E
        flat = joined.view(n_batch, -1)  # B x 2E
        return self.backbone(flat)  # B x O
class Model(nn.Module):
    """
    Paper link: https://openreview.net/pdf?id=ucNDIDRNjjv

    Transformer whose attention layers receive learned de-stationary
    factors ``tau`` and ``delta``, computed from the raw series together
    with its per-series standard deviation and mean.
    """

    # Upper bound applied to log(tau) before exponentiation to avoid overflow.
    _LOG_TAU_MAX = 80.0

    def __init__(self, configs):
        """
        Args:
            configs: experiment configuration; the task given by
                ``configs.task_name`` decides which heads are built.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len

        # Embedding
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        DSAttention(False, configs.factor, attention_dropout=configs.dropout,
                                    output_attention=False), configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation
                ) for _ in range(configs.e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )
        # Decoder
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)
            self.decoder = Decoder(
                [
                    DecoderLayer(
                        AttentionLayer(
                            DSAttention(True, configs.factor, attention_dropout=configs.dropout,
                                        output_attention=False),
                            configs.d_model, configs.n_heads),
                        AttentionLayer(
                            DSAttention(False, configs.factor, attention_dropout=configs.dropout,
                                        output_attention=False),
                            configs.d_model, configs.n_heads),
                        configs.d_model,
                        configs.d_ff,
                        dropout=configs.dropout,
                        activation=configs.activation,
                    )
                    for _ in range(configs.d_layers)
                ],
                norm_layer=torch.nn.LayerNorm(configs.d_model),
                projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
            )
        if self.task_name == 'imputation':
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'anomaly_detection':
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

        self.tau_learner = Projector(enc_in=configs.enc_in, seq_len=configs.seq_len, hidden_dims=configs.p_hidden_dims,
                                     hidden_layers=configs.p_hidden_layers, output_dim=1)
        self.delta_learner = Projector(enc_in=configs.enc_in, seq_len=configs.seq_len,
                                       hidden_dims=configs.p_hidden_dims, hidden_layers=configs.p_hidden_layers,
                                       output_dim=configs.seq_len)

    def _destationary_factors(self, x_raw, std_enc, mean_enc):
        """Return ``(tau, delta)`` learned from the raw series and its statistics."""
        # B x S x E, B x 1 x E -> B x 1, positive scalar
        log_tau = self.tau_learner(x_raw, std_enc)
        tau = torch.clamp(log_tau, max=self._LOG_TAU_MAX).exp()  # avoid numerical overflow
        # B x S x E, B x 1 x E -> B x S
        delta = self.delta_learner(x_raw, mean_enc)
        return tau, delta

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encode the normalised history and decode ``label_len + pred_len`` steps."""
        x_raw = x_enc.clone().detach()

        # Normalization
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        x_enc = x_enc - mean_enc
        std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
        x_enc = x_enc / std_enc

        tau, delta = self._destationary_factors(x_raw, std_enc, mean_enc)

        # Decoder input: last label_len normalised steps followed by zeros.
        x_dec_new = torch.cat([x_enc[:, -self.label_len:, :], torch.zeros_like(x_dec[:, -self.pred_len:, :])],
                              dim=1).to(x_enc.device).clone()

        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)

        dec_out = self.dec_embedding(x_dec_new, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, tau=tau, delta=delta)
        dec_out = dec_out * std_enc + mean_enc
        return dec_out

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct the series using statistics of the observed (mask == 1) entries."""
        x_raw = x_enc.clone().detach()

        # Normalization over observed entries only. The count is clamped to
        # at least 1 so a channel with no observed values yields finite
        # statistics instead of NaN/inf from a division by zero.
        observed = torch.sum(mask == 1, dim=1).clamp(min=1)
        mean_enc = torch.sum(x_enc, dim=1) / observed
        mean_enc = mean_enc.unsqueeze(1).detach()
        x_enc = x_enc - mean_enc
        x_enc = x_enc.masked_fill(mask == 0, 0)
        std_enc = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / observed + 1e-5)
        std_enc = std_enc.unsqueeze(1).detach()
        x_enc /= std_enc

        tau, delta = self._destationary_factors(x_raw, std_enc, mean_enc)

        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)

        dec_out = self.projection(enc_out)
        dec_out = dec_out * std_enc + mean_enc
        return dec_out

    def anomaly_detection(self, x_enc):
        """Reconstruct the input series; no time-mark features are used."""
        x_raw = x_enc.clone().detach()

        # Normalization
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        x_enc = x_enc - mean_enc
        std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
        x_enc = x_enc / std_enc

        tau, delta = self._destationary_factors(x_raw, std_enc, mean_enc)

        # embedding
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)

        dec_out = self.projection(enc_out)
        dec_out = dec_out * std_enc + mean_enc
        return dec_out

    def classification(self, x_enc, x_mark_enc):
        """Classify each series; ``x_mark_enc`` zeroes out padded positions.

        The statistics are only used to learn tau/delta here; the input
        itself is embedded without normalisation.
        """
        x_raw = x_enc.clone().detach()

        # Statistics for the de-stationary factors
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        std_enc = torch.sqrt(
            torch.var(x_enc - mean_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E

        tau, delta = self._destationary_factors(x_raw, std_enc, mean_enc)

        # embedding
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta)

        # Output
        output = self.act(enc_out)  # the output transformer encoder/decoder embeddings don't include non-linearity
        output = self.dropout(output)
        output = output * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        # (batch_size, num_classes)
        output = self.projection(output)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unknown task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, num_class]
        return None
================================================
FILE: models/PAttn.py
================================================
import torch
import torch.nn as nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from einops import rearrange
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2406.16964

    Patch-based forecaster: each channel is split into overlapping patches,
    passed through a single attention encoder layer, and mapped to the
    horizon by one linear layer.
    """

    def __init__(self, configs, patch_len=16, stride=8):
        """
        Args:
            configs: experiment configuration.
            patch_len: length of each patch.
            stride: step between consecutive patches (also the right-padding amount).
        """
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.patch_size = patch_len
        self.stride = stride

        self.d_model = configs.d_model

        # Patch count after right-padding the series by one stride.
        self.patch_num = (configs.seq_len - self.patch_size) // self.stride + 2
        self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride))
        self.in_layer = nn.Linear(self.patch_size, self.d_model)

        attention = AttentionLayer(
            FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                          output_attention=False),
            configs.d_model,
            configs.n_heads,
        )
        single_layer = EncoderLayer(
            attention,
            configs.d_model,
            configs.d_ff,
            dropout=configs.dropout,
            activation=configs.activation,
        )
        self.encoder = Encoder([single_layer], norm_layer=nn.LayerNorm(configs.d_model))
        self.out_layer = nn.Linear(self.d_model * self.patch_num, configs.pred_len)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast ``pred_len`` steps from ``x_enc``; the other inputs are unused."""
        # Per-series normalisation over the time axis.
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc /= stdev

        n_batch, _, n_channels = x_enc.shape

        # Pad on the right, then cut into overlapping patches.
        series = self.padding_patch_layer(x_enc.permute(0, 2, 1))
        patches = series.unfold(dimension=-1, size=self.patch_size, step=self.stride)

        tokens = rearrange(self.in_layer(patches), 'b c m l -> (b c) m l')
        encoded, _ = self.encoder(tokens)
        encoded = rearrange(encoded, '(b c) m l -> b c (m l)', b=n_batch, c=n_channels)

        dec_out = self.out_layer(encoded).permute(0, 2, 1)

        # Undo the normalisation.
        scale = stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
        shift = means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
        dec_out = dec_out * scale
        dec_out = dec_out + shift
        return dec_out
================================================
FILE: models/PatchTST.py
================================================
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
class Transpose(nn.Module):
    """Module wrapper around ``Tensor.transpose`` so it can sit inside ``nn.Sequential``."""

    def __init__(self, *dims, contiguous=False):
        """
        Args:
            *dims: the two dimensions to swap, forwarded to ``transpose``.
            contiguous: whether to call ``.contiguous()`` on the result.
        """
        super().__init__()
        self.dims = dims
        self.contiguous = contiguous

    def forward(self, x):
        swapped = x.transpose(*self.dims)
        if self.contiguous:
            return swapped.contiguous()
        return swapped
class FlattenHead(nn.Module):
    """Prediction head: flatten the last two dims, project linearly, apply dropout."""

    def __init__(self, n_vars, nf, target_window, head_dropout=0):
        """
        Args:
            n_vars: number of variables (stored, not used in the computation).
            nf: flattened feature size fed to the linear layer.
            target_window: output length of the linear layer.
            head_dropout: dropout probability applied to the output.
        """
        super().__init__()
        self.n_vars = n_vars
        self.flatten = nn.Flatten(start_dim=-2)
        self.linear = nn.Linear(nf, target_window)
        self.dropout = nn.Dropout(head_dropout)

    def forward(self, x):  # x: [bs x nvars x d_model x patch_num]
        flat = self.flatten(x)
        projected = self.linear(flat)
        return self.dropout(projected)
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/pdf/2211.14730.pdf

    PatchTST: each variable is split into patches, embedded, encoded by a
    Transformer encoder, and decoded by a task-specific head.
    """

    def __init__(self, configs, patch_len=16, stride=8):
        """
        patch_len: int, patch len for patch_embedding
        stride: int, stride for patch_embedding
        """
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        padding = stride

        # patching and embedding
        self.patch_embedding = PatchEmbedding(
            configs.d_model, patch_len, stride, padding, configs.dropout)

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=False), configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation
                ) for _ in range(configs.e_layers)
            ],
            norm_layer=nn.Sequential(Transpose(1, 2), nn.BatchNorm1d(configs.d_model), Transpose(1, 2))
        )

        # Prediction Head
        self.head_nf = configs.d_model * \
                       int((configs.seq_len - patch_len) / stride + 2)
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            self.head = FlattenHead(configs.enc_in, self.head_nf, configs.pred_len,
                                    head_dropout=configs.dropout)
        elif self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
            self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len,
                                    head_dropout=configs.dropout)
        elif self.task_name == 'classification':
            self.flatten = nn.Flatten(start_dim=-2)
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                self.head_nf * configs.enc_in, configs.num_class)

    @staticmethod
    def _normalize(x_enc):
        """Normalization from Non-stationary Transformer; returns ``(x_norm, means, stdev)``."""
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc / stdev
        return x_enc, means, stdev

    @staticmethod
    def _denormalize(dec_out, means, stdev, length):
        """De-Normalization from Non-stationary Transformer over ``length`` steps."""
        dec_out = dec_out * \
                  (stdev[:, 0, :].unsqueeze(1).repeat(1, length, 1))
        dec_out = dec_out + \
                  (means[:, 0, :].unsqueeze(1).repeat(1, length, 1))
        return dec_out

    def _encode(self, x_enc):
        """Patch, embed and encode ``x_enc``; returns [bs x nvars x d_model x patch_num]."""
        # do patching and embedding
        x_enc = x_enc.permute(0, 2, 1)
        # u: [bs * nvars x patch_num x d_model]
        enc_out, n_vars = self.patch_embedding(x_enc)

        # z: [bs * nvars x patch_num x d_model]
        enc_out, attns = self.encoder(enc_out)
        # z: [bs x nvars x patch_num x d_model]
        enc_out = torch.reshape(
            enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1]))
        # z: [bs x nvars x d_model x patch_num]
        return enc_out.permute(0, 1, 3, 2)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast ``pred_len`` steps; only ``x_enc`` is used."""
        x_enc, means, stdev = self._normalize(x_enc)
        enc_out = self._encode(x_enc)

        # Decoder
        dec_out = self.head(enc_out)  # z: [bs x nvars x target_window]
        dec_out = dec_out.permute(0, 2, 1)
        return self._denormalize(dec_out, means, stdev, self.pred_len)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct the series using statistics of the observed (mask == 1) entries."""
        # Normalization from Non-stationary Transformer, restricted to the
        # observed entries. The count is clamped to at least 1 so a channel
        # with no observed values gives finite statistics instead of NaN/inf.
        observed = torch.sum(mask == 1, dim=1).clamp(min=1)
        means = torch.sum(x_enc, dim=1) / observed
        means = means.unsqueeze(1).detach()
        x_enc = x_enc - means
        x_enc = x_enc.masked_fill(mask == 0, 0)
        stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / observed + 1e-5)
        stdev = stdev.unsqueeze(1).detach()
        x_enc /= stdev

        enc_out = self._encode(x_enc)

        # Decoder
        dec_out = self.head(enc_out)  # z: [bs x nvars x target_window]
        dec_out = dec_out.permute(0, 2, 1)
        return self._denormalize(dec_out, means, stdev, self.seq_len)

    def anomaly_detection(self, x_enc):
        """Reconstruct the input series over ``seq_len`` steps."""
        x_enc, means, stdev = self._normalize(x_enc)
        enc_out = self._encode(x_enc)

        # Decoder
        dec_out = self.head(enc_out)  # z: [bs x nvars x target_window]
        dec_out = dec_out.permute(0, 2, 1)
        return self._denormalize(dec_out, means, stdev, self.seq_len)

    def classification(self, x_enc, x_mark_enc):
        """Classify each series; ``x_mark_enc`` is accepted but not used."""
        x_enc, _, _ = self._normalize(x_enc)
        enc_out = self._encode(x_enc)

        # Decoder
        output = self.flatten(enc_out)
        output = self.dropout(output)
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns ``None`` for an unknown task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(
                x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None
================================================
FILE: models/Pyraformer.py
================================================
import torch
import torch.nn as nn
from layers.Pyraformer_EncDec import Encoder
class Model(nn.Module):
    """
    Pyraformer: Pyramidal attention to reduce complexity
    Paper link: https://openreview.net/pdf?id=0EXmFzUn5I
    """

    def __init__(self, configs, window_size=None, inner_size=5):
        """
        configs: experiment namespace. This class reads task_name, pred_len,
            d_model and enc_in, plus dropout, seq_len and num_class for
            classification; the same object is handed to the Encoder.
        window_size: list, the downsample window size in pyramidal attention.
            ``None`` (the default) selects ``[4, 4]``. Short-term forecasting
            always uses ``[2, 2]`` regardless of this argument.
        inner_size: int, the size of neighbour attention
        """
        super().__init__()
        # Build the default per instance: a list literal in the signature
        # would be a single object shared by every Model using the default.
        if window_size is None:
            window_size = [4, 4]

        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.d_model = configs.d_model

        if self.task_name == 'short_term_forecast':
            window_size = [2, 2]
        self.encoder = Encoder(configs, window_size, inner_size)

        # Width of the encoder features fed to the projection heads below:
        # one d_model slice per pyramid level plus one more.
        feature_dim = (len(window_size) + 1) * self.d_model

        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            self.projection = nn.Linear(
                feature_dim, self.pred_len * configs.enc_in)
        elif self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
            self.projection = nn.Linear(
                feature_dim, configs.enc_in, bias=True)
        elif self.task_name == 'classification':
            self.act = torch.nn.functional.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                feature_dim * configs.seq_len, configs.num_class)

    def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Forecast from the encoder features of the last time step.

        ``x_dec``, ``x_mark_dec`` and ``mask`` are not used.
        """
        enc_out = self.encoder(x_enc, x_mark_enc)[:, -1, :]
        # One linear map emits every step at once; unfold it to
        # (batch, pred_len, remaining).
        dec_out = self.projection(enc_out).view(
            enc_out.size(0), self.pred_len, -1)
        return dec_out

    def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Same as ``long_forecast`` but with per-series normalization.

        The input is standardized along dim 1 before encoding and the
        prediction is mapped back with the same statistics.
        """
        # Normalization
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        x_enc = x_enc - mean_enc
        std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
        x_enc = x_enc / std_enc

        enc_out = self.encoder(x_enc, x_mark_enc)[:, -1, :]
        dec_out = self.projection(enc_out).view(
            enc_out.size(0), self.pred_len, -1)

        # De-normalization
        dec_out = dec_out * std_enc + mean_enc
        return dec_out

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Project every encoder position back to ``enc_in`` channels.

        ``x_dec``, ``x_mark_dec`` and ``mask`` are not used.
        """
        enc_out = self.encoder(x_enc, x_mark_enc)
        dec_out = self.projection(enc_out)
        return dec_out

    def anomaly_detection(self, x_enc, x_mark_enc):
        """Project every encoder position back to ``enc_in`` channels."""
        enc_out = self.encoder(x_enc, x_mark_enc)
        dec_out = self.projection(enc_out)
        return dec_out

    def classification(self, x_enc, x_mark_enc):
        """Classify a series; ``x_mark_enc`` multiplies the features as a mask."""
        # enc (no time-mark features are given to the encoder for this task)
        enc_out = self.encoder(x_enc, x_mark_enc=None)

        # Output
        # the output transformer encoder/decoder embeddings don't include non-linearity
        output = self.act(enc_out)
        output = self.dropout(output)
        # zero-out padding embeddings
        # NOTE(review): assumes x_mark_enc is a (batch, seq_len) 0/1 padding mask
        output = output * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; unknown tasks return ``None``."""
        if self.task_name == 'long_term_forecast':
            dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'short_term_forecast':
            dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(
                x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc, x_mark_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None
================================================
FILE: models/Reformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import ReformerLayer
from layers.Embed import DataEmbedding
class Model(nn.Module):
    """
    Reformer with O(LlogL) complexity
    Paper link: https://openreview.net/forum?id=rkgNKkHtvB
    """

    def __init__(self, configs, bucket_size=4, n_hashes=4):
        """
        bucket_size: int, forwarded to every ReformerLayer
        n_hashes: int, forwarded to every ReformerLayer
        """
        super().__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len

        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.embed, configs.freq,
            configs.dropout)

        # Encoder: e_layers Reformer attention blocks followed by a LayerNorm.
        attn_blocks = []
        for _ in range(configs.e_layers):
            attention = ReformerLayer(None, configs.d_model, configs.n_heads,
                                      bucket_size=bucket_size, n_hashes=n_hashes)
            attn_blocks.append(
                EncoderLayer(
                    attention,
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(
            attn_blocks, norm_layer=torch.nn.LayerNorm(configs.d_model))

        if self.task_name != 'classification':
            self.projection = nn.Linear(
                configs.d_model, configs.c_out, bias=True)
        else:
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def _encode_with_placeholder(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Append the last pred_len decoder steps to the input, then embed, encode and project."""
        # add placeholder
        x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1)
        if x_mark_enc is not None:
            x_mark_enc = torch.cat(
                [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1)

        hidden = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        hidden, _ = self.encoder(hidden, attn_mask=None)
        return self.projection(hidden)

    def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast over the input extended by the decoder placeholder steps."""
        return self._encode_with_placeholder(
            x_enc, x_mark_enc, x_dec, x_mark_dec)  # [B, L, D]

    def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Like ``long_forecast``, with the input standardized along dim 1 and the output rescaled."""
        # Normalization (statistics are detached)
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        centered = x_enc - mean_enc
        std_enc = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5
        ).detach()  # B x 1 x E

        projected = self._encode_with_placeholder(
            centered / std_enc, x_mark_enc, x_dec, x_mark_dec)

        # De-normalization
        return projected * std_enc + mean_enc  # [B, L, D]

    def imputation(self, x_enc, x_mark_enc):
        """Embed, encode and project every position of the input."""
        hidden = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        hidden, _ = self.encoder(hidden)
        return self.projection(hidden)  # [B, L, D]

    def anomaly_detection(self, x_enc):
        """Same pipeline as ``imputation`` but without time-mark features."""
        hidden = self.enc_embedding(x_enc, None)  # [B,T,C]
        hidden, _ = self.encoder(hidden)
        return self.projection(hidden)  # [B, L, D]

    def classification(self, x_enc, x_mark_enc):
        """Classify a series; ``x_mark_enc`` multiplies the features as a mask."""
        # enc
        hidden = self.enc_embedding(x_enc, None)
        hidden, _ = self.encoder(hidden)

        # Output
        # the output transformer encoder/decoder embeddings don't include non-linearity
        features = self.dropout(self.act(hidden))
        # zero-out padding embeddings
        features = features * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        features = features.reshape(features.shape[0], -1)
        return self.projection(features)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; unknown tasks return ``None``."""
        task = self.task_name
        if task == 'long_term_forecast':
            result = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return result[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'short_term_forecast':
            result = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return result[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/SCINet.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
class Splitting(nn.Module):
    """Split a tensor along dim 1 into its even- and odd-indexed entries."""

    def __init__(self):
        super().__init__()

    def even(self, x):
        """Entries at positions 0, 2, 4, ... of dim 1."""
        return x[:, 0::2, :]

    def odd(self, x):
        """Entries at positions 1, 3, 5, ... of dim 1."""
        return x[:, 1::2, :]

    def forward(self, x):
        """Return the pair ``(even part, odd part)``."""
        even_part = self.even(x)
        odd_part = self.odd(x)
        return even_part, odd_part
class CausalConvBlock(nn.Module):
    """Pad -> Conv1d -> LeakyReLU -> Dropout -> Conv1d -> Tanh over the last axis."""

    def __init__(self, d_model, kernel_size=5, dropout=0.0):
        super().__init__()
        # Each un-padded convolution shortens the sequence by kernel_size - 1,
        # so replicating that many steps on both ends up front lets the two
        # convolutions hand back the original length.
        edge = kernel_size - 1
        self.causal_conv = nn.Sequential(
            nn.ReplicationPad1d((edge, edge)),
            nn.Conv1d(d_model, d_model, kernel_size=kernel_size),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.Dropout(dropout),
            nn.Conv1d(d_model, d_model, kernel_size=kernel_size),
            nn.Tanh(),
        )

    def forward(self, x):
        # The output has the same dimensions as the input.
        return self.causal_conv(x)
class SCIBlock(nn.Module):
    """One SCINet interaction step between the even and odd halves of a sequence.

    The input is split along dim 1 into even- and odd-indexed steps. Each
    half is scaled by the exponential of a convolution of the other half,
    then corrected by adding (even) or subtracting (odd) a second
    convolution of the other scaled half.
    """

    def __init__(self, d_model, kernel_size=5, dropout=0.0):
        """
        d_model: channel count of every convolution block
        kernel_size: kernel size of every convolution block
        dropout: dropout probability inside every convolution block
        """
        super(SCIBlock, self).__init__()
        self.splitting = Splitting()
        # Forward kernel_size and dropout: building the blocks with their own
        # defaults would silently ignore the values passed to this constructor.
        self.modules_even, self.modules_odd, self.interactor_even, self.interactor_odd = [
            CausalConvBlock(d_model, kernel_size=kernel_size, dropout=dropout)
            for _ in range(4)
        ]

    def forward(self, x):
        """Return ``(even_update, odd_update)`` in the input's axis layout."""
        x_even, x_odd = self.splitting(x)
        # The convolution blocks work on the last axis, so swap dims 1 and 2.
        x_even = x_even.permute(0, 2, 1)
        x_odd = x_odd.permute(0, 2, 1)

        # Multiplicative interaction: each half is gated by the other.
        x_even_temp = x_even.mul(torch.exp(self.modules_even(x_odd)))
        x_odd_temp = x_odd.mul(torch.exp(self.modules_odd(x_even)))

        # Additive interaction on the gated halves.
        x_even_update = x_even_temp + self.interactor_even(x_odd_temp)
        x_odd_update = x_odd_temp - self.interactor_odd(x_even_temp)

        return x_even_update.permute(0, 2, 1), x_odd_update.permute(0, 2, 1)
class SCINet(nn.Module):
    """Binary tree of SCIBlocks.

    Each node splits its input into even and odd halves, lets them interact,
    recurses into each half while ``current_level`` is above zero, and then
    re-interleaves the two results.
    """

    def __init__(self, d_model, current_level=3, kernel_size=5, dropout=0.0):
        """
        d_model: channel count handed to every SCIBlock
        current_level: number of tree levels below this node (0 = leaf)
        kernel_size, dropout: forwarded to every SCIBlock in the tree
        """
        super(SCINet, self).__init__()
        self.current_level = current_level
        self.working_block = SCIBlock(d_model, kernel_size, dropout)

        if current_level != 0:
            self.SCINet_Tree_odd = SCINet(d_model, current_level - 1, kernel_size, dropout)
            self.SCINet_Tree_even = SCINet(d_model, current_level - 1, kernel_size, dropout)

    def forward(self, x):
        """Process ``x`` and return a tensor with the same length along dim 1."""
        odd_flag = False
        if x.shape[1] % 2 == 1:
            # Repeat the last step so both halves are equally long for the
            # working block; the surplus odd step is dropped again below.
            odd_flag = True
            x = torch.cat((x, x[:, -1:, :]), dim=1)
        x_even_update, x_odd_update = self.working_block(x)
        if odd_flag:
            x_odd_update = x_odd_update[:, :-1]

        if self.current_level == 0:
            return self.zip_up_the_pants(x_even_update, x_odd_update)
        return self.zip_up_the_pants(
            self.SCINet_Tree_even(x_even_update),
            self.SCINet_Tree_odd(x_odd_update),
        )

    def zip_up_the_pants(self, even, odd):
        """Interleave ``even`` and ``odd`` along dim 1 (even step first).

        When ``even`` is longer than ``odd``, its last step is appended after
        the interleaved pairs.
        """
        even_len, odd_len = even.shape[1], odd.shape[1]
        paired = min(even_len, odd_len)
        # Stacking on a new axis right after dim 1 and merging the two axes
        # yields even[0], odd[0], even[1], odd[1], ... in a single operation
        # instead of concatenating one slice per step.
        zipped = torch.stack((even[:, :paired], odd[:, :paired]), dim=2)
        zipped = zipped.reshape(even.shape[0], 2 * paired, even.shape[2])
        if even_len > odd_len:
            zipped = torch.cat((zipped, even[:, -1:]), dim=1)
        return zipped
class Model(nn.Module):
    """SCINet forecaster: one or two SCINet trees with 1x1 Conv1d projections over the time axis."""

    def __init__(self, configs):
        """
        configs: experiment namespace; reads task_name, seq_len, label_len,
            pred_len, d_layers (number of stacks), enc_in and dropout.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        # You can set the number of SCINet stacks by argument "d_layers", but should choose 1 or 2.
        self.num_stacks = configs.d_layers
        if self.num_stacks == 1:
            self.sci_net_1 = SCINet(configs.enc_in, dropout=configs.dropout)
            # Single stack: one projection emits history and horizon together.
            self.projection_1 = nn.Conv1d(self.seq_len, self.seq_len + self.pred_len, kernel_size=1, stride=1, bias=False)
        else:
            self.sci_net_1, self.sci_net_2 = [SCINet(configs.enc_in, dropout=configs.dropout) for _ in range(2)]
            # Two stacks: the first projection emits the horizon only; the
            # second refines history and horizon concatenated.
            self.projection_1 = nn.Conv1d(self.seq_len, self.pred_len, kernel_size=1, stride=1, bias=False)
            self.projection_2 = nn.Conv1d(self.seq_len + self.pred_len, self.seq_len + self.pred_len,
                                          kernel_size=1, bias=False)

        # For positional encoding: use an even width so the sin and cos halves
        # are equally wide; forecast() trims the extra column when enc_in is odd.
        self.pe_hidden_size = configs.enc_in
        if self.pe_hidden_size % 2 == 1:
            self.pe_hidden_size += 1

        num_timescales = self.pe_hidden_size // 2
        max_timescale = 10000.0
        min_timescale = 1.0

        log_timescale_increment = (
            math.log(float(max_timescale) / float(min_timescale)) /
            max(num_timescales - 1, 1))
        inv_timescales = min_timescale * torch.exp(
            torch.arange(num_timescales, dtype=torch.float32) *
            -log_timescale_increment)
        self.register_buffer('inv_timescales', inv_timescales)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Run the forecast head for forecast tasks; any other task returns ``None``.

        The forecast is prefixed with ``seq_len`` zero steps shaped like
        ``x_enc``, so the prediction occupies the trailing steps of dim 1.
        """
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)  # [B, seq_len + pred_len, C]
            dec_out = torch.cat([torch.zeros_like(x_enc), dec_out], dim=1)
            return dec_out  # [B, T, D]
        return None

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return ``seq_len + pred_len`` de-normalized steps for the input ``x_enc``.

        Only ``x_enc`` is read; the other arguments are unused.
        """
        # Normalization from Non-stationary Transformer
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc / stdev

        # position-encoding: computed once, trimmed by one column when the
        # encoding was widened to an even size in __init__.
        pe = self.get_position_encoding(x_enc)
        if pe.shape[2] > x_enc.shape[2]:
            pe = pe[:, :, :-1]
        x_enc = x_enc + pe

        # SCINet (residual connection around the tree)
        dec_out = self.sci_net_1(x_enc)
        dec_out = dec_out + x_enc
        dec_out = self.projection_1(dec_out)
        if self.num_stacks != 1:
            dec_out = torch.cat((x_enc, dec_out), dim=1)
            temp = dec_out
            dec_out = self.sci_net_2(dec_out)
            dec_out = dec_out + temp
            dec_out = self.projection_2(dec_out)

        # De-Normalization from Non-stationary Transformer; stdev and means
        # keep a size-1 dim 1 and broadcast over all seq_len + pred_len steps.
        dec_out = dec_out * stdev + means
        return dec_out

    def get_position_encoding(self, x):
        """Return sinusoidal encodings of shape ``(1, x.size(1), pe_hidden_size)``."""
        max_length = x.size()[1]
        position = torch.arange(max_length, dtype=torch.float32,
                                device=x.device)  # tensor([0., 1., 2., 3., 4.], device='cuda:0')
        scaled_time = position.unsqueeze(1) * self.inv_timescales.unsqueeze(0)  # [T, num_timescales]
        signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)  # [T, C]
        # pe_hidden_size is made even in __init__, so this pads by zero rows.
        signal = F.pad(signal, (0, 0, 0, self.pe_hidden_size % 2))
        signal = signal.view(1, max_length, self.pe_hidden_size)
        return signal
================================================
FILE: models/SegRNN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Autoformer_EncDec import series_decomp
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2308.11200.pdf
    """

    def __init__(self, configs):
        super().__init__()

        # get parameters
        self.seq_len = configs.seq_len
        self.enc_in = configs.enc_in
        self.d_model = configs.d_model
        self.dropout = configs.dropout

        self.task_name = configs.task_name
        # Non-forecast tasks emit an output as long as the input.
        same_length_tasks = ('classification', 'anomaly_detection', 'imputation')
        if self.task_name in same_length_tasks:
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len

        self.seg_len = configs.seg_len
        self.seg_num_x = self.seq_len // self.seg_len
        self.seg_num_y = self.pred_len // self.seg_len

        # building model
        self.valueEmbedding = nn.Sequential(
            nn.Linear(self.seg_len, self.d_model),
            nn.ReLU(),
        )
        self.rnn = nn.GRU(input_size=self.d_model, hidden_size=self.d_model, num_layers=1,
                          bias=True, batch_first=True, bidirectional=False)
        # Learned queries for decoding: one half indexes the output segment,
        # the other half indexes the channel.
        self.pos_emb = nn.Parameter(torch.randn(self.seg_num_y, self.d_model // 2))
        self.channel_emb = nn.Parameter(torch.randn(self.enc_in, self.d_model // 2))

        self.predict = nn.Sequential(
            nn.Dropout(self.dropout),
            nn.Linear(self.d_model, self.seg_len),
        )

        if self.task_name == 'classification':
            self.act = F.gelu
            # From here on self.dropout is a module rather than the float rate.
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.enc_in * configs.seq_len, configs.num_class)

    def encoder(self, x):
        """Encode ``x`` segment-wise with the GRU and decode ``pred_len`` steps.

        Letters used in the shape comments:
        b: batch_size   c: channel_size   s: seq_len
        d: d_model      w: seg_len        n: seg_num_x   m: seg_num_y
        """
        n_batch = x.size(0)

        # normalization and permute     b,s,c -> b,c,s
        anchor = x[:, -1:, :].detach()
        centered = (x - anchor).permute(0, 2, 1)  # b,c,s

        # segment and embedding    b,c,s -> bc,n,w -> bc,n,d
        segments = centered.reshape(-1, self.seg_num_x, self.seg_len)
        embedded = self.valueEmbedding(segments)

        # encoding: only the final hidden state is kept    1,bc,d
        _, hidden = self.rnn(embedded)

        # m,d//2 -> 1,m,d//2 -> c,m,d//2
        pos_part = self.pos_emb.unsqueeze(0).repeat(self.enc_in, 1, 1)
        # c,d//2 -> c,1,d//2 -> c,m,d//2
        chan_part = self.channel_emb.unsqueeze(1).repeat(1, self.seg_num_y, 1)
        # c,m,d -> cm,1,d -> bcm,1,d
        queries = torch.cat([pos_part, chan_part], dim=-1)
        queries = queries.view(-1, 1, self.d_model).repeat(n_batch, 1, 1)

        # Every output segment starts from its series' encoder state.
        init_state = hidden.repeat(1, 1, self.seg_num_y).view(1, -1, self.d_model)
        _, decoded = self.rnn(queries, init_state)  # bcm,1,d  1,bcm,d

        # 1,bcm,d -> 1,bcm,w -> b,c,s
        y = self.predict(decoded).view(-1, self.enc_in, self.pred_len)

        # permute and denorm
        return y.permute(0, 2, 1) + anchor

    def forecast(self, x_enc):
        # Encoder
        return self.encoder(x_enc)

    def imputation(self, x_enc):
        # Encoder
        return self.encoder(x_enc)

    def anomaly_detection(self, x_enc):
        # Encoder
        return self.encoder(x_enc)

    def classification(self, x_enc):
        # Encoder
        encoded = self.encoder(x_enc)

        # Output: flatten each sample to one row, then project
        # (batch_size, seq_length * d_model)
        flat = encoded.reshape(encoded.shape[0], -1)
        # (batch_size, num_classes)
        return self.projection(flat)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; only ``x_enc`` is read. Unknown tasks return ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            prediction = self.forecast(x_enc)
            return prediction[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc)  # [B, N]
        return None
================================================
FILE: models/Sundial.py
================================================
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
from transformers import AutoModelForCausalLM
class Model(nn.Module):
    """Zero-shot forecasting wrapper around the pretrained 'thuml/sundial-base-128m' checkpoint."""

    def __init__(self, configs):
        """
        configs: experiment namespace; reads task_name, seq_len and pred_len.
        """
        super().__init__()
        # Loads the checkpoint through transformers; trust_remote_code=True
        # lets the checkpoint's own model code run.
        self.model = AutoModelForCausalLM.from_pretrained('thuml/sundial-base-128m', trust_remote_code=True)
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast each channel of ``x_enc`` independently.

        For every index of the last axis, 20 samples of ``pred_len`` new
        steps are generated and averaged over dim 1; the per-channel results
        are stacked on a new last axis. Only ``x_enc`` is read.
        """
        n_channels = x_enc.shape[-1]
        per_channel = []
        for ch in range(n_channels):
            samples = self.model.generate(x_enc[..., ch], max_new_tokens=self.pred_len, num_samples=20)
            per_channel.append(samples.mean(dim=1))
        return torch.stack(per_channel, dim=-1)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the forecast for 'zero_shot_forecast'; any other task returns ``None``."""
        if self.task_name != 'zero_shot_forecast':
            return None
        return self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
================================================
FILE: models/TSMixer.py
================================================
import torch.nn as nn
class ResBlock(nn.Module):
    """Residual mixing block: an MLP across time followed by an MLP across channels."""

    def __init__(self, configs):
        super().__init__()
        seq_len, width, n_channels = configs.seq_len, configs.d_model, configs.enc_in

        # Mixes along the time axis (applied to the transposed input).
        self.temporal = nn.Sequential(
            nn.Linear(seq_len, width),
            nn.ReLU(),
            nn.Linear(width, seq_len),
            nn.Dropout(configs.dropout),
        )

        # Mixes along the channel axis.
        self.channel = nn.Sequential(
            nn.Linear(n_channels, width),
            nn.ReLU(),
            nn.Linear(width, n_channels),
            nn.Dropout(configs.dropout),
        )

    def forward(self, x):
        # x: [B, L, D]
        time_mixed = self.temporal(x.transpose(1, 2)).transpose(1, 2)
        x = x + time_mixed
        return x + self.channel(x)
class Model(nn.Module):
    """TSMixer forecaster: a stack of ResBlocks and a linear map from seq_len to pred_len steps."""

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.layer = configs.e_layers
        blocks = [ResBlock(configs) for _ in range(configs.e_layers)]
        self.model = nn.ModuleList(blocks)
        self.pred_len = configs.pred_len
        self.projection = nn.Linear(configs.seq_len, configs.pred_len)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Run the mixer stack on ``x_enc`` and project its time axis; other arguments are unused."""
        # x: [B, L, D]
        hidden = x_enc
        for idx in range(self.layer):
            hidden = self.model[idx](hidden)
        # The projection acts on the last axis, so time is moved there and back.
        projected = self.projection(hidden.transpose(1, 2))
        return projected.transpose(1, 2)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the last ``pred_len`` forecast steps; raise ValueError for non-forecast tasks."""
        if self.task_name not in ('long_term_forecast', 'short_term_forecast'):
            raise ValueError('Only forecast tasks implemented yet')
        prediction = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return prediction[:, -self.pred_len:, :]  # [B, L, D]
================================================
FILE: models/TemporalFusionTransformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Embed import DataEmbedding, TemporalEmbedding
from torch import Tensor
from typing import Optional
from collections import namedtuple
# Input roles used by the model:
#   static:   time-independent features
#   observed: features only available for the past (e.g. the predicted targets)
#   known:    information available for both past and future (i.e. time stamps)
TypePos = namedtuple('TypePos', ['static', 'observed'])

# To use a new dataset, register the column indices of its 'static' and
# 'observed' features here. 'known' columns need no entry: they are derived
# from the time-stamp inputs automatically.
datatype_dict = {
    'ETTh1': TypePos(static=[], observed=list(range(7))),
    'ETTm1': TypePos(static=[], observed=list(range(7))),
}
def get_known_len(embed_type, freq):
    """Return the number of 'known' time-stamp features for an embedding type and frequency.

    For ``embed_type == 'timeF'`` the count is looked up by frequency code
    and an unknown code raises ``KeyError``. For any other embedding type
    the count is 5 at minute frequency (``'t'``) and 4 otherwise.
    """
    if embed_type == 'timeF':
        features_per_freq = {'h': 4, 't': 5, 's': 6,
                             'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        return features_per_freq[freq]
    return 5 if freq == 't' else 4
class TFTTemporalEmbedding(TemporalEmbedding):
    """TemporalEmbedding variant that keeps one embedding per calendar field.

    ``forward`` stacks the per-field embeddings on a new second-to-last axis
    and returns that stack.
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super().__init__(d_model, embed_type, freq)

    def forward(self, x):
        x = x.long()
        # Field columns of the last axis: 0=month, 1=day, 2=weekday, 3=hour,
        # and 4=minute when the parent created a minute embedding.
        fields = [
            self.month_embed(x[:, :, 0]),
            self.day_embed(x[:, :, 1]),
            self.weekday_embed(x[:, :, 2]),
            self.hour_embed(x[:, :, 3]),
        ]
        if hasattr(self, 'minute_embed'):
            fields.append(self.minute_embed(x[:, :, 4]))
        return torch.stack(fields, dim=-2)
class TFTTimeFeatureEmbedding(nn.Module):
    """Embed each time-feature column with its own bias-free linear layer."""

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super().__init__()
        n_features = get_known_len(embed_type, freq)
        self.embed = nn.ModuleList(
            [nn.Linear(1, d_model, bias=False) for _ in range(n_features)])

    def forward(self, x):
        """Return the per-column embeddings stacked on a new second-to-last axis."""
        per_feature = []
        for idx, layer in enumerate(self.embed):
            column = x[:, :, idx].unsqueeze(-1)
            per_feature.append(layer(column))
        return torch.stack(per_feature, dim=-2)
class TFTEmbedding(nn.Module):
    """Embed static, observed and known inputs separately, one embedding per variable."""

    def __init__(self, configs):
        super().__init__()
        self.pred_len = configs.pred_len

        # Column roles for this dataset; unregistered datasets raise KeyError.
        column_roles = datatype_dict[configs.data]
        self.static_pos = column_roles.static
        self.observed_pos = column_roles.observed
        self.static_len = len(self.static_pos)
        self.observed_len = len(self.observed_pos)

        if self.static_len:
            self.static_embedding = nn.ModuleList(
                [DataEmbedding(1, configs.d_model, dropout=configs.dropout)
                 for _ in range(self.static_len)])
        else:
            self.static_embedding = None
        self.observed_embedding = nn.ModuleList(
            [DataEmbedding(1, configs.d_model, dropout=configs.dropout)
             for _ in range(self.observed_len)])
        if configs.embed != 'timeF':
            self.known_embedding = TFTTemporalEmbedding(configs.d_model, configs.embed, configs.freq)
        else:
            self.known_embedding = TFTTimeFeatureEmbedding(configs.d_model, configs.embed, configs.freq)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return ``(static_input, observed_input, known_input)``.

        ``static_input`` is ``None`` when the dataset has no static columns.
        ``x_dec`` is not read.
        """
        static_input = None
        if self.static_len:
            # Static variables are read from the first time step only.
            # static_input: [B,C,d_model]
            static_parts = []
            for pos, embed in zip(self.static_pos, self.static_embedding):
                first_step = x_enc[:, :1, pos].unsqueeze(-1)
                static_parts.append(embed(first_step, None).squeeze(1))
            static_input = torch.stack(static_parts, dim=-2)

        # observed_input: [B,T,C,d_model]
        observed_parts = []
        for pos, embed in zip(self.observed_pos, self.observed_embedding):
            observed_parts.append(embed(x_enc[:, :, pos].unsqueeze(-1), None))
        observed_input = torch.stack(observed_parts, dim=-2)

        # Known inputs cover the history plus the last pred_len decoder steps.
        x_mark = torch.cat([x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=-2)
        # known_input: [B,T,C,d_model]
        known_input = self.known_embedding(x_mark)

        return static_input, observed_input, known_input
class GLU(nn.Module):
    """Gated linear unit with separate value and gate projections."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, output_size)  # value branch
        self.fc2 = nn.Linear(input_size, output_size)  # gate branch
        self.glu = nn.GLU()

    def forward(self, x):
        # nn.GLU halves the last axis and multiplies the first half by the
        # sigmoid of the second, so fc1 supplies values and fc2 the gate.
        value_and_gate = torch.cat((self.fc1(x), self.fc2(x)), dim=-1)
        return self.glu(value_and_gate)
class GateAddNorm(nn.Module):
    """Gate the input, add a skip connection, project if sizes differ, then layer-normalize."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.glu = GLU(input_size, input_size)
        # A projection is only needed when the output width changes.
        if input_size != output_size:
            self.projection = nn.Linear(input_size, output_size)
        else:
            self.projection = nn.Identity()
        self.layer_norm = nn.LayerNorm(output_size)

    def forward(self, x, skip_a):
        gated_sum = self.glu(x) + skip_a
        return self.layer_norm(self.projection(gated_sum))
class GRN(nn.Module):
    """Gated residual network with an optional context input."""

    def __init__(self, input_size, output_size, hidden_size=None, context_size=None, dropout=0.0):
        """
        input_size: width of the primary input
        output_size: width of the result
        hidden_size: internal width; defaults to ``input_size``
        context_size: width of the context input, or ``None`` for no context layer
        dropout: dropout probability applied before the gate
        """
        super().__init__()
        if hidden_size is None:
            hidden_size = input_size
        self.lin_a = nn.Linear(input_size, hidden_size)
        if context_size is not None:
            self.lin_c = nn.Linear(context_size, hidden_size)
        else:
            self.lin_c = None
        self.lin_i = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(dropout)
        # The skip path must match the hidden width before it is added in the gate.
        if hidden_size != input_size:
            self.project_a = nn.Linear(input_size, hidden_size)
        else:
            self.project_a = nn.Identity()
        self.gate = GateAddNorm(hidden_size, output_size)

    def forward(self, a: Tensor, c: Optional[Tensor] = None):
        # a: [B,T,d], c: [B,d]
        hidden = self.lin_a(a)
        if c is not None:
            # The context gets a new axis at dim 1 so it is added to every position.
            hidden = hidden + self.lin_c(c).unsqueeze(1)
        hidden = self.lin_i(F.elu(hidden))
        hidden = self.dropout(hidden)
        return self.gate(hidden, self.project_a(a))
class VariableSelectionNetwork(nn.Module):
    """Mix per-variable embeddings with softmax weights computed from all variables jointly."""

    def __init__(self, d_model, variable_num, dropout=0.0):
        super().__init__()
        # Scores every variable from the concatenation of all embeddings.
        self.joint_grn = GRN(d_model * variable_num, variable_num,
                             hidden_size=d_model, context_size=d_model, dropout=dropout)
        # One independent transformation per variable.
        self.variable_grns = nn.ModuleList(
            [GRN(d_model, d_model, dropout=dropout) for _ in range(variable_num)])

    def forward(self, x: Tensor, context: Optional[Tensor] = None):
        # x: [B,T,C,d] or [B,C,d]
        # weights: [B,T,C] or [B,C]
        # transformed: [B,T,d,C] or [B,d,C]
        # result: [B,T,d] or [B,d]
        joint_scores = self.joint_grn(torch.flatten(x, start_dim=-2), context)
        weights = F.softmax(joint_scores, dim=-1)

        transformed = []
        for idx, grn in enumerate(self.variable_grns):
            transformed.append(grn(x[..., idx, :]))
        transformed = torch.stack(transformed, dim=-1)

        # Weighted sum over the variable axis, written as a matrix product.
        return torch.matmul(transformed, weights.unsqueeze(-1)).squeeze(-1)
class StaticCovariateEncoder(nn.Module):
    """Turn static inputs into four context vectors, or four ``None`` when there are none."""

    def __init__(self, d_model, static_len, dropout=0.0):
        """
        d_model: embedding width
        static_len: number of static variables; 0 disables the selection network
        dropout: dropout probability for the selection network and the four GRNs
        """
        super(StaticCovariateEncoder, self).__init__()
        # Forward dropout here as well, so the selection network honours the
        # same rate as the context GRNs below instead of its own default.
        self.static_vsn = VariableSelectionNetwork(d_model, static_len, dropout=dropout) if static_len else None
        self.grns = nn.ModuleList([GRN(d_model, d_model, dropout=dropout) for _ in range(4)])

    def forward(self, static_input):
        """Return a list of four context tensors, or ``[None] * 4`` when ``static_input`` is ``None``."""
        # static_input: [B,C,d]
        if static_input is not None:
            static_features = self.static_vsn(static_input)
            return [grn(static_features) for grn in self.grns]
        else:
            return [None] * 4
class InterpretableMultiHeadAttention(nn.Module):
    """Causal multi-head self-attention whose heads share a single value projection.

    Queries and keys are per head; the value is shared and the head outputs
    are averaged before the output projection.
    """

    def __init__(self, configs):
        """
        configs: experiment namespace; reads n_heads, d_model (must be
            divisible by n_heads), dropout, seq_len and pred_len.
        """
        super(InterpretableMultiHeadAttention, self).__init__()
        self.n_heads = configs.n_heads
        assert configs.d_model % configs.n_heads == 0
        self.d_head = configs.d_model // configs.n_heads
        # One fused projection: n_heads queries, n_heads keys, one shared value.
        self.qkv_linears = nn.Linear(configs.d_model, (2 * self.n_heads + 1) * self.d_head, bias=False)
        self.out_projection = nn.Linear(self.d_head, configs.d_model, bias=False)
        self.out_dropout = nn.Dropout(configs.dropout)
        self.scale = self.d_head ** -0.5
        # Additive causal mask: -inf strictly above the diagonal, 0 elsewhere.
        example_len = configs.seq_len + configs.pred_len
        self.register_buffer("mask", torch.triu(torch.full((example_len, example_len), float('-inf')), 1))

    def forward(self, x):
        """Attend causally over ``x`` of shape [B,T,d_model]; returns [B,T,d_model].

        ``T`` may be any length up to ``seq_len + pred_len``.
        """
        # Q,K,V are all from x
        B, T, d_model = x.shape
        qkv = self.qkv_linears(x)
        q, k, v = qkv.split((self.n_heads * self.d_head, self.n_heads * self.d_head, self.d_head), dim=-1)
        q = q.view(B, T, self.n_heads, self.d_head)
        k = k.view(B, T, self.n_heads, self.d_head)
        v = v.view(B, T, self.d_head)
        attention_score = torch.matmul(q.permute((0, 2, 1, 3)), k.permute((0, 2, 3, 1)))  # [B,n,T,T]
        attention_score.mul_(self.scale)
        # Use the top-left T x T corner so inputs shorter than the
        # registered mask are accepted too.
        attention_score = attention_score + self.mask[:T, :T]
        attention_prob = F.softmax(attention_score, dim=3)  # [B,n,T,T]
        attention_out = torch.matmul(attention_prob, v.unsqueeze(1))  # [B,n,T,d]
        attention_out = torch.mean(attention_out, dim=1)  # [B,T,d]
        out = self.out_projection(attention_out)
        out = self.out_dropout(out)  # [B,T,d]
        return out
class TemporalFusionDecoder(nn.Module):
    """LSTM encoding, static enrichment, causal attention and feed-forward with gated skips."""

    def __init__(self, configs):
        super().__init__()
        width = configs.d_model
        self.pred_len = configs.pred_len

        self.history_encoder = nn.LSTM(width, width, batch_first=True)
        self.future_encoder = nn.LSTM(width, width, batch_first=True)
        self.gate_after_lstm = GateAddNorm(width, width)
        self.enrichment_grn = GRN(width, width, context_size=width, dropout=configs.dropout)
        self.attention = InterpretableMultiHeadAttention(configs)
        self.gate_after_attention = GateAddNorm(width, width)
        self.position_wise_grn = GRN(width, width, dropout=configs.dropout)
        self.gate_final = GateAddNorm(width, width)
        self.out_projection = nn.Linear(width, configs.c_out)

    def forward(self, history_input, future_input, c_c, c_h, c_e):
        """Decode the last ``pred_len`` steps.

        history_input, future_input: [B,T,d]
        c_c, c_h, c_e: [B,d] static contexts, or ``None``
        """
        horizon = self.pred_len

        # LSTM: the history encoder starts from the static contexts when both
        # are given (c_c in the first slot of the state tuple, c_h in the
        # second); its final state initializes the future encoder.
        if c_c is not None and c_h is not None:
            init_state = (c_c.unsqueeze(0), c_h.unsqueeze(0))
        else:
            init_state = None
        historical_features, state = self.history_encoder(history_input, init_state)
        future_features, _ = self.future_encoder(future_input, state)

        # Skip connection around the LSTMs
        lstm_in = torch.cat([history_input, future_input], dim=1)
        lstm_out = torch.cat([historical_features, future_features], dim=1)
        temporal_features = self.gate_after_lstm(lstm_out, lstm_in)  # [B,T,d]

        # Static enrichment
        enriched_features = self.enrichment_grn(temporal_features, c_e)  # [B,T,d]

        # Temporal self-attention
        attended = self.attention(enriched_features)  # [B,T,d]
        # Don't compute historical loss: only the forecast horizon is kept from here on.
        attended = self.gate_after_attention(
            attended[:, -horizon:], enriched_features[:, -horizon:])

        # Position-wise feed-forward
        out = self.position_wise_grn(attended)  # [B,T,d]

        # Final skip connection
        out = self.gate_final(out, temporal_features[:, -horizon:])
        return self.out_projection(out)
class Model(nn.Module):
    """Temporal Fusion Transformer for forecasting tasks."""

    def __init__(self, configs):
        """
        configs: experiment namespace; reads task_name, seq_len, label_len,
            pred_len, data (key into ``datatype_dict``), embed, freq and
            d_model, and is handed to the embedding and decoder sub-modules.
        """
        super(Model, self).__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        # Number of variables
        self.static_len = len(datatype_dict[configs.data].static)
        self.observed_len = len(datatype_dict[configs.data].observed)
        self.known_len = get_known_len(configs.embed, configs.freq)

        self.embedding = TFTEmbedding(configs)
        self.static_encoder = StaticCovariateEncoder(configs.d_model, self.static_len)
        # History sees observed and known variables; the future only known ones.
        self.history_vsn = VariableSelectionNetwork(configs.d_model, self.observed_len + self.known_len)
        self.future_vsn = VariableSelectionNetwork(configs.d_model, self.known_len)
        self.temporal_fusion_decoder = TemporalFusionDecoder(configs)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return the de-normalized forecast for the ``pred_len`` future steps."""
        # Normalization from Non-stationary Transformer
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc / stdev

        # Data embedding
        # static_input: [B,C,d], observed_input:[B,T,C,d], known_input: [B,T,C,d]
        static_input, observed_input, known_input = self.embedding(x_enc, x_mark_enc, x_dec, x_mark_dec)

        # Static context
        # c_s,...,c_e: [B,d]
        c_s, c_c, c_h, c_e = self.static_encoder(static_input)

        # Temporal input Selection: known inputs are split at seq_len into a
        # history part (joined with the observed inputs) and a future part.
        history_input = torch.cat([observed_input, known_input[:, :self.seq_len]], dim=-2)
        future_input = known_input[:, self.seq_len:]
        history_input = self.history_vsn(history_input, c_s)
        future_input = self.future_vsn(future_input, c_s)

        # TFT main procedure after variable selection
        # history_input: [B,T,d], future_input: [B,T,d]
        dec_out = self.temporal_fusion_decoder(history_input, future_input, c_c, c_h, c_e)

        # De-Normalization from Non-stationary Transformer; stdev and means
        # keep a size-1 dim 1 and broadcast over the pred_len steps.
        dec_out = dec_out * stdev + means
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Run the forecast head for forecast tasks; any other task returns ``None``.

        ``mask`` is accepted so this model can be called like the other
        models' ``forward``; it is not used. The forecast is prefixed with
        zeros shaped like ``x_enc``, so it occupies the trailing steps.
        """
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)  # [B,pred_len,C]
            dec_out = torch.cat([torch.zeros_like(x_enc), dec_out], dim=1)
            return dec_out  # [B, T, D]
        return None
================================================
FILE: models/TiDE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class LayerNorm(nn.Module):
    """Layer normalization over the trailing dimension with an optional bias.

    ``weight`` is always learnable; ``bias`` is a learnable parameter when
    ``bias`` is truthy and ``None`` otherwise.
    """

    def __init__(self, ndim, bias):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(ndim))
        if bias:
            self.bias = nn.Parameter(torch.zeros(ndim))
        else:
            self.bias = None

    def forward(self, input):
        """Normalize ``input`` over a trailing dimension of size ``ndim``."""
        return F.layer_norm(
            input,
            normalized_shape=self.weight.shape,
            weight=self.weight,
            bias=self.bias,
            eps=1e-5,
        )
class ResBlock(nn.Module):
    """Two-layer MLP with a linear skip connection, followed by LayerNorm.

    Computes ``ln(dropout(fc2(relu(fc1(x)))) + fc3(x))``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.1, bias=True):
        super().__init__()
        # Main path: input -> hidden -> output
        self.fc1 = nn.Linear(input_dim, hidden_dim, bias=bias)
        self.fc2 = nn.Linear(hidden_dim, output_dim, bias=bias)
        # Skip path: projects the input straight to the output width
        self.fc3 = nn.Linear(input_dim, output_dim, bias=bias)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        self.ln = LayerNorm(output_dim, bias=bias)

    def forward(self, x):
        """Apply the residual block along the last dimension of ``x``."""
        hidden = self.relu(self.fc1(x))
        main = self.dropout(self.fc2(hidden))
        skip = self.fc3(x)
        return self.ln(main + skip)
#TiDE
class Model(nn.Module):
    """
    TiDE: a residual-MLP encoder/decoder applied to one channel at a time.

    paper: https://arxiv.org/pdf/2304.08424.pdf

    Args:
        configs: experiment configuration; reads ``task_name``, ``seq_len``,
            ``label_len``, ``pred_len``, ``d_model``, ``e_layers``,
            ``d_layers``, ``freq``, ``c_out``, ``d_ff`` and ``dropout``.
        bias: whether the linear layers and layer norms use a bias term.
        feature_encode_dim: width each time-feature vector is projected to.
    """

    def __init__(self, configs, bias=True, feature_encode_dim=2):
        super(Model, self).__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len  # L
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len  # H
        self.hidden_dim = configs.d_model
        self.res_hidden = configs.d_model
        self.encoder_num = configs.e_layers
        self.decoder_num = configs.d_layers
        self.freq = configs.freq
        self.feature_encode_dim = feature_encode_dim
        self.decode_dim = configs.c_out
        self.temporalDecoderHidden = configs.d_ff
        dropout = configs.dropout

        # Number of time features supplied for each sampling frequency.
        freq_map = {'h': 4, 't': 5, 's': 6,
                    'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        self.feature_dim = freq_map[self.freq]

        # Encoder input: the look-back window plus the encoded time features
        # of every step in the look-back and horizon windows.
        flatten_dim = self.seq_len + (self.seq_len + self.pred_len) * self.feature_encode_dim

        def hidden_block():
            # A fresh block per call: ``[block] * k`` would place the *same*
            # module object k times in the Sequential and tie the weights.
            return ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)

        self.feature_encoder = ResBlock(self.feature_dim, self.res_hidden, self.feature_encode_dim, dropout, bias)
        self.encoders = nn.Sequential(
            ResBlock(flatten_dim, self.res_hidden, self.hidden_dim, dropout, bias),
            *[hidden_block() for _ in range(self.encoder_num - 1)]
        )

        # Forecasting decodes pred_len steps; imputation and anomaly detection
        # reconstruct the seq_len input window. Other tasks get no decoder.
        if self.task_name in ('long_term_forecast', 'short_term_forecast'):
            out_len = self.pred_len
        elif self.task_name in ('imputation', 'anomaly_detection'):
            out_len = self.seq_len
        else:
            out_len = None

        if out_len is not None:
            self.decoders = nn.Sequential(
                *[hidden_block() for _ in range(self.decoder_num - 1)],
                ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * out_len, dropout, bias)
            )
            self.temporalDecoder = ResBlock(self.decode_dim + self.feature_encode_dim,
                                            self.temporalDecoderHidden, 1, dropout, bias)
            self.residual_proj = nn.Linear(self.seq_len, out_len, bias=bias)

    def _run(self, x_enc, time_marks, out_len, step_slice):
        """Normalize one channel, encode/decode it and de-normalize the result.

        Args:
            x_enc: single-channel series; the reshapes below treat it as [B, L].
            time_marks: time features passed to ``feature_encoder``.
            out_len: number of output steps.
            step_slice: slice over the time axis of the encoded features that
                picks the steps aligned with the output.
        """
        # Normalization
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc / stdev

        feature = self.feature_encoder(time_marks)
        hidden = self.encoders(torch.cat([x_enc, feature.reshape(feature.shape[0], -1)], dim=-1))
        decoded = self.decoders(hidden).reshape(hidden.shape[0], out_len, self.decode_dim)
        # Per-step temporal decoder plus a linear shortcut from the input window.
        dec_out = self.temporalDecoder(
            torch.cat([feature[:, step_slice], decoded], dim=-1)
        ).squeeze(-1) + self.residual_proj(x_enc)

        # De-Normalization
        dec_out = dec_out * (stdev[:, 0].unsqueeze(1).repeat(1, out_len))
        dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, out_len))
        return dec_out

    def forecast(self, x_enc, x_mark_enc, x_dec, batch_y_mark):
        """Forecast ``pred_len`` steps; ``batch_y_mark`` covers look-back plus horizon."""
        return self._run(x_enc, batch_y_mark, self.pred_len, slice(self.seq_len, None))

    def imputation(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask):
        """Reconstruct the ``seq_len`` input window using ``x_mark_enc`` as features."""
        return self._run(x_enc, x_mark_enc, self.seq_len, slice(None, self.seq_len))

    def forward(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask=None):
        '''x_mark_enc is the exogenous dynamic feature described in the original paper'''
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            if batch_y_mark is None:
                # No time features supplied: feed zeros for every step.
                batch_y_mark = torch.zeros(
                    (x_enc.shape[0], self.seq_len + self.pred_len, self.feature_dim),
                    device=x_enc.device)
            else:
                # Look-back marks followed by the marks of the horizon steps.
                batch_y_mark = torch.cat([x_mark_enc, batch_y_mark[:, -self.pred_len:, :]], dim=1)
            # Each channel is forecast independently, then re-stacked.
            dec_out = torch.stack(
                [self.forecast(x_enc[:, :, feature], x_mark_enc, x_dec, batch_y_mark)
                 for feature in range(x_enc.shape[-1])], dim=-1)
            return dec_out  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = torch.stack(
                [self.imputation(x_enc[:, :, feature], x_mark_enc, x_dec, batch_y_mark, mask)
                 for feature in range(x_enc.shape[-1])], dim=-1)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            raise NotImplementedError("Task anomaly_detection for Tide is temporarily not supported")
        if self.task_name == 'classification':
            raise NotImplementedError("Task classification for Tide is temporarily not supported")
        return None
================================================
FILE: models/TiRex.py
================================================
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
from tirex import load_model, ForecastModel
class Model(nn.Module):
    """Zero-shot forecasting wrapper around the pretrained TiRex model.

    Every channel is handed to the backbone as an independent univariate
    series. ``forward`` returns ``None`` unless ``task_name`` is
    ``'zero_shot_forecast'``.
    """

    def __init__(self, configs):
        """
        configs: reads ``task_name``, ``seq_len`` and ``pred_len``.
        """
        super().__init__()
        self.model = load_model("NX-AI/TiRex")
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast ``pred_len`` steps for every channel of ``x_enc`` ([B, L, C])."""
        # Per-series instance normalization
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        B, L, C = x_enc.shape
        # Move the channel axis next to the batch axis BEFORE flattening so
        # each row is one channel's history; reshaping [B, L, C] directly
        # would interleave channels and time steps whenever C > 1.
        series = x_enc.permute(0, 2, 1).reshape(B * C, L)
        # The second returned value is used as the point forecast.
        quantiles, output = self.model.forecast(series, prediction_length=self.pred_len)
        # [B*C, H] -> [B, C, H] -> [B, H, C]
        dec_out = output.to(x_enc.device).reshape(B, C, output.shape[-1]).permute(0, 2, 1)

        # De-normalization
        dec_out = dec_out * \
            (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        dec_out = dec_out + \
            (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        if self.task_name == 'zero_shot_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out
        return None
================================================
FILE: models/TimeFilter.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from layers.Embed import PositionalEmbedding
from layers.StandardNorm import Normalize
from layers.TimeFilter_layers import TimeFilter_Backbone
class PatchEmbed(nn.Module):
    """Cut the last axis into patches and project each patch to ``dim`` features.

    Args:
        dim: embedding width of each patch token.
        patch_len: number of values per patch.
        stride: step between patch starts; defaults to ``patch_len``.
        pos: when truthy, the output of a ``PositionalEmbedding`` is added.
    """

    def __init__(self, dim, patch_len, stride=None, pos=True):
        super().__init__()
        self.patch_len = patch_len
        self.stride = stride if stride is not None else patch_len
        self.patch_proj = nn.Linear(self.patch_len, dim)
        self.pos = pos
        if self.pos:
            pos_emb_theta = 10000
            self.pe = PositionalEmbedding(dim, pos_emb_theta)

    def forward(self, x):
        """Return patch tokens: the last axis of ``x`` becomes (num_patches, dim)."""
        # Windows of patch_len taken every `stride` positions along the last axis
        patches = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
        tokens = self.patch_proj(patches)
        if self.pos:
            tokens += self.pe(tokens)
        return tokens
class Model(nn.Module):
    """TimeFilter: patch tokens from all channels filtered by a masked backbone.

    The input is normalized, all channels are laid end to end and cut into
    patches, and the patch tokens go through ``TimeFilter_Backbone`` together
    with three masks built by ``_get_mask``. A linear head maps the tokens to
    the task output (forecast, reconstruction or class logits).
    """

    def __init__(self, configs):
        super().__init__()
        self.args = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.n_vars = configs.c_out
        self.dim = configs.d_model
        self.d_ff = configs.d_ff
        self.patch_len = configs.patch_len
        # Patches do not overlap.
        self.stride = self.patch_len
        self.num_patches = int((self.seq_len - self.patch_len) / self.stride + 1)  # L

        # Filter
        self.alpha = 0.1 if configs.alpha is None else configs.alpha
        self.top_p = 0.5 if configs.top_p is None else configs.top_p

        # embed
        self.patch_embed = PatchEmbed(self.dim, self.patch_len, self.stride, configs.pos)

        # TimeFilter backbone
        self.backbone = TimeFilter_Backbone(self.dim, self.n_vars, self.d_ff,
                                            configs.n_heads, configs.e_layers, self.top_p, configs.dropout,
                                            self.seq_len * self.n_vars // self.patch_len)

        # head
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            self.head = nn.Linear(self.dim * self.num_patches, self.pred_len)
        elif self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
            self.head = nn.Linear(self.dim * self.num_patches, self.seq_len)
        elif self.task_name == 'classification':
            # Classification flattens the tokens of all channels together.
            self.num_patches = int((self.seq_len * configs.enc_in - self.patch_len) / self.stride + 1)  # L
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                self.dim * self.num_patches, configs.num_class)

        # Without RevIN
        self.use_RevIN = False
        self.norm = Normalize(configs.enc_in, affine=self.use_RevIN)

    def _get_mask(self, device):
        """Build the three 0/1 relation masks between patch tokens.

        With ``L = seq_len * c_out // patch_len`` tokens in groups of
        ``N = seq_len // patch_len``, row ``k`` of the result holds:

        * ``S``  - tokens at the same offset inside another group,
        * ``T``  - the other tokens of token ``k``'s own group,
        * ``ST`` - every remaining token.

        A token is never related to itself, so the three masks plus the
        identity sum to all ones.

        Returns:
            float32 tensor of shape ``[L, 3, L]`` on ``device``.
        """
        dtype = torch.float32
        L = self.args.seq_len * self.args.c_out // self.args.patch_len
        N = self.args.seq_len // self.args.patch_len
        idx = torch.arange(L, device=device)
        row = idx.unsqueeze(1)  # token k, one per output row
        col = idx.unsqueeze(0)  # candidate token j
        not_self = row != col
        S = ((col % N) == (row % N)) & not_self
        T = ((col // N) == (row // N)) & not_self
        # S and T never overlap (that would require j == k), so the remainder
        # is simply "neither S nor T nor self".
        ST = not_self & ~S & ~T
        return torch.stack([S, T, ST], dim=1).to(dtype)

    def _embed_and_filter(self, x):
        """Normalize ``x`` ([B, T, C]) and return the backbone's token output."""
        B, T, C = x.shape
        # Normalization
        x = self.norm(x, 'norm')
        # [B, T, C] -> [B, C, T] -> [B, C*T]: channels laid end to end
        x = x.permute(0, 2, 1).reshape(-1, C * T)
        x = self.patch_embed(x)  # [B, N, D] N = [C*T / P]
        # The backbone also returns a MoE loss, which is not used here.
        x, _ = self.backbone(x, self._get_mask(x.device), self.alpha)
        return x

    def _reconstruct(self, x):
        """Shared pipeline of the forecast, imputation and anomaly tasks."""
        x = self._embed_and_filter(x)
        # [B, C, T/P, D] -> [B, C, T/P * D] -> head
        x = self.head(x.reshape(-1, self.n_vars, self.num_patches, self.dim).flatten(start_dim=-2))
        x = x.permute(0, 2, 1)
        # De-Normalization
        return self.norm(x, 'denorm')

    def forecast(self, x, masks, x_dec, x_mark_dec):
        """Forecast from ``x`` ([B, T, C]); the remaining arguments are unused."""
        return self._reconstruct(x)

    def imputation(self, x, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct ``x`` ([B, T, C]); the remaining arguments are unused."""
        return self._reconstruct(x)

    def classification(self, x, x_mark_enc):
        """Return class logits ``(batch_size, num_classes)`` for ``x`` ([B, T, C])."""
        x = self._embed_and_filter(x)
        output = self.dropout(x.flatten(start_dim=1))
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def anomaly_detection(self, x):
        """Reconstruct ``x`` ([B, T, C]) for anomaly scoring."""
        return self._reconstruct(x)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(
                x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None
================================================
FILE: models/TimeMixer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Autoformer_EncDec import series_decomp
from layers.Embed import DataEmbedding_wo_pos
from layers.StandardNorm import Normalize
class DFT_series_decomp(nn.Module):
    """
    Series decomposition block based on the real FFT along the time axis.

    For every series (each batch element and each trailing index) the DC bin
    is ignored, the ``top_k`` largest magnitudes are found, and only the
    frequency bins strictly stronger than the k-th largest magnitude are
    kept as the seasonal part. The trend is the input minus the seasonal part.
    """

    def __init__(self, top_k: int = 5):
        super(DFT_series_decomp, self).__init__()
        self.top_k = top_k

    def forward(self, x):
        """Split ``x`` (time on dim 1) into ``(x_season, x_trend)`` of the same shape."""
        xf = torch.fft.rfft(x, dim=1)
        freq = xf.abs()
        # Drop the DC component of every series; ``freq[0] = 0`` would
        # instead wipe the whole spectrum of the first batch element.
        freq[:, 0] = 0
        # Rank along the frequency axis and never ask for more bins than exist.
        k = min(self.top_k, freq.shape[1])
        top_k_freq, _ = torch.topk(freq, k=k, dim=1)
        # Per-series threshold, so the result does not depend on what else
        # happens to be in the batch.
        threshold = top_k_freq.min(dim=1, keepdim=True).values
        xf[freq <= threshold] = 0
        # Passing n restores the original length for odd-length inputs too.
        x_season = torch.fft.irfft(xf, n=x.shape[1], dim=1)
        x_trend = x - x_season
        return x_season, x_trend
class MultiScaleSeasonMixing(nn.Module):
    """
    Bottom-up mixing season pattern

    Information flows from the finest scale to the coarsest: each scale
    receives the already-mixed finer scale, projected to its own length by a
    two-layer MLP acting on the time axis.
    """

    def __init__(self, configs):
        super(MultiScaleSeasonMixing, self).__init__()
        window = configs.down_sampling_window

        def scale_len(level):
            # Sequence length after `level` rounds of down-sampling
            return configs.seq_len // (window ** level)

        self.down_sampling_layers = torch.nn.ModuleList(
            [
                nn.Sequential(
                    torch.nn.Linear(scale_len(i), scale_len(i + 1)),
                    nn.GELU(),
                    torch.nn.Linear(scale_len(i + 1), scale_len(i + 1)),
                )
                for i in range(configs.down_sampling_layers)
            ]
        )

    def forward(self, season_list):
        """Mix the seasonal parts from fine to coarse.

        Args:
            season_list: one tensor per scale, finest first, with time on the
                last axis. A single-scale list is returned unchanged apart
                from the permute.

        Returns:
            List with one tensor per scale, each permuted with ``(0, 2, 1)``.
        """
        # mixing high->low
        mixed = season_list[0]
        out_season_list = [mixed.permute(0, 2, 1)]
        for i, coarser in enumerate(season_list[1:]):
            mixed = coarser + self.down_sampling_layers[i](mixed)
            out_season_list.append(mixed.permute(0, 2, 1))
        return out_season_list
class MultiScaleTrendMixing(nn.Module):
    """
    Top-down mixing trend pattern

    Information flows from the coarsest scale to the finest: each scale
    receives the already-mixed coarser scale, projected to its own length by
    a two-layer MLP acting on the time axis.
    """

    def __init__(self, configs):
        super(MultiScaleTrendMixing, self).__init__()
        window = configs.down_sampling_window

        def scale_len(level):
            # Sequence length after `level` rounds of down-sampling
            return configs.seq_len // (window ** level)

        # Stored coarsest-first so that layer i lifts the i-th coarsest scale.
        self.up_sampling_layers = torch.nn.ModuleList(
            [
                nn.Sequential(
                    torch.nn.Linear(scale_len(i + 1), scale_len(i)),
                    nn.GELU(),
                    torch.nn.Linear(scale_len(i), scale_len(i)),
                )
                for i in reversed(range(configs.down_sampling_layers))
            ])

    def forward(self, trend_list):
        """Mix the trend parts from coarse to fine.

        Args:
            trend_list: one tensor per scale, finest first, with time on the
                last axis. The list itself is not modified. A single-scale
                list is returned unchanged apart from the permute.

        Returns:
            List with one tensor per scale in the input order (finest
            first), each permuted with ``(0, 2, 1)``.
        """
        # mixing low->high
        coarse_to_fine = list(reversed(trend_list))
        mixed = coarse_to_fine[0]
        out_trend_list = [mixed.permute(0, 2, 1)]
        for i, finer in enumerate(coarse_to_fine[1:]):
            mixed = finer + self.up_sampling_layers[i](mixed)
            out_trend_list.append(mixed.permute(0, 2, 1))
        out_trend_list.reverse()
        return out_trend_list
class PastDecomposableMixing(nn.Module):
    """One mixing block: decompose every scale, mix seasons and trends, recombine.

    Each scale is split into a seasonal and a trend part; the seasonal parts
    are mixed bottom-up and the trend parts top-down, and the two mixed parts
    are summed per scale. With channel independence enabled the sum goes
    through ``out_cross_layer`` and is added to the block input.
    """

    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.down_sampling_window = configs.down_sampling_window

        self.layer_norm = nn.LayerNorm(configs.d_model)
        self.dropout = nn.Dropout(configs.dropout)
        self.channel_independence = configs.channel_independence

        method = configs.decomp_method
        if method == 'moving_avg':
            self.decompsition = series_decomp(configs.moving_avg)
        elif method == "dft_decomp":
            self.decompsition = DFT_series_decomp(configs.top_k)
        else:
            raise ValueError('decompsition is error')

        # Feature-mixing MLP applied to both parts when channels are not independent
        if not configs.channel_independence:
            self.cross_layer = nn.Sequential(
                nn.Linear(in_features=configs.d_model, out_features=configs.d_ff),
                nn.GELU(),
                nn.Linear(in_features=configs.d_ff, out_features=configs.d_model),
            )

        # Mixing season
        self.mixing_multi_scale_season = MultiScaleSeasonMixing(configs)

        # Mixing trend
        self.mixing_multi_scale_trend = MultiScaleTrendMixing(configs)

        self.out_cross_layer = nn.Sequential(
            nn.Linear(in_features=configs.d_model, out_features=configs.d_ff),
            nn.GELU(),
            nn.Linear(in_features=configs.d_ff, out_features=configs.d_model),
        )

    def forward(self, x_list):
        """Return one mixed tensor per scale, each cut to its input length."""
        # Time length of every scale, used to trim the outputs below
        length_list = [x.size()[1] for x in x_list]

        # Decompose to obtain the season and trend
        season_list, trend_list = [], []
        for x in x_list:
            season, trend = self.decompsition(x)
            if not self.channel_independence:
                season = self.cross_layer(season)
                trend = self.cross_layer(trend)
            # The mixing modules act on the last axis, so time is moved there.
            season_list.append(season.permute(0, 2, 1))
            trend_list.append(trend.permute(0, 2, 1))

        # bottom-up season mixing
        out_season_list = self.mixing_multi_scale_season(season_list)
        # top-down trend mixing
        out_trend_list = self.mixing_multi_scale_trend(trend_list)

        out_list = []
        scales = zip(x_list, out_season_list, out_trend_list, length_list)
        for ori, out_season, out_trend, length in scales:
            out = out_season + out_trend
            if self.channel_independence:
                out = ori + self.out_cross_layer(out)
            out_list.append(out[:, :length, :])
        return out_list
class Model(nn.Module):
    """TimeMixer: multi-scale past-decomposable mixing with per-scale predictors.

    The input is down-sampled into ``down_sampling_layers + 1`` scales, every
    scale is embedded and passed through ``e_layers`` mixing blocks, and a
    task-specific head produces the output (forecast, reconstruction or class
    logits).
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.down_sampling_window = configs.down_sampling_window
        self.channel_independence = configs.channel_independence
        self.pdm_blocks = nn.ModuleList([PastDecomposableMixing(configs)
                                         for _ in range(configs.e_layers)])

        self.preprocess = series_decomp(configs.moving_avg)
        self.enc_in = configs.enc_in

        # With channel independence every channel is embedded as its own series.
        if self.channel_independence:
            self.enc_embedding = DataEmbedding_wo_pos(1, configs.d_model, configs.embed, configs.freq,
                                                      configs.dropout)
        else:
            self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                                      configs.dropout)

        self.layer = configs.e_layers

        # One normalization layer per scale
        self.normalize_layers = torch.nn.ModuleList(
            [
                Normalize(self.configs.enc_in, affine=True, non_norm=True if configs.use_norm == 0 else False)
                for i in range(configs.down_sampling_layers + 1)
            ]
        )

        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            # Per-scale map from that scale's length to pred_len
            self.predict_layers = torch.nn.ModuleList(
                [
                    torch.nn.Linear(
                        configs.seq_len // (configs.down_sampling_window ** i),
                        configs.pred_len,
                    )
                    for i in range(configs.down_sampling_layers + 1)
                ]
            )

            if self.channel_independence:
                self.projection_layer = nn.Linear(
                    configs.d_model, 1, bias=True)
            else:
                self.projection_layer = nn.Linear(
                    configs.d_model, configs.c_out, bias=True)

                # Used by out_projection, which only runs without channel independence
                self.out_res_layers = torch.nn.ModuleList([
                    torch.nn.Linear(
                        configs.seq_len // (configs.down_sampling_window ** i),
                        configs.seq_len // (configs.down_sampling_window ** i),
                    )
                    for i in range(configs.down_sampling_layers + 1)
                ])

                self.regression_layers = torch.nn.ModuleList(
                    [
                        torch.nn.Linear(
                            configs.seq_len // (configs.down_sampling_window ** i),
                            configs.pred_len,
                        )
                        for i in range(configs.down_sampling_layers + 1)
                    ]
                )

        if self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
            if self.channel_independence:
                self.projection_layer = nn.Linear(
                    configs.d_model, 1, bias=True)
            else:
                self.projection_layer = nn.Linear(
                    configs.d_model, configs.c_out, bias=True)

        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def out_projection(self, dec_out, i, out_res):
        """Project ``dec_out`` to the output channels and add the regressed residual of scale ``i``."""
        dec_out = self.projection_layer(dec_out)
        out_res = out_res.permute(0, 2, 1)
        out_res = self.out_res_layers[i](out_res)
        out_res = self.regression_layers[i](out_res).permute(0, 2, 1)
        dec_out = dec_out + out_res
        return dec_out

    def pre_enc(self, x_list):
        """Return ``(x_list, None)`` with channel independence, else both outputs of ``preprocess`` per scale."""
        if self.channel_independence:
            return (x_list, None)
        out1_list = []
        out2_list = []
        for x in x_list:
            x_1, x_2 = self.preprocess(x)
            out1_list.append(x_1)
            out2_list.append(x_2)
        return (out1_list, out2_list)

    def __multi_scale_process_inputs(self, x_enc, x_mark_enc):
        """Build the multi-scale views of the input.

        Args:
            x_enc: input of shape [B, T, C].
            x_mark_enc: time marks or ``None``.

        Returns:
            ``(x_list, mark_list)``: one tensor per scale, finest first;
            ``mark_list`` is ``None`` when no marks were given. For an
            unrecognised ``down_sampling_method`` the arguments are returned
            untouched (tensors, not lists).
        """
        window = self.configs.down_sampling_window
        method = self.configs.down_sampling_method
        if method == 'max':
            down_pool = torch.nn.MaxPool1d(window, return_indices=False)
        elif method == 'avg':
            down_pool = torch.nn.AvgPool1d(window)
        elif method == 'conv':
            padding = 1 if torch.__version__ >= '1.5.0' else 2
            # NOTE(review): this layer is re-created with fresh random weights
            # on every call and is not a registered sub-module, so it is never
            # trained. It is at least placed on the input's device/dtype so
            # the 'conv' path does not fail for non-CPU inputs.
            down_pool = nn.Conv1d(in_channels=self.configs.enc_in, out_channels=self.configs.enc_in,
                                  kernel_size=3, padding=padding,
                                  stride=window,
                                  padding_mode='circular',
                                  bias=False).to(device=x_enc.device, dtype=x_enc.dtype)
        else:
            return x_enc, x_mark_enc

        has_marks = x_mark_enc is not None
        x_enc_sampling_list = [x_enc]
        x_mark_sampling_list = [x_mark_enc]

        # B,T,C -> B,C,T (the pooling layers act on the last axis)
        current = x_enc.permute(0, 2, 1)
        current_mark = x_mark_enc
        for _ in range(self.configs.down_sampling_layers):
            current = down_pool(current)
            x_enc_sampling_list.append(current.permute(0, 2, 1))
            if has_marks:
                # Marks are sub-sampled by striding to stay aligned with the values.
                current_mark = current_mark[:, ::window, :]
                x_mark_sampling_list.append(current_mark)

        return x_enc_sampling_list, (x_mark_sampling_list if has_marks else None)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return the forecast; per-scale predictions are summed and de-normalized."""
        x_enc, x_mark_enc = self.__multi_scale_process_inputs(x_enc, x_mark_enc)
        has_marks = x_mark_enc is not None
        marks_per_scale = x_mark_enc if has_marks else [None] * len(x_enc)

        x_list = []
        x_mark_list = []
        for i, (x, x_mark) in enumerate(zip(x_enc, marks_per_scale)):
            B, T, N = x.size()
            x = self.normalize_layers[i](x, 'norm')
            if self.channel_independence:
                # Fold the channels into the batch axis: [B, T, N] -> [B*N, T, 1]
                x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1)
                if has_marks:
                    x_mark = x_mark.repeat(N, 1, 1)
            x_list.append(x)
            x_mark_list.append(x_mark)

        # embedding
        x_list = self.pre_enc(x_list)
        enc_out_list = [self.enc_embedding(x, x_mark)  # [B,T,C]
                        for x, x_mark in zip(x_list[0], x_mark_list)]

        # Past Decomposable Mixing as encoder for past
        for i in range(self.layer):
            enc_out_list = self.pdm_blocks[i](enc_out_list)

        # Future Multipredictor Mixing as decoder for future
        dec_out_list = self.future_multi_mixing(B, enc_out_list, x_list)

        dec_out = torch.stack(dec_out_list, dim=-1).sum(-1)
        dec_out = self.normalize_layers[0](dec_out, 'denorm')
        return dec_out

    def future_multi_mixing(self, B, enc_out_list, x_list):
        """Return one ``pred_len``-step prediction per scale.

        ``x_list`` is the tuple returned by ``pre_enc``.
        """
        dec_out_list = []
        if self.channel_independence:
            x_list = x_list[0]
            for i, enc_out in zip(range(len(x_list)), enc_out_list):
                dec_out = self.predict_layers[i](enc_out.permute(0, 2, 1)).permute(
                    0, 2, 1)  # align temporal dimension
                dec_out = self.projection_layer(dec_out)
                # Unfold the channels from the batch axis again
                dec_out = dec_out.reshape(B, self.configs.c_out, self.pred_len).permute(0, 2, 1).contiguous()
                dec_out_list.append(dec_out)
        else:
            for i, enc_out, out_res in zip(range(len(x_list[0])), enc_out_list, x_list[1]):
                dec_out = self.predict_layers[i](enc_out.permute(0, 2, 1)).permute(
                    0, 2, 1)  # align temporal dimension
                dec_out = self.out_projection(dec_out, i, out_res)
                dec_out_list.append(dec_out)
        return dec_out_list

    def classification(self, x_enc, x_mark_enc):
        """Return class logits ``(batch_size, num_classes)``; ``x_mark_enc`` is the padding mask."""
        x_enc, _ = self.__multi_scale_process_inputs(x_enc, None)

        # embedding
        enc_out_list = [self.enc_embedding(x, None) for x in x_enc]  # [B,T,C]

        # Past Decomposable Mixing as encoder for past
        for i in range(self.layer):
            enc_out_list = self.pdm_blocks[i](enc_out_list)

        # Only the finest scale feeds the classifier.
        enc_out = enc_out_list[0]
        # Output
        # the output transformer encoder/decoder embeddings don't include non-linearity
        output = self.act(enc_out)
        output = self.dropout(output)
        # zero-out padding embeddings
        output = output * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def anomaly_detection(self, x_enc):
        """Reconstruct ``x_enc`` ([B, T, N]) from the finest-scale encoding."""
        B, T, N = x_enc.size()
        x_enc, _ = self.__multi_scale_process_inputs(x_enc, None)

        x_list = []
        for i, x in enumerate(x_enc):
            B, T, N = x.size()
            x = self.normalize_layers[i](x, 'norm')
            if self.channel_independence:
                x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1)
            x_list.append(x)

        # embedding
        enc_out_list = [self.enc_embedding(x, None) for x in x_list]  # [B,T,C]

        # Past Decomposable Mixing as encoder for past
        for i in range(self.layer):
            enc_out_list = self.pdm_blocks[i](enc_out_list)

        dec_out = self.projection_layer(enc_out_list[0])
        dec_out = dec_out.reshape(B, self.configs.c_out, -1).permute(0, 2, 1).contiguous()

        dec_out = self.normalize_layers[0](dec_out, 'denorm')
        return dec_out

    def imputation(self, x_enc, x_mark_enc, mask):
        """Reconstruct ``x_enc``; statistics are computed over observed entries (``mask == 1``)."""
        # Masked mean / standard deviation per series
        means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)
        means = means.unsqueeze(1).detach()
        x_enc = x_enc - means
        x_enc = x_enc.masked_fill(mask == 0, 0)
        stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) /
                           torch.sum(mask == 1, dim=1) + 1e-5)
        stdev = stdev.unsqueeze(1).detach()
        x_enc = x_enc / stdev

        B, T, N = x_enc.size()
        x_enc, x_mark_enc = self.__multi_scale_process_inputs(x_enc, x_mark_enc)

        # The embedding below is called without time marks, so only the
        # values are prepared per scale.
        x_list = []
        for x in x_enc:
            B, T, N = x.size()
            if self.channel_independence:
                x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1)
            x_list.append(x)

        # embedding
        enc_out_list = [self.enc_embedding(x, None) for x in x_list]  # [B,T,C]

        # Past Decomposable Mixing as encoder for past
        for i in range(self.layer):
            enc_out_list = self.pdm_blocks[i](enc_out_list)

        dec_out = self.projection_layer(enc_out_list[0])
        dec_out = dec_out.reshape(B, self.configs.c_out, -1).permute(0, 2, 1).contiguous()

        # De-normalization
        dec_out = dec_out * \
            (stdev[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1))
        dec_out = dec_out + \
            (means[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1))
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; raises ``ValueError`` for an unsupported task."""
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        raise ValueError('Other tasks not implemented yet')
================================================
FILE: models/TimeMoE.py
================================================
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
from transformers import AutoModelForCausalLM
class Model(nn.Module):
    """Zero-shot forecasting wrapper around the pretrained TimeMoE-50M model.

    Every channel is handed to the backbone as an independent univariate
    series. ``forward`` returns ``None`` unless ``task_name`` is
    ``'zero_shot_forecast'``.
    """

    def __init__(self, configs):
        """
        configs: reads ``task_name``, ``seq_len`` and ``pred_len``.
        """
        super().__init__()
        self.model = AutoModelForCausalLM.from_pretrained('Maple728/TimeMoE-50M', trust_remote_code=True)
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast ``pred_len`` steps for every channel of ``x_enc`` ([B, L, C])."""
        # Per-series instance normalization
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        B, L, C = x_enc.shape
        # Move the channel axis next to the batch axis BEFORE flattening so
        # each row is one channel's history; reshaping [B, L, C] directly
        # would interleave channels and time steps whenever C > 1.
        series = x_enc.permute(0, 2, 1).reshape(B * C, L)
        output = self.model.generate(series, max_new_tokens=self.pred_len)
        # Keep only the last pred_len values of each generated row,
        # then [B*C, H] -> [B, C, H] -> [B, H, C].
        horizon = output[:, -self.pred_len:]
        dec_out = horizon.reshape(B, C, -1).permute(0, 2, 1)

        # De-normalization
        dec_out = dec_out * \
            (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        dec_out = dec_out + \
            (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        if self.task_name == 'zero_shot_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out
        return None
================================================
FILE: models/TimeXer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import DataEmbedding_inverted, PositionalEmbedding
import numpy as np
class FlattenHead(nn.Module):
    """Flatten the last two axes and map them linearly to ``target_window`` values.

    Args:
        n_vars: number of variables (stored, not used in the computation).
        nf: size of the flattened last two axes.
        target_window: output length per variable.
        head_dropout: dropout probability applied to the output.
    """

    def __init__(self, n_vars, nf, target_window, head_dropout=0):
        super().__init__()
        self.n_vars = n_vars
        self.flatten = nn.Flatten(start_dim=-2)
        self.linear = nn.Linear(nf, target_window)
        self.dropout = nn.Dropout(head_dropout)

    def forward(self, x):  # x: [bs x nvars x d_model x patch_num]
        flat = self.flatten(x)
        return self.dropout(self.linear(flat))
class EnEmbedding(nn.Module):
    """Patch embedding that appends one learnable global token per variable.

    Args:
        n_vars: number of variables; sizes the global-token parameter.
        d_model: embedding width.
        patch_len: length of the non-overlapping patches.
        dropout: dropout probability applied to the output tokens.
    """

    def __init__(self, n_vars, d_model, patch_len, dropout):
        super().__init__()
        # Patching
        self.patch_len = patch_len

        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
        self.glb_token = nn.Parameter(torch.randn(1, n_vars, 1, d_model))
        self.position_embedding = PositionalEmbedding(d_model)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Embed ``x`` and return ``(tokens, n_vars)``.

        ``n_vars`` is read from ``x.shape[1]``. The tokens have the batch and
        variable axes merged, and the last token of every sequence is the
        global token.
        """
        batch, n_vars = x.shape[0], x.shape[1]
        global_tokens = self.glb_token.repeat((batch, 1, 1, 1))

        # do patching: non-overlapping windows along the last axis
        patches = x.unfold(dimension=-1, size=self.patch_len, step=self.patch_len)
        patches = patches.reshape(patches.shape[0] * patches.shape[1], patches.shape[2], patches.shape[3])

        # Input encoding
        tokens = self.value_embedding(patches) + self.position_embedding(patches)
        tokens = tokens.reshape(-1, n_vars, tokens.shape[-2], tokens.shape[-1])
        # Global token goes after the patch tokens of each variable
        tokens = torch.cat([tokens, global_tokens], dim=2)
        tokens = tokens.reshape(tokens.shape[0] * tokens.shape[1], tokens.shape[2], tokens.shape[3])
        return self.dropout(tokens), n_vars
class Encoder(nn.Module):
    """Stack of encoder layers with an optional final norm and projection.

    Args:
        layers: iterable of layer modules, applied in order.
        norm_layer: optional module applied after the last layer.
        projection: optional module applied after the norm.
    """

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        """Run ``x`` through every layer against ``cross``, then norm and projection."""
        hidden = x
        for block in self.layers:
            hidden = block(hidden, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)

        # Optional post-processing, in this fixed order
        for post in (self.norm, self.projection):
            if post is not None:
                hidden = post(hidden)
        return hidden
class EncoderLayer(nn.Module):
    """Encoder layer: self-attention over all tokens, cross-attention for the last token, then a feed-forward block.

    The last token of every sequence (the global token) is the only one that
    queries ``cross``; the remaining tokens pass through unchanged between the
    self-attention and the position-wise feed-forward (two kernel-size-1
    convolutions).
    """

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        batch, _, d_model = cross.shape

        # Self-attention with residual connection (delta is deliberately not forwarded here).
        attended = self.self_attention(x, x, x, attn_mask=x_mask, tau=tau, delta=None)[0]
        x = self.norm1(x + self.dropout(attended))

        # The last token of each sequence queries the cross input; regroup the
        # tokens by the batch size of ``cross`` for that call.
        glb_token = x[:, -1, :].unsqueeze(1)
        glb_query = torch.reshape(glb_token, (batch, -1, d_model))
        glb_update = self.dropout(self.cross_attention(
            glb_query, cross, cross,
            attn_mask=cross_mask,
            tau=tau, delta=delta
        )[0])
        # Back to one token per sequence so it lines up with ``glb_token``.
        glb_update = torch.reshape(
            glb_update, (glb_update.shape[0] * glb_update.shape[1], glb_update.shape[2])
        ).unsqueeze(1)
        glb_token = self.norm2(glb_token + glb_update)

        x = torch.cat([x[:, :-1, :], glb_token], dim=1)

        # Position-wise feed-forward, applied channel-first for Conv1d.
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        ffn_out = self.dropout(self.conv2(hidden).transpose(-1, 1))
        return self.norm3(x + ffn_out)
class Model(nn.Module):
    """TimeXer forecasting model.

    Endogenous series are embedded as patch tokens plus a global token
    (``EnEmbedding``); the series passed as exogenous input are embedded by
    ``DataEmbedding_inverted`` and attended to by the global token inside the
    encoder. Only the long-term and short-term forecasting tasks are handled.
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.features = configs.features
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.use_norm = configs.use_norm
        self.patch_len = configs.patch_len
        self.patch_num = int(configs.seq_len // configs.patch_len)
        # In 'MS' mode a single endogenous variable is embedded.
        self.n_vars = 1 if configs.features == 'MS' else configs.enc_in

        # Embedding
        self.en_embedding = EnEmbedding(self.n_vars, configs.d_model, self.patch_len, configs.dropout)
        self.ex_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed,
                                                   configs.freq, configs.dropout)

        # Encoder-only architecture
        encoder_layers = []
        for _ in range(configs.e_layers):
            self_attn = AttentionLayer(
                FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                              output_attention=False),
                configs.d_model, configs.n_heads)
            cross_attn = AttentionLayer(
                FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                              output_attention=False),
                configs.d_model, configs.n_heads)
            encoder_layers.append(EncoderLayer(
                self_attn,
                cross_attn,
                configs.d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation,
            ))
        self.encoder = Encoder(encoder_layers, norm_layer=torch.nn.LayerNorm(configs.d_model))

        # One extra token per variable accounts for the global token.
        self.head_nf = configs.d_model * (self.patch_num + 1)
        self.head = FlattenHead(configs.enc_in, self.head_nf, configs.pred_len,
                                head_dropout=configs.dropout)

    def _predict_normalized(self, endogenous, exogenous, x_mark_enc):
        """Embed, encode and project; returns the forecast before de-normalization."""
        en_embed, n_vars = self.en_embedding(endogenous)
        ex_embed = self.ex_embedding(exogenous, x_mark_enc)

        enc_out = self.encoder(en_embed, ex_embed)
        enc_out = torch.reshape(
            enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1]))
        # z: [bs x nvars x d_model x patch_num]
        enc_out = enc_out.permute(0, 1, 3, 2)

        # z: [bs x nvars x target_window] -> [bs x target_window x nvars]
        return self.head(enc_out).permute(0, 2, 1)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast the last channel of ``x_enc``, using the other channels as exogenous input."""
        if self.use_norm:
            # Normalization from Non-stationary Transformer
            means = x_enc.mean(1, keepdim=True).detach()
            x_enc = x_enc - means
            stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
            x_enc = x_enc / stdev

        target = x_enc[:, :, -1].unsqueeze(-1).permute(0, 2, 1)
        dec_out = self._predict_normalized(target, x_enc[:, :, :-1], x_mark_enc)

        if self.use_norm:
            # De-Normalization from Non-stationary Transformer (statistics of the last channel only)
            scale = stdev[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1)
            shift = means[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1)
            dec_out = dec_out * scale
            dec_out = dec_out + shift
        return dec_out

    def forecast_multi(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast every channel of ``x_enc``; all channels also serve as the exogenous input."""
        if self.use_norm:
            # Normalization from Non-stationary Transformer
            means = x_enc.mean(1, keepdim=True).detach()
            x_enc = x_enc - means
            stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
            x_enc = x_enc / stdev

        dec_out = self._predict_normalized(x_enc.permute(0, 2, 1), x_enc, x_mark_enc)

        if self.use_norm:
            # De-Normalization from Non-stationary Transformer
            scale = stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
            shift = means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)
            dec_out = dec_out * scale
            dec_out = dec_out + shift
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the last ``pred_len`` steps of the forecast, or ``None`` for unsupported tasks."""
        if self.task_name not in ('long_term_forecast', 'short_term_forecast'):
            return None
        if self.features == 'M':
            dec_out = self.forecast_multi(x_enc, x_mark_enc, x_dec, x_mark_dec)
        else:
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return dec_out[:, -self.pred_len:, :]  # [B, L, D]
================================================
FILE: models/TimesFM.py
================================================
import torch
from torch import nn
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import PatchEmbedding
import timesfm
class Model(nn.Module):
    """Zero-shot forecasting wrapper around the pretrained TimesFM 2.5 (200M, torch) model.

    Every channel of the input is forecast independently as a univariate
    series; per-series instance normalization is applied before the call and
    undone on the forecast.
    """

    def __init__(self, configs):
        """
        configs must provide ``seq_len`` (used as max context), ``pred_len``
        (used as max horizon) and ``task_name``.
        """
        super().__init__()
        self.model = timesfm.TimesFM_2p5_200M_torch.from_pretrained("google/timesfm-2.5-200m-pytorch")
        self.model.compile(
            timesfm.ForecastConfig(
                max_context=configs.seq_len,
                max_horizon=configs.pred_len,
                normalize_inputs=True,
                use_continuous_quantile_head=True,
                force_flip_invariance=True,
                infer_is_positive=True,
                fix_quantile_crossing=True,
            )
        )
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecast ``pred_len`` steps for every channel of ``x_enc`` ([B, L, C]).

        Only ``x_enc`` is used. Returns a tensor of shape [B, pred_len, C]
        on the device of ``x_enc``.
        """
        # Per-series instance normalization over the time axis.
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        B, L, C = x_enc.shape
        device = x_enc.device

        # Each row handed to TimesFM must be ONE variable's history. Move the
        # channel axis next to the batch axis before flattening; reshaping
        # [B, L, C] directly to (B*C, L) would interleave time steps and
        # channels whenever C > 1.
        series = x_enc.permute(0, 2, 1).reshape(B * C, L)
        output, _ = self.model.forecast(
            horizon=self.pred_len,
            inputs=series.detach().cpu().numpy()
        )
        output = torch.Tensor(output).to(device)

        # Rows are ordered (batch, channel): restore [B, C, H] and then go
        # back to the [B, H, C] layout used by the rest of the library.
        dec_out = output.reshape(B, C, output.shape[-1]).permute(0, 2, 1)

        # Undo the instance normalization.
        dec_out = dec_out * \
            (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        dec_out = dec_out + \
            (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Return the forecast for task ``'zero_shot_forecast'``; ``None`` for any other task."""
        if self.task_name == 'zero_shot_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out
        return None
================================================
FILE: models/TimesNet.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.fft
from layers.Embed import DataEmbedding
from layers.Conv_Blocks import Inception_Block_V1
def FFT_for_Period(x, k=2):
    """Find the ``k`` dominant periods of ``x`` ([B, T, C]) from its amplitude spectrum.

    Returns ``(periods, weights)``: ``periods`` is a NumPy integer array of
    length ``k`` holding ``T // frequency_index`` for the ``k`` strongest
    frequencies (amplitudes averaged over batch and channels, index 0
    excluded); ``weights`` is a [B, k] tensor with each sample's
    channel-averaged amplitude at those frequencies.
    """
    spectrum = torch.fft.rfft(x, dim=1)
    amplitude = abs(spectrum)

    # Rank frequencies by their amplitude averaged over batch and channels.
    mean_amplitude = amplitude.mean(0).mean(-1)
    mean_amplitude[0] = 0  # never pick the zero-frequency component
    _, top_index = torch.topk(mean_amplitude, k)
    top_index = top_index.detach().cpu().numpy()

    periods = x.shape[1] // top_index
    return periods, amplitude.mean(-1)[:, top_index]
class TimesBlock(nn.Module):
    """Folds the series into 2-D by each dominant period, applies a shared 2-D conv stack, and fuses the results.

    The ``top_k`` periods come from ``FFT_for_Period``; the per-period outputs
    are combined with softmax weights derived from the spectrum amplitudes
    and added back to the input.
    """

    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.k = configs.top_k
        # parameter-efficient design
        expand = Inception_Block_V1(configs.d_model, configs.d_ff,
                                    num_kernels=configs.num_kernels)
        squeeze = Inception_Block_V1(configs.d_ff, configs.d_model,
                                     num_kernels=configs.num_kernels)
        self.conv = nn.Sequential(expand, nn.GELU(), squeeze)

    def forward(self, x):
        B, T, N = x.size()
        total_len = self.seq_len + self.pred_len
        period_list, period_weight = FFT_for_Period(x, self.k)

        branches = []
        for idx in range(self.k):
            period = period_list[idx]
            if total_len % period == 0:
                length = total_len
                out = x
            else:
                # Zero-pad at the end so the length is a whole number of periods.
                length = (total_len // period + 1) * period
                padding = torch.zeros([x.shape[0], (length - total_len), x.shape[2]]).to(x.device)
                out = torch.cat([x, padding], dim=1)
            # reshape: [B, length, N] -> [B, N, length // period, period]
            out = out.reshape(B, length // period, period, N)
            out = out.permute(0, 3, 1, 2).contiguous()
            # 2D conv: from 1d Variation to 2d Variation
            out = self.conv(out)
            # reshape back and drop the padded tail
            out = out.permute(0, 2, 3, 1).reshape(B, -1, N)
            branches.append(out[:, :total_len, :])
        stacked = torch.stack(branches, dim=-1)

        # adaptive aggregation
        weights = F.softmax(period_weight, dim=1)
        weights = weights.unsqueeze(1).unsqueeze(1).repeat(1, T, N, 1)
        fused = torch.sum(stacked * weights, -1)

        # residual connection
        return fused + x
class Model(nn.Module):
    """
    TimesNet: a stack of TimesBlocks over an embedded series with task-specific heads.

    Paper link: https://openreview.net/pdf?id=ju_Uqw384Oq
    """

    def __init__(self, configs):
        super().__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len

        blocks = []
        for _ in range(configs.e_layers):
            blocks.append(TimesBlock(configs))
        self.model = nn.ModuleList(blocks)
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        self.layer = configs.e_layers
        self.layer_norm = nn.LayerNorm(configs.d_model)

        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            # Extends the time axis from seq_len to seq_len + pred_len.
            self.predict_linear = nn.Linear(
                self.seq_len, self.pred_len + self.seq_len)
            self.projection = nn.Linear(
                configs.d_model, configs.c_out, bias=True)
        if task in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(
                configs.d_model, configs.c_out, bias=True)
        if task == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def _run_blocks(self, enc_out):
        """Apply every TimesBlock in order, each followed by the shared LayerNorm."""
        for idx in range(self.layer):
            enc_out = self.layer_norm(self.model[idx](enc_out))
        return enc_out

    def _denormalize(self, dec_out, means, stdev):
        """Undo the instance normalization over ``pred_len + seq_len`` steps."""
        steps = self.pred_len + self.seq_len
        scale = stdev[:, 0, :].unsqueeze(1).repeat(1, steps, 1)
        shift = means[:, 0, :].unsqueeze(1).repeat(1, steps, 1)
        return dec_out.mul(scale).add(shift)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Return the de-normalized output over ``seq_len + pred_len`` steps."""
        # Normalization from Non-stationary Transformer
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        # embedding
        enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        # align temporal dimension
        enc_out = self.predict_linear(enc_out.permute(0, 2, 1)).permute(0, 2, 1)
        # TimesNet
        enc_out = self._run_blocks(enc_out)
        # project back
        dec_out = self.projection(enc_out)

        # De-Normalization from Non-stationary Transformer
        return self._denormalize(dec_out, means, stdev)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct the series; statistics are divided by the count of ``mask == 1`` entries."""
        # Normalization from Non-stationary Transformer
        observed = torch.sum(mask == 1, dim=1)
        means = (torch.sum(x_enc, dim=1) / observed).unsqueeze(1).detach()
        x_enc = x_enc.sub(means)
        x_enc = x_enc.masked_fill(mask == 0, 0)
        stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / observed + 1e-5)
        stdev = stdev.unsqueeze(1).detach()
        x_enc = x_enc.div(stdev)

        # embedding
        enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        # TimesNet
        enc_out = self._run_blocks(enc_out)
        # project back
        dec_out = self.projection(enc_out)

        # De-Normalization from Non-stationary Transformer
        return self._denormalize(dec_out, means, stdev)

    def anomaly_detection(self, x_enc):
        """Reconstruct ``x_enc`` without time-mark features."""
        # Normalization from Non-stationary Transformer
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc.sub(means)
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc.div(stdev)

        # embedding
        enc_out = self.enc_embedding(x_enc, None)  # [B,T,C]
        # TimesNet
        enc_out = self._run_blocks(enc_out)
        # project back
        dec_out = self.projection(enc_out)

        # De-Normalization from Non-stationary Transformer
        return self._denormalize(dec_out, means, stdev)

    def classification(self, x_enc, x_mark_enc):
        """Return class logits; ``x_mark_enc`` is used as a padding mask on the features."""
        # embedding
        enc_out = self.enc_embedding(x_enc, None)  # [B,T,C]
        # TimesNet
        enc_out = self._run_blocks(enc_out)

        # Output
        # the output transformer encoder/decoder embeddings don't include non-linearity
        features = self.dropout(self.act(enc_out))
        # zero-out padding embeddings
        features = features * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        features = features.reshape(features.shape[0], -1)
        return self.projection(features)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; unknown tasks yield ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/Transformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import DataEmbedding
import numpy as np
class Model(nn.Module):
    """
    Vanilla Transformer
    with O(L^2) complexity

    The encoder is shared by all tasks; a decoder is only built for the
    forecasting tasks, the other tasks project the encoder output directly.

    Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len

        # Embedding
        self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)

        # Encoder
        encoder_layers = []
        for _ in range(configs.e_layers):
            attention = AttentionLayer(
                FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                              output_attention=False),
                configs.d_model, configs.n_heads)
            encoder_layers.append(EncoderLayer(
                attention,
                configs.d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation
            ))
        self.encoder = Encoder(encoder_layers, norm_layer=torch.nn.LayerNorm(configs.d_model))

        task = self.task_name
        # Decoder
        if task in ('long_term_forecast', 'short_term_forecast'):
            self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)
            decoder_layers = []
            for _ in range(configs.d_layers):
                # First attention is constructed with mask_flag=True, the second with False.
                masked_self_attn = AttentionLayer(
                    FullAttention(True, configs.factor, attention_dropout=configs.dropout,
                                  output_attention=False),
                    configs.d_model, configs.n_heads)
                cross_attn = AttentionLayer(
                    FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                  output_attention=False),
                    configs.d_model, configs.n_heads)
                decoder_layers.append(DecoderLayer(
                    masked_self_attn,
                    cross_attn,
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                ))
            self.decoder = Decoder(
                decoder_layers,
                norm_layer=torch.nn.LayerNorm(configs.d_model),
                projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
            )
        if task in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if task == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encode ``x_enc`` and decode ``x_dec`` against it; returns the decoder output."""
        # Embedding
        memory = self.enc_embedding(x_enc, x_mark_enc)
        memory, _ = self.encoder(memory, attn_mask=None)

        target = self.dec_embedding(x_dec, x_mark_dec)
        return self.decoder(target, memory, x_mask=None, cross_mask=None)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Project the encoder output back to ``c_out`` channels; ``mask`` is not read here."""
        # Embedding
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)
        return self.projection(enc_out)

    def anomaly_detection(self, x_enc):
        """Reconstruct ``x_enc`` without time-mark features."""
        # Embedding
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)
        return self.projection(enc_out)

    def classification(self, x_enc, x_mark_enc):
        """Return class logits; ``x_mark_enc`` is used as a padding mask on the features."""
        # Embedding
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)

        # Output
        # the output transformer encoder/decoder embeddings don't include non-linearity
        features = self.dropout(self.act(enc_out))
        features = features * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
        features = features.reshape(features.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(features)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; unknown tasks yield ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
================================================
FILE: models/WPMixer.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 5 16:10:01 2025
@author: Murad
SISLab, USF
mmurad@usf.edu
https://github.com/Secure-and-Intelligent-Systems-Lab/WPMixer
"""
import torch.nn as nn
import torch
from layers.DWT_Decomposition import Decomposition
class TokenMixer(nn.Module):
    """Two-layer MLP that maps ``input_seq`` tokens to ``pred_seq`` tokens.

    The hidden width is ``pred_seq * factor``. ``batch_size``, ``channel``
    and ``d_model`` are only stored on the instance. The ``[]`` defaults are
    placeholders; real values are expected from the caller.
    """

    def __init__(self, input_seq=[], batch_size=[], channel=[], pred_seq=[], dropout=[], factor=[], d_model=[]):
        super().__init__()
        self.input_seq = input_seq
        self.batch_size = batch_size
        self.channel = channel
        self.pred_seq = pred_seq
        self.dropout = dropout
        self.factor = factor
        self.d_model = d_model

        hidden = self.pred_seq * self.factor
        self.dropoutLayer = nn.Dropout(self.dropout)
        self.layers = nn.Sequential(
            nn.Linear(self.input_seq, hidden),
            nn.GELU(),
            nn.Dropout(self.dropout),
            nn.Linear(hidden, self.pred_seq),
        )

    def forward(self, x):
        # Swap axes 1 and 2 around the MLP; the MLP itself acts on the last axis.
        swapped = x.transpose(1, 2)
        mixed = self.layers(swapped)
        return mixed.transpose(1, 2)
class Mixer(nn.Module):
    """Mixer block: token (patch) mixing followed by a residual embedding-mixing MLP.

    ``tfactor`` is the expansion factor of the patch mixer and ``dfactor``
    that of the embedding mixer. The ``[]`` defaults are placeholders; real
    values are expected from the caller.
    """

    def __init__(self,
                 input_seq=[],
                 out_seq=[],
                 batch_size=[],
                 channel=[],
                 d_model=[],
                 dropout=[],
                 tfactor=[],
                 dfactor=[]):
        super().__init__()
        self.input_seq = input_seq
        self.pred_seq = out_seq
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.dropout = dropout
        self.tfactor = tfactor  # expansion factor for patch mixer
        self.dfactor = dfactor  # expansion factor for embedding mixer

        self.tMixer = TokenMixer(
            input_seq=self.input_seq,
            batch_size=self.batch_size,
            channel=self.channel,
            pred_seq=self.pred_seq,
            dropout=self.dropout,
            factor=self.tfactor,
            d_model=self.d_model,
        )
        self.dropoutLayer = nn.Dropout(self.dropout)
        self.norm1 = nn.BatchNorm2d(self.channel)
        self.norm2 = nn.BatchNorm2d(self.channel)

        hidden = self.d_model * self.dfactor
        self.embeddingMixer = nn.Sequential(
            nn.Linear(self.d_model, hidden),
            nn.GELU(),
            nn.Dropout(self.dropout),
            nn.Linear(hidden, self.d_model),
        )

    def forward(self, x):
        '''
        Parameters
        ----------
        x : input: [Batch, Channel, Patch_number, d_model]

        Returns
        -------
        x: output: [Batch, Channel, Patch_number, d_model]
        '''
        normed = self.norm1(x)

        # Token mixing runs with d_model moved to axis 1, then the layout is restored.
        tokens = normed.permute(0, 3, 1, 2)
        tokens = self.dropoutLayer(self.tMixer(tokens))
        tokens = tokens.permute(0, 2, 3, 1)

        tokens = self.norm2(tokens)
        # Embedding mixing is the only step with a residual connection.
        return tokens + self.dropoutLayer(self.embeddingMixer(tokens))
class ResolutionBranch(nn.Module):
    """Forecasts one wavelet coefficient series: patching, embedding, two Mixer blocks, linear head.

    The ``[]`` defaults are placeholders; real values are expected from the
    caller.
    """

    def __init__(self,
                 input_seq=[],
                 pred_seq=[],
                 batch_size=[],
                 channel=[],
                 d_model=[],
                 dropout=[],
                 embedding_dropout=[],
                 tfactor=[],
                 dfactor=[],
                 patch_len=[],
                 patch_stride=[]):
        super().__init__()
        self.input_seq = input_seq
        self.pred_seq = pred_seq
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.dropout = dropout
        self.embedding_dropout = embedding_dropout
        self.tfactor = tfactor
        self.dfactor = dfactor
        self.patch_len = patch_len
        self.patch_stride = patch_stride
        # do_patching appends patch_stride padding values, hence "+ 2" rather than "+ 1".
        self.patch_num = int((self.input_seq - self.patch_len) / self.patch_stride + 2)

        self.patch_norm = nn.BatchNorm2d(self.channel)
        self.patch_embedding_layer = nn.Linear(self.patch_len, self.d_model)  # shared among all channels

        mixer_kwargs = dict(
            input_seq=self.patch_num,
            out_seq=self.patch_num,
            batch_size=self.batch_size,
            channel=self.channel,
            d_model=self.d_model,
            dropout=self.dropout,
            tfactor=self.tfactor,
            dfactor=self.dfactor,
        )
        self.mixer1 = Mixer(**mixer_kwargs)
        self.mixer2 = Mixer(**mixer_kwargs)

        self.norm = nn.BatchNorm2d(self.channel)
        self.dropoutLayer = nn.Dropout(self.embedding_dropout)
        self.head = nn.Sequential(
            nn.Flatten(start_dim=-2, end_dim=-1),
            nn.Linear(self.patch_num * self.d_model, self.pred_seq),
        )

    def forward(self, x):
        '''
        Parameters
        ----------
        x : input coefficient series: [Batch, channel, length_of_coefficient_series]

        Returns
        -------
        out : predicted coefficient series: [Batch, channel, length_of_pred_coeff_series]
        '''
        patches = self.patch_norm(self.do_patching(x))
        embedded = self.dropoutLayer(self.patch_embedding_layer(patches))

        mixed = self.mixer1(embedded)
        # The second mixer is applied residually on top of the first.
        mixed = mixed + self.mixer2(mixed)

        return self.head(self.norm(mixed))

    def do_patching(self, x):
        """Pad the end of ``x`` with ``patch_stride`` copies of its last value and cut it into patches."""
        last_value = x[:, :, -1:]
        tail = last_value.repeat(1, 1, self.patch_stride)
        padded = torch.cat((x, tail), dim=-1)
        return padded.unfold(dimension=-1, size=self.patch_len, step=self.patch_stride)
class WPMixerCore(nn.Module):
    """Wavelet decomposition, one ResolutionBranch per coefficient series, then reconstruction.

    The ``[]`` defaults are placeholders; real values are expected from the
    caller.
    """

    def __init__(self,
                 input_length=[],
                 pred_length=[],
                 wavelet_name=[],
                 level=[],
                 batch_size=[],
                 channel=[],
                 d_model=[],
                 dropout=[],
                 embedding_dropout=[],
                 tfactor=[],
                 dfactor=[],
                 device=[],
                 patch_len=[],
                 patch_stride=[],
                 no_decomposition=[],
                 use_amp=[]):
        super().__init__()
        self.input_length = input_length
        self.pred_length = pred_length
        self.wavelet_name = wavelet_name
        self.level = level
        self.batch_size = batch_size
        self.channel = channel
        self.d_model = d_model
        self.dropout = dropout
        self.embedding_dropout = embedding_dropout
        self.device = device
        self.no_decomposition = no_decomposition
        self.tfactor = tfactor
        self.dfactor = dfactor
        self.use_amp = use_amp

        self.Decomposition_model = Decomposition(
            input_length=self.input_length,
            pred_length=self.pred_length,
            wavelet_name=self.wavelet_name,
            level=self.level,
            batch_size=self.batch_size,
            channel=self.channel,
            d_model=self.d_model,
            tfactor=self.tfactor,
            dfactor=self.dfactor,
            device=self.device,
            no_decomposition=self.no_decomposition,
            use_amp=self.use_amp,
        )

        self.input_w_dim = self.Decomposition_model.input_w_dim  # list of the length of the input coefficient series
        self.pred_w_dim = self.Decomposition_model.pred_w_dim  # list of the length of the predicted coefficient series

        self.patch_len = patch_len
        self.patch_stride = patch_stride

        # (m+1) number of resolutionBranch
        branches = []
        for idx in range(len(self.input_w_dim)):
            branches.append(ResolutionBranch(
                input_seq=self.input_w_dim[idx],
                pred_seq=self.pred_w_dim[idx],
                batch_size=self.batch_size,
                channel=self.channel,
                d_model=self.d_model,
                dropout=self.dropout,
                embedding_dropout=self.embedding_dropout,
                tfactor=self.tfactor,
                dfactor=self.dfactor,
                patch_len=self.patch_len,
                patch_stride=self.patch_stride,
            ))
        self.resolutionBranch = nn.ModuleList(branches)

    def forward(self, xL):
        '''
        Parameters
        ----------
        xL : Look back window: [Batch, look_back_length, channel]

        Returns
        -------
        xT : Prediction time series: [Batch, prediction_length, output_channel]
        '''
        series = xL.transpose(1, 2)  # [batch, channel, look_back_length]

        # approx / details: input approximation and detail coefficient series
        approx, details = self.Decomposition_model.transform(series)

        # Branch 0 handles the approximation series, branch i + 1 the i-th detail series.
        pred_approx = self.resolutionBranch[0](approx)
        pred_details = [self.resolutionBranch[idx + 1](details[idx])
                        for idx in range(len(details))]

        recon = self.Decomposition_model.inv_transform(pred_approx, pred_details)
        recon = recon.transpose(1, 2)
        # decomposition output is always even, but pred length can be odd
        return recon[:, -self.pred_length:, :]
class Model(nn.Module):
    """WPMixer forecaster: instance normalization around a ``WPMixerCore``.

    Only the long-term and short-term forecasting tasks are implemented;
    imputation, anomaly detection and classification raise
    ``NotImplementedError``.
    """

    def __init__(self, args, tfactor=5, dfactor=5, wavelet='db2', level=1, stride=8, no_decomposition=False):
        super().__init__()
        self.args = args
        self.task_name = args.task_name
        core_kwargs = dict(
            input_length=self.args.seq_len,
            pred_length=self.args.pred_len,
            wavelet_name=wavelet,
            level=level,
            batch_size=self.args.batch_size,
            channel=self.args.c_out,
            d_model=self.args.d_model,
            dropout=self.args.dropout,
            # The same dropout rate is used for the patch embedding.
            embedding_dropout=self.args.dropout,
            tfactor=tfactor,
            dfactor=dfactor,
            device=self.args.device,
            patch_len=self.args.patch_len,
            patch_stride=stride,
            no_decomposition=no_decomposition,
            use_amp=self.args.use_amp,
        )
        self.wpmixerCore = WPMixerCore(**core_kwargs)

    def forecast(self, x_enc, x_mark_enc, x_dec, batch_y_mark):
        """Forecast ``pred_len`` steps from ``x_enc``; the other arguments are not used."""
        horizon = self.args.pred_len

        # Normalization
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc / stdev

        pred = self.wpmixerCore(x_enc)
        # Keep only the last c_out channels of the prediction.
        pred = pred[:, :, -self.args.c_out:]

        # De-Normalization
        scale = stdev[:, 0].unsqueeze(1).repeat(1, horizon, 1)
        shift = means[:, 0].unsqueeze(1).repeat(1, horizon, 1)
        dec_out = pred * scale
        dec_out = dec_out + shift
        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; unknown tasks yield ``None``."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            return self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)  # [B, L, D]
        if task == 'imputation':
            raise NotImplementedError("Task imputation for WPMixer is temporarily not supported")
        if task == 'anomaly_detection':
            raise NotImplementedError("Task anomaly_detection for WPMixer is temporarily not supported")
        if task == 'classification':
            raise NotImplementedError("Task classification for WPMixer is temporarily not supported")
        return None
================================================
FILE: models/__init__.py
================================================
pass
================================================
FILE: models/iTransformer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Transformer_EncDec import Encoder, EncoderLayer
from layers.SelfAttention_Family import FullAttention, AttentionLayer
from layers.Embed import DataEmbedding_inverted
import numpy as np
class Model(nn.Module):
"""
Paper link: https://arxiv.org/abs/2310.06625
"""
def __init__(self, configs):
super(Model, self).__init__()
self.task_name = configs.task_name
self.seq_len = configs.seq_len
self.pred_len = configs.pred_len
# Embedding
self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq,
configs.dropout)
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
AttentionLayer(
FullAttention(False, configs.factor, attention_dropout=configs.dropout,
output_attention=False), configs.d_model, configs.n_heads),
configs.d_model,
configs.d_ff,
dropout=configs.dropout,
activation=configs.activation
) for l in range(configs.e_layers)
],
norm_layer=torch.nn.LayerNorm(configs.d_model)
)
# Decoder
if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True)
if self.task_name == 'imputation':
self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True)
if self.task_name == 'anomaly_detection':
self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True)
if self.task_name == 'classification':
self.act = F.gelu
self.dropout = nn.Dropout(configs.dropout)
self.projection = nn.Linear(configs.d_model * configs.enc_in, configs.num_class)
def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
# Normalization from Non-stationary Transformer
means = x_enc.mean(1, keepdim=True).detach()
x_enc = x_enc - means
stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
x_enc /= stdev
_, _, N = x_enc.shape
# Embedding
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=None)
dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N]
# De-Normalization from Non-stationary Transformer
dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
return dec_out
def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
# Normalization from Non-stationary Transformer
means = x_enc.mean(1, keepdim=True).detach()
x_enc = x_enc - means
stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
x_enc /= stdev
_, L, N = x_enc.shape
# Embedding
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=None)
dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N]
# De-Normalization from Non-stationary Transformer
dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1))
dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1))
return dec_out
def anomaly_detection(self, x_enc):
# Normalization from Non-stationary Transformer
means = x_enc.mean(1, keepdim=True).detach()
x_enc = x_enc - means
stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
x_enc /= stdev
_, L, N = x_enc.shape
# Embedding
enc_out = self.enc_embedding(x_enc, None)
enc_out, attns = self.encoder(enc_out, attn_mask=None)
dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N]
# De-Normalization from Non-stationary Transformer
dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1))
dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1))
return dec_out
def classification(self, x_enc, x_mark_enc):
# Embedding
enc_out = self.enc_embedding(x_enc, None)
enc_out, attns = self.encoder(enc_out, attn_mask=None)
# Output
output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity
output = self.dropout(output)
output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model)
output = self.projection(output) # (batch_size, num_classes)
return output
def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """Dispatch to the task-specific method selected by ``self.task_name``.

    Forecasting tasks return only the last ``self.pred_len`` steps of the
    forecast; every other known task returns its method's output as is.
    An unrecognised task name yields ``None``.
    """
    task = self.task_name

    if task in ('long_term_forecast', 'short_term_forecast'):
        prediction = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
        # [B, L, D]
        return prediction[:, -self.pred_len:, :]

    if task == 'imputation':
        # [B, L, D]
        return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)

    if task == 'anomaly_detection':
        # [B, L, D]
        return self.anomaly_detection(x_enc)

    if task == 'classification':
        # [B, N]
        return self.classification(x_enc, x_mark_enc)

    return None
================================================
FILE: requirements.txt
================================================
einops==0.8.1
local-attention==1.11.2
reformer-pytorch==1.4.4
numpy==2.1.2
scipy==1.16.3
scikit-learn==1.7.2
pandas==2.3.3
matplotlib==3.10.8
sktime==0.40.1
sympy==1.13.1
PyWavelets==1.9.0
datasets==4.5.0
tqdm==4.66.5
patool==4.0.3
transformers==4.57.3
huggingface_hub==0.36.0
chronos-forecasting==2.2.1
tirex-ts==1.3.0
timesfm==1.3.0
# uni2ts deps
######
gluonts==0.16.2
lightning==2.6.0
hydra-core==1.3.0
jax==0.8.1
jaxtyping==0.3.4
orjson==3.11.5
tensorboard==2.20.0
python-dotenv==1.0.0
######
================================================
FILE: run.py
================================================
import argparse
import os
import torch
import torch.backends
from utils.print_args import print_args
import random
import numpy as np
def select_device(args):
    """Return the torch device for this run.

    CUDA is used when it is available and ``args.use_gpu`` is set. Otherwise
    MPS is used when it is available and ``args.use_gpu`` is set, and the CPU
    in every other case, so ``--no_use_gpu`` really forces the CPU.
    """
    if torch.cuda.is_available() and args.use_gpu:
        print('Using GPU')
        return torch.device('cuda:{}'.format(args.gpu))

    mps_backend = getattr(torch.backends, 'mps', None)
    use_mps = bool(args.use_gpu) and mps_backend is not None and mps_backend.is_available()
    print('Using cpu or mps')
    return torch.device('mps') if use_mps else torch.device('cpu')


def build_setting(args, ii):
    """Build the experiment identifier passed to ``exp.train``/``exp.test``.

    The same function is used for training and for test-only runs so that
    both modes always agree on the name for a given set of arguments.

    Args:
        args: Parsed command-line namespace.
        ii: Index of the current repetition.
    """
    # Override setting for specific model to ensure proper checkpoint naming and logging
    if args.model == 'MambaSingleLayer' and args.task_name == 'classification':
        return f'{args.task_name}_CLS_{args.model_id}_{args.model}_{args.data}_ft{args.features}' \
            + f'_sl{args.seq_len}_ll{args.label_len}_pl{args.pred_len}_dm{args.d_model}_ds{args.d_ff}' \
            + f'_expand{args.expand}_dc{args.d_conv}_nk{args.num_kernels}' \
            + f'_tvdt{int(args.tv_dt)}_tvB{int(args.tv_B)}_tvC{int(args.tv_C)}_useD{int(args.use_D)}_{args.des}_{ii}'

    return '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_expand{}_dc{}_fc{}_eb{}_dt{}_{}_{}'.format(
        args.task_name,
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.expand,
        args.d_conv,
        args.factor,
        args.embed,
        args.distil,
        args.des, ii)


def release_device_cache(args):
    """Release cached accelerator memory after an experiment, if a GPU is in use."""
    if not args.use_gpu:
        return
    if args.gpu_type == 'mps':
        # The cache API lives in `torch.mps`; `torch.backends.mps` only exposes
        # availability checks. Guarded so CPU-only builds do not fail here.
        mps_backend = getattr(torch.backends, 'mps', None)
        if mps_backend is not None and mps_backend.is_available() and hasattr(torch, 'mps'):
            torch.mps.empty_cache()
    elif args.gpu_type == 'cuda':
        torch.cuda.empty_cache()


# Command-line entry point: parse arguments, pick the device and experiment
# class, then either train+test (`--is_training 1`) or test only.
if __name__ == '__main__':
    fix_seed = 2021
    random.seed(fix_seed)
    torch.manual_seed(fix_seed)
    np.random.seed(fix_seed)

    parser = argparse.ArgumentParser(description='TimesNet')

    # basic config
    parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
                        help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
    parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
    parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
    parser.add_argument('--model', type=str, required=True, default='Autoformer',
                        help='model name, options: [Autoformer, Transformer, TimesNet]')

    # data loader
    parser.add_argument('--data', type=str, required=True, default='ETTh1', help='dataset type')
    parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file')
    parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
    parser.add_argument('--features', type=str, default='M',
                        help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
    parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
    parser.add_argument('--freq', type=str, default='h',
                        help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
    parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

    # forecasting task
    parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
    parser.add_argument('--label_len', type=int, default=48, help='start token length')
    parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
    parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
    parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False)

    # imputation task
    parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio')

    # anomaly detection task
    parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%%)')

    # model define
    parser.add_argument('--expand', type=int, default=2, help='expansion factor for Mamba')
    parser.add_argument('--d_conv', type=int, default=4, help='conv kernel size for Mamba')
    parser.add_argument('--tv_dt', type=int, default=0, help='whether to use time variant dt for MambaSL')
    parser.add_argument('--tv_B', type=int, default=0, help='whether to use time variant B for MambaSL')
    parser.add_argument('--tv_C', type=int, default=0, help='whether to use time variant C for MambaSL')
    parser.add_argument('--use_D', type=int, default=0, help='whether to use D for MambaSL')
    parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock')
    parser.add_argument('--num_kernels', type=int, default=6, help='for Inception')
    parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
    parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
    parser.add_argument('--c_out', type=int, default=7, help='output size')
    parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
    parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
    parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
    parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
    parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
    parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
    parser.add_argument('--factor', type=int, default=1, help='attn factor')
    parser.add_argument('--distil', action='store_false',
                        help='whether to use distilling in encoder, using this argument means not using distilling',
                        default=True)
    parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
    parser.add_argument('--embed', type=str, default='timeF',
                        help='time features encoding, options:[timeF, fixed, learned]')
    parser.add_argument('--activation', type=str, default='gelu', help='activation')
    parser.add_argument('--channel_independence', type=int, default=1,
                        help='0: channel dependence 1: channel independence for FreTS model')
    parser.add_argument('--decomp_method', type=str, default='moving_avg',
                        help='method of series decompsition, only support moving_avg or dft_decomp')
    parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalize; True 1 False 0')
    parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers')
    parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size')
    parser.add_argument('--down_sampling_method', type=str, default=None,
                        help='down sampling method, only support avg, max, conv')
    parser.add_argument('--seg_len', type=int, default=96,
                        help='the length of segmen-wise iteration of SegRNN')

    # optimization
    parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
    parser.add_argument('--itr', type=int, default=1, help='experiments times')
    parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
    parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
    parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
    parser.add_argument('--des', type=str, default='test', help='exp description')
    parser.add_argument('--loss', type=str, default='MSE', help='loss function')
    parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
    parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

    # GPU
    parser.add_argument('--use_gpu', action='store_true', default=True, help='use gpu (default: on)')
    parser.add_argument('--no_use_gpu', action='store_false', dest='use_gpu', help='disable gpu (force cpu)')
    parser.add_argument('--gpu', type=int, default=0, help='gpu')
    parser.add_argument('--gpu_type', type=str, default='cuda', help='gpu type')  # cuda or mps
    parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
    parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')

    # de-stationary projector params
    parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128],
                        help='hidden layer dimensions of projector (List)')
    parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector')

    # metrics (dtw)
    parser.add_argument('--use_dtw', action='store_true', default=False,
                        help='enable dtw metric (time consuming; default: off)')

    # Augmentation
    parser.add_argument('--augmentation_ratio', type=int, default=0, help="How many times to augment")
    parser.add_argument('--seed', type=int, default=2, help="Randomization seed")
    parser.add_argument('--jitter', default=False, action="store_true", help="Jitter preset augmentation")
    parser.add_argument('--scaling', default=False, action="store_true", help="Scaling preset augmentation")
    parser.add_argument('--permutation', default=False, action="store_true",
                        help="Equal Length Permutation preset augmentation")
    parser.add_argument('--randompermutation', default=False, action="store_true",
                        help="Random Length Permutation preset augmentation")
    parser.add_argument('--magwarp', default=False, action="store_true", help="Magnitude warp preset augmentation")
    parser.add_argument('--timewarp', default=False, action="store_true", help="Time warp preset augmentation")
    parser.add_argument('--windowslice', default=False, action="store_true", help="Window slice preset augmentation")
    parser.add_argument('--windowwarp', default=False, action="store_true", help="Window warp preset augmentation")
    parser.add_argument('--rotation', default=False, action="store_true", help="Rotation preset augmentation")
    parser.add_argument('--spawner', default=False, action="store_true", help="SPAWNER preset augmentation")
    parser.add_argument('--dtwwarp', default=False, action="store_true", help="DTW warp preset augmentation")
    parser.add_argument('--shapedtwwarp', default=False, action="store_true", help="Shape DTW warp preset augmentation")
    parser.add_argument('--wdba', default=False, action="store_true", help="Weighted DBA preset augmentation")
    parser.add_argument('--discdtw', default=False, action="store_true",
                        help="Discrimitive DTW warp preset augmentation")
    parser.add_argument('--discsdtw', default=False, action="store_true",
                        help="Discrimitive shapeDTW warp preset augmentation")
    parser.add_argument('--extra_tag', type=str, default="", help="Anything extra")

    # TimeXer
    parser.add_argument('--patch_len', type=int, default=16, help='patch length')

    # GCN
    parser.add_argument('--node_dim', type=int, default=10, help='each node embbed to dim dimentions')
    parser.add_argument('--gcn_depth', type=int, default=2, help='')
    parser.add_argument('--gcn_dropout', type=float, default=0.3, help='')
    parser.add_argument('--propalpha', type=float, default=0.3, help='')
    parser.add_argument('--conv_channel', type=int, default=32, help='')
    parser.add_argument('--skip_channel', type=int, default=32, help='')
    parser.add_argument('--individual', action='store_true', default=False,
                        help='DLinear: a linear layer for each variate(channel) individually')

    # TimeFilter
    parser.add_argument('--alpha', type=float, default=0.1, help='KNN for Graph Construction')
    parser.add_argument('--top_p', type=float, default=0.5, help='Dynamic Routing in MoE')
    parser.add_argument('--pos', type=int, choices=[0, 1], default=1, help='Positional Embedding. Set pos to 0 or 1')

    args = parser.parse_args()
    args.device = select_device(args)

    if args.use_gpu and args.use_multi_gpu:
        args.devices = args.devices.replace(' ', '')
        device_ids = args.devices.split(',')
        args.device_ids = [int(id_) for id_ in device_ids]
        args.gpu = args.device_ids[0]

    print('Args in experiment:')
    print_args(args)

    # Experiment classes are imported lazily so only the selected task's
    # module is loaded. Unknown task names fall back to long-term forecasting.
    if args.task_name == 'long_term_forecast':
        from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
        Exp = Exp_Long_Term_Forecast
    elif args.task_name == 'short_term_forecast':
        from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast
        Exp = Exp_Short_Term_Forecast
    elif args.task_name == 'imputation':
        from exp.exp_imputation import Exp_Imputation
        Exp = Exp_Imputation
    elif args.task_name == 'anomaly_detection':
        from exp.exp_anomaly_detection import Exp_Anomaly_Detection
        Exp = Exp_Anomaly_Detection
    elif args.task_name == 'classification':
        from exp.exp_classification import Exp_Classification
        Exp = Exp_Classification
    elif args.task_name == 'zero_shot_forecast':
        from exp.exp_zero_shot_forecasting import Exp_Zero_Shot_Forecast
        Exp = Exp_Zero_Shot_Forecast
    else:
        from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
        Exp = Exp_Long_Term_Forecast

    if args.is_training:
        for ii in range(args.itr):
            # setting record of experiments
            exp = Exp(args)  # set experiments
            setting = build_setting(args, ii)

            print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
            exp.train(setting)

            print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
            exp.test(setting)
            release_device_cache(args)
    else:
        exp = Exp(args)  # set experiments
        ii = 0
        setting = build_setting(args, ii)

        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting, test=1)
        release_device_cache(args)
================================================
FILE: scripts/anomaly_detection/MSL/Autoformer.sh
================================================
# Anomaly detection on the MSL dataset with Autoformer.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model Autoformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/Crossformer.sh
================================================
# Anomaly detection on the MSL dataset with Crossformer.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model Crossformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/DLinear.sh
================================================
# Anomaly detection on the MSL dataset with DLinear (note: --pred_len equals --seq_len here).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model DLinear \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 100 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/ETSformer.sh
================================================
# Anomaly detection on the MSL dataset with ETSformer (3 encoder and 3 decoder layers,
# --pred_len equal to --seq_len).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model ETSformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 100 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--d_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/FEDformer.sh
================================================
# Anomaly detection on the MSL dataset with FEDformer.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model FEDformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/FiLM.sh
================================================
# Anomaly detection on the MSL dataset with FiLM (batch size 32, --pred_len equal to --seq_len).
# Restricts CUDA to device 6 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=6
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model FiLM \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 100 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 32 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/Informer.sh
================================================
# Anomaly detection on the MSL dataset with Informer.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model Informer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/KANAD.sh
================================================
# Anomaly detection on the MSL dataset with KANAD (window 64, lr 0.01, up to 100 epochs
# with early-stopping patience 5).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model KANAD \
--data MSL \
--features M \
--seq_len 64 \
--d_model 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--learning_rate 0.01 \
--batch_size 128 \
--num_workers 4 \
--patience 5 \
--train_epochs 100
================================================
FILE: scripts/anomaly_detection/MSL/LightTS.sh
================================================
# Anomaly detection on the MSL dataset with LightTS.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model LightTS \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/MICN.sh
================================================
# Anomaly detection on the MSL dataset with MICN.
# Restricts CUDA to device 1 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=1
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model MICN \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/Pyraformer.sh
================================================
# Anomaly detection on the MSL dataset with Pyraformer.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model Pyraformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/Reformer.sh
================================================
# Anomaly detection on the MSL dataset with Reformer.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model Reformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/TimesNet.sh
================================================
# Anomaly detection on the MSL dataset with TimesNet (small model: d_model 8, d_ff 16,
# one encoder layer, top_k 3, a single training epoch).
# Restricts CUDA to device 2 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=2
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model TimesNet \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 8 \
--d_ff 16 \
--e_layers 1 \
--enc_in 55 \
--c_out 55 \
--top_k 3 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 1
================================================
FILE: scripts/anomaly_detection/MSL/Transformer.sh
================================================
# Anomaly detection on the MSL dataset with Transformer.
# Restricts CUDA to device 1 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=1
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model Transformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/MSL/iTransformer.sh
================================================
# Anomaly detection on the MSL dataset with iTransformer.
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/MSL \
--model_id MSL \
--model iTransformer \
--data MSL \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 55 \
--c_out 55 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/PSM/Autoformer.sh
================================================
# Anomaly detection on the PSM dataset with Autoformer.
# Restricts CUDA to device 6 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=6
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/PSM \
--model_id PSM \
--model Autoformer \
--data PSM \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 25 \
--c_out 25 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/PSM/DLinear.sh
================================================
# Anomaly detection on the PSM dataset with DLinear (note: --pred_len equals --seq_len here).
# Restricts CUDA to device 6 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=6
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/PSM \
--model_id PSM \
--model DLinear \
--data PSM \
--features M \
--seq_len 100 \
--pred_len 100 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 25 \
--c_out 25 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/PSM/KANAD.sh
================================================
# Anomaly detection on the PSM dataset with KANAD (window 64, lr 0.01, up to 100 epochs
# with early-stopping patience 5).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/PSM \
--model_id PSM \
--model KANAD \
--data PSM \
--features M \
--seq_len 64 \
--d_model 6 \
--enc_in 25 \
--c_out 25 \
--anomaly_ratio 1 \
--learning_rate 0.01 \
--batch_size 128 \
--num_workers 4 \
--patience 5 \
--train_epochs 100
================================================
FILE: scripts/anomaly_detection/PSM/TimesNet.sh
================================================
# Anomaly detection on the PSM dataset with TimesNet (d_model 64, two encoder layers, top_k 3).
# Restricts CUDA to device 6 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=6
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/PSM \
--model_id PSM \
--model TimesNet \
--data PSM \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 64 \
--d_ff 64 \
--e_layers 2 \
--enc_in 25 \
--c_out 25 \
--top_k 3 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/PSM/Transformer.sh
================================================
# Anomaly detection on the PSM dataset with Transformer.
# Restricts CUDA to device 6 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=6
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/PSM \
--model_id PSM \
--model Transformer \
--data PSM \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 25 \
--c_out 25 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/SMAP/Autoformer.sh
================================================
# Anomaly detection on the SMAP dataset with Autoformer.
# Restricts CUDA to device 7 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=7
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMAP \
--model_id SMAP \
--model Autoformer \
--data SMAP \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 25 \
--c_out 25 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/SMAP/KANAD.sh
================================================
# Anomaly detection on the SMAP dataset with KANAD (window 64, lr 0.01, up to 100 epochs
# with early-stopping patience 5).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMAP \
--model_id SMAP \
--model KANAD \
--data SMAP \
--features M \
--seq_len 64 \
--d_model 3 \
--enc_in 25 \
--c_out 25 \
--anomaly_ratio 1 \
--learning_rate 0.01 \
--batch_size 128 \
--num_workers 4 \
--patience 5 \
--train_epochs 100
================================================
FILE: scripts/anomaly_detection/SMAP/TimesNet.sh
================================================
# Anomaly detection on the SMAP dataset with TimesNet (d_model 128, three encoder layers, top_k 3).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMAP \
--model_id SMAP \
--model TimesNet \
--data SMAP \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 25 \
--c_out 25 \
--top_k 3 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/SMAP/Transformer.sh
================================================
# Anomaly detection on the SMAP dataset with Transformer.
# Restricts CUDA to device 7 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=7
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMAP \
--model_id SMAP \
--model Transformer \
--data SMAP \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 25 \
--c_out 25 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/SMD/Autoformer.sh
================================================
# Anomaly detection on the SMD dataset with Autoformer (prior anomaly ratio 0.5).
# Restricts CUDA to device 2 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=2
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMD \
--model_id SMD \
--model Autoformer \
--data SMD \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 38 \
--c_out 38 \
--anomaly_ratio 0.5 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/SMD/KANAD.sh
================================================
# Anomaly detection on the SMD dataset with KANAD (window 96, lr 0.01, prior anomaly
# ratio 0.5, up to 100 epochs with early-stopping patience 5).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMD \
--model_id SMD \
--model KANAD \
--data SMD \
--features M \
--seq_len 96 \
--d_model 4 \
--enc_in 38 \
--c_out 38 \
--anomaly_ratio 0.5 \
--learning_rate 0.01 \
--batch_size 128 \
--num_workers 4 \
--patience 5 \
--train_epochs 100
================================================
FILE: scripts/anomaly_detection/SMD/TimesNet.sh
================================================
# Anomaly detection on the SMD dataset with TimesNet (d_model 64, two encoder layers,
# top_k 5, prior anomaly ratio 0.5).
# Restricts CUDA to device 2 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=2
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMD \
--model_id SMD \
--model TimesNet \
--data SMD \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 64 \
--d_ff 64 \
--e_layers 2 \
--enc_in 38 \
--c_out 38 \
--top_k 5 \
--anomaly_ratio 0.5 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/SMD/Transformer.sh
================================================
# Anomaly detection on the SMD dataset with Transformer (prior anomaly ratio 0.5).
# Restricts CUDA to device 2 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=2
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SMD \
--model_id SMD \
--model Transformer \
--data SMD \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 38 \
--c_out 38 \
--anomaly_ratio 0.5 \
--batch_size 128 \
--train_epochs 10
================================================
FILE: scripts/anomaly_detection/SWAT/Autoformer.sh
================================================
# Anomaly detection on the SWAT dataset (data under ./dataset/SWaT) with Autoformer.
# Restricts CUDA to device 1 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=1
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SWaT \
--model_id SWAT \
--model Autoformer \
--data SWAT \
--features M \
--seq_len 100 \
--pred_len 0 \
--d_model 128 \
--d_ff 128 \
--e_layers 3 \
--enc_in 51 \
--c_out 51 \
--top_k 3 \
--anomaly_ratio 1 \
--batch_size 128 \
--train_epochs 3
================================================
FILE: scripts/anomaly_detection/SWAT/KANAD.sh
================================================
# Anomaly detection on the SWAT dataset (data under ./dataset/SWaT) with KANAD
# (window 80, lr 0.01, up to 100 epochs with early-stopping patience 5).
# Restricts CUDA to device 0 and launches run.py with --is_training 1 (train, then test).
export CUDA_VISIBLE_DEVICES=0
python -u run.py \
--task_name anomaly_detection \
--is_training 1 \
--root_path ./dataset/SWaT \
--model_id SWAT \
--model KANAD \
--data SWAT \
--features M \
--seq_len 80 \
--d_model 1 \
--enc_in 51 \
--c_out 51 \
--anomaly_ratio 1 \
--learning_rate 0.01 \
--batch_size 128 \
--num_workers 4 \
--patience 5 \
--train_epochs 100
================================================
FILE: scripts/anomaly_detection/SWAT/TimesNet.sh
================================================
# TimesNet anomaly_detection sweep on the SWAT data.
# Eight sequential runs: the outer loop sets --e_layers (3, then 2) and the
# inner loop sets one value used for both --d_model and --d_ff (8, 16, 32, 64).
# All remaining flags are the same for every run.
export CUDA_VISIBLE_DEVICES=1

for n_layers in 3 2; do
  for width in 8 16 32 64; do
    python -u run.py \
      --task_name anomaly_detection \
      --is_training 1 \
      --root_path ./dataset/SWaT \
      --model_id SWAT \
      --model TimesNet \
      --data SWAT \
      --features M \
      --seq_len 100 \
      --pred_len 0 \
      --d_model "$width" \
      --d_ff "$width" \
      --e_layers "$n_layers" \
      --enc_in 51 \
      --c_out 51 \
      --top_k 3 \
      --anomaly_ratio 1 \
      --batch_size 128 \
      --train_epochs 3
  done
done
================================================
FILE: scripts/anomaly_detection/SWAT/Transformer.sh
================================================
# Launch run.py once for the anomaly_detection task: Transformer model, SWAT data.
export CUDA_VISIBLE_DEVICES=1

model_name=Transformer
data_name=SWAT            # reused for --model_id and --data
data_dir=./dataset/SWaT   # note the directory is spelled "SWaT", unlike the data name
io_dim=51                 # the same value is passed to --enc_in and --c_out
width=128                 # the same value is passed to --d_model and --d_ff

python -u run.py \
  --task_name anomaly_detection \
  --is_training 1 \
  --root_path "$data_dir" \
  --model_id "$data_name" \
  --model "$model_name" \
  --data "$data_name" \
  --features M \
  --seq_len 100 \
  --pred_len 0 \
  --d_model "$width" \
  --d_ff "$width" \
  --e_layers 3 \
  --enc_in "$io_dim" \
  --c_out "$io_dim" \
  --top_k 3 \
  --anomaly_ratio 1 \
  --batch_size 128 \
  --train_epochs 3
================================================
FILE: scripts/classification/Autoformer.sh
================================================
# Run the classification task with Autoformer over ten datasets, one after
# another. Every pass uses identical flags; only the dataset name changes,
# and it feeds both --root_path (./dataset/<name>/) and --model_id.
export CUDA_VISIBLE_DEVICES=5
model_name=Autoformer

for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/Crossformer.sh
================================================
# Run the classification task with Crossformer over ten datasets, one after
# another. Every pass uses identical flags; only the dataset name changes,
# and it feeds both --root_path (./dataset/<name>/) and --model_id.
export CUDA_VISIBLE_DEVICES=3
model_name=Crossformer

for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/DLinear.sh
================================================
# Run the classification task with DLinear over ten datasets, one after
# another. Every pass uses identical flags; only the dataset name changes,
# and it feeds both --root_path (./dataset/<name>/) and --model_id.
export CUDA_VISIBLE_DEVICES=5
model_name=DLinear

for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/ETSformer.sh
================================================
# Run the classification task with ETSformer over ten datasets, one after
# another. Every pass uses identical flags (including --d_layers 3); only the
# dataset name changes, feeding both --root_path (./dataset/<name>/) and
# --model_id.
export CUDA_VISIBLE_DEVICES=5
model_name=ETSformer

for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --d_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/FEDformer.sh
================================================
# Run the classification task with FEDformer over ten datasets, one after
# another. Every pass uses identical flags; only the dataset name changes,
# and it feeds both --root_path (./dataset/<name>/) and --model_id.
export CUDA_VISIBLE_DEVICES=5
model_name=FEDformer

for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/FiLM.sh
================================================
# Run the classification task with FiLM over ten datasets, one after another.
# All passes share the same flags; the EthanolConcentration pass additionally
# receives --seq_len 1751 --pred_len 1751.
export CUDA_VISIBLE_DEVICES=7
model_name=FiLM

# run_film NAME [EXTRA_FLAGS...]
# Launches one run for ./dataset/NAME/. Any extra flags are placed between
# --train_epochs and --patience, i.e. the position they held in the
# hand-written command list.
run_film() {
  film_dataset=$1
  shift
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$film_dataset/" \
    --model_id "$film_dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    "$@" \
    --patience 10
}

run_film EthanolConcentration --seq_len 1751 --pred_len 1751

for ds in \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  run_film "$ds"
done
================================================
FILE: scripts/classification/Informer.sh
================================================
# Run the classification task with Informer over ten datasets, one after
# another. Every pass uses identical flags; only the dataset name changes,
# and it feeds both --root_path (./dataset/<name>/) and --model_id.
export CUDA_VISIBLE_DEVICES=5
model_name=Informer

for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/LightTS.sh
================================================
# Run the classification task with LightTS over ten datasets, one after
# another. Every pass uses identical flags; only the dataset name changes,
# and it feeds both --root_path (./dataset/<name>/) and --model_id.
export CUDA_VISIBLE_DEVICES=5
model_name=LightTS

for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/$dataset/" \
    --model_id "$dataset" \
    --model "$model_name" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/MICN.sh
================================================
# Train MICN on ten UEA multivariate time-series classification datasets,
# one `run.py` invocation per dataset, executed sequentially.
#
# GPU selection: device 7 is the default, but a CUDA_VISIBLE_DEVICES value
# already exported by the caller (or a job scheduler) is respected, so the
# script also works on hosts that do not have eight GPUs, e.g.
#   CUDA_VISIBLE_DEVICES=0 bash ./scripts/classification/MICN.sh
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-7}"

model_name=MICN

# run_classification DATASET [EXTRA_ARGS...]
#
# Runs one classification experiment on ./dataset/DATASET/ with the shared
# hyper-parameters below. DATASET is also used as the --model_id. Any
# EXTRA_ARGS are forwarded verbatim to run.py (placed just before --patience),
# which lets a single dataset add an option without duplicating the whole
# command.
run_classification() {
  dataset=$1
  shift

  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --model_id "${dataset}" \
    --model $model_name \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    "$@" \
    --patience 10
}

# EthanolConcentration is the only dataset that overrides --c_out.
# NOTE(review): presumably 3 matches that dataset's channel count; confirm
# against the data loader before changing or removing it.
run_classification EthanolConcentration --c_out 3

run_classification FaceDetection
run_classification Handwriting
run_classification Heartbeat
run_classification JapaneseVowels
run_classification PEMS-SF
run_classification SelfRegulationSCP1
run_classification SelfRegulationSCP2
run_classification SpokenArabicDigits
run_classification UWaveGestureLibrary
================================================
FILE: scripts/classification/MambaSL.out
================================================
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: ArticularyWordRecognitionModel: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/ArticularyWordRecognition
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 128
n heads: 8 e layers: 2
d layers: 1 d FF: 8
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
275
300
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_ArticularyWordRecognition_MambaSingleLayer_UEA_ftM_sl144_ll0_pl0_dm128_ds8_expand1_dc4_nk3_tvdt0_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
300
loading model
test shape: torch.Size([300, 25]) torch.Size([300, 1])
accuracy:0.9933333333333333
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: AtrialFibrillation Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/AtrialFibrillation
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 13
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 16
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
15
15
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_AtrialFibrillation_MambaSingleLayer_UEA_ftM_sl640_ll0_pl0_dm32_ds16_expand1_dc4_nk13_tvdt1_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
15
loading model
test shape: torch.Size([15, 3]) torch.Size([15, 1])
accuracy:0.5333333333333333
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: BasicMotions Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/BasicMotions
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
40
40
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_BasicMotions_MambaSingleLayer_UEA_ftM_sl100_ll0_pl0_dm32_ds1_expand1_dc4_nk3_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
40
loading model
test shape: torch.Size([40, 4]) torch.Size([40, 1])
accuracy:1.0
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: CharacterTrajectoriesModel: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/CharacterTrajectories
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 4
Enc In: 7 Dec In: 7
C Out: 7 d model: 128
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
1422
1436
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_CharacterTrajectories_MambaSingleLayer_UEA_ftM_sl182_ll0_pl0_dm128_ds1_expand1_dc4_nk4_tvdt1_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
1436
loading model
test shape: torch.Size([1436, 20]) torch.Size([1436, 1])
accuracy:0.9972144846796658
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: Cricket Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/Cricket
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 24
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 4
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
108
72
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_Cricket_MambaSingleLayer_UEA_ftM_sl1197_ll0_pl0_dm32_ds4_expand1_dc4_nk24_tvdt0_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
72
loading model
test shape: torch.Size([72, 12]) torch.Size([72, 1])
accuracy:1.0
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: DuckDuckGeese Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/DuckDuckGeese
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 6
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 2
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
50
50
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_DuckDuckGeese_MambaSingleLayer_UEA_ftM_sl270_ll0_pl0_dm1024_ds2_expand1_dc4_nk6_tvdt0_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
50
loading model
test shape: torch.Size([50, 5]) torch.Size([50, 1])
accuracy:0.7
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: EigenWorms Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/EigenWorms
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 360
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 4
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
128
131
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_EigenWorms_MambaSingleLayer_UEA_ftM_sl17984_ll0_pl0_dm32_ds1_expand1_dc4_nk360_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
131
loading model
test shape: torch.Size([131, 5]) torch.Size([131, 1])
accuracy:0.8396946564885496
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: Epilepsy Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/Epilepsy
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 5
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
137
138
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_Epilepsy_MambaSingleLayer_UEA_ftM_sl206_ll0_pl0_dm32_ds1_expand1_dc4_nk5_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
138
loading model
test shape: torch.Size([138, 4]) torch.Size([138, 1])
accuracy:0.9782608695652174
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: ERing Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/ERing
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 128
n heads: 8 e layers: 2
d layers: 1 d FF: 8
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
30
270
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_ERing_MambaSingleLayer_UEA_ftM_sl65_ll0_pl0_dm128_ds8_expand1_dc4_nk3_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
270
loading model
test shape: torch.Size([270, 6]) torch.Size([270, 1])
accuracy:0.937037037037037
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: EthanolConcentrationModel: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/EthanolConcentration
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 36
Enc In: 7 Dec In: 7
C Out: 7 d model: 512
n heads: 8 e layers: 2
d layers: 1 d FF: 4
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
261
263
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_EthanolConcentration_MambaSingleLayer_UEA_ftM_sl1751_ll0_pl0_dm512_ds4_expand1_dc4_nk36_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
263
loading model
test shape: torch.Size([263, 4]) torch.Size([263, 1])
accuracy:0.42585551330798477
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: FaceDetection Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/FaceDetection
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 256
n heads: 8 e layers: 2
d layers: 1 d FF: 16
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
5890
3524
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_FaceDetection_MambaSingleLayer_UEA_ftM_sl62_ll0_pl0_dm256_ds16_expand1_dc4_nk3_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
3524
loading model
test shape: torch.Size([3524, 2]) torch.Size([3524, 1])
accuracy:0.6929625425652668
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: FingerMovements Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/FingerMovements
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
316
100
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_FingerMovements_MambaSingleLayer_UEA_ftM_sl50_ll0_pl0_dm32_ds1_expand1_dc4_nk3_tvdt0_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
100
loading model
test shape: torch.Size([100, 2]) torch.Size([100, 1])
accuracy:0.71
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: HandMovementDirectionModel: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/HandMovementDirection
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 8
Enc In: 7 Dec In: 7
C Out: 7 d model: 256
n heads: 8 e layers: 2
d layers: 1 d FF: 16
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
160
74
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_HandMovementDirection_MambaSingleLayer_UEA_ftM_sl400_ll0_pl0_dm256_ds16_expand1_dc4_nk8_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
74
loading model
test shape: torch.Size([74, 4]) torch.Size([74, 1])
accuracy:0.7027027027027027
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: Handwriting Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/Handwriting
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 4
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 4
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
150
850
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_Handwriting_MambaSingleLayer_UEA_ftM_sl152_ll0_pl0_dm1024_ds4_expand1_dc4_nk4_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
850
loading model
test shape: torch.Size([850, 26]) torch.Size([850, 1])
accuracy:0.6082352941176471
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: Heartbeat Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/Heartbeat
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 9
Enc In: 7 Dec In: 7
C Out: 7 d model: 64
n heads: 8 e layers: 2
d layers: 1 d FF: 16
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
204
205
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_Heartbeat_MambaSingleLayer_UEA_ftM_sl405_ll0_pl0_dm64_ds16_expand1_dc4_nk9_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
205
loading model
test shape: torch.Size([205, 2]) torch.Size([205, 1])
accuracy:0.8048780487804879
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: InsectWingbeat Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/InsectWingbeat
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 8
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
25000
25000
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_InsectWingbeat_MambaSingleLayer_UEA_ftM_sl22_ll0_pl0_dm1024_ds8_expand1_dc4_nk3_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25000
loading model
test shape: torch.Size([25000, 10]) torch.Size([25000, 1])
accuracy:0.66304
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: JapaneseVowels Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/JapaneseVowels
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 128
n heads: 8 e layers: 2
d layers: 1 d FF: 8
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
270
370
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_JapaneseVowels_MambaSingleLayer_UEA_ftM_sl29_ll0_pl0_dm128_ds8_expand1_dc4_nk3_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
370
loading model
test shape: torch.Size([370, 9]) torch.Size([370, 1])
accuracy:0.9864864864864865
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: Libras Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/Libras
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 4
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
180
180
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_Libras_MambaSingleLayer_UEA_ftM_sl45_ll0_pl0_dm1024_ds4_expand1_dc4_nk3_tvdt1_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
180
loading model
test shape: torch.Size([180, 15]) torch.Size([180, 1])
accuracy:0.9166666666666666
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: LSST Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/LSST
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 4
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
2459
2466
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_LSST_MambaSingleLayer_UEA_ftM_sl36_ll0_pl0_dm1024_ds4_expand1_dc4_nk3_tvdt1_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
2466
loading model
test shape: torch.Size([2466, 14]) torch.Size([2466, 1])
accuracy:0.4557988645579886
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: MotorImagery Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/MotorImagery
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 60
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 8
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
278
100
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_MotorImagery_MambaSingleLayer_UEA_ftM_sl3000_ll0_pl0_dm32_ds8_expand1_dc4_nk60_tvdt0_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
100
loading model
test shape: torch.Size([100, 2]) torch.Size([100, 1])
accuracy:0.69
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: NATOPS Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/NATOPS
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 512
n heads: 8 e layers: 2
d layers: 1 d FF: 2
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
180
180
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_NATOPS_MambaSingleLayer_UEA_ftM_sl51_ll0_pl0_dm512_ds2_expand1_dc4_nk3_tvdt0_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
180
loading model
test shape: torch.Size([180, 6]) torch.Size([180, 1])
accuracy:0.9888888888888889
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: PEMS-SF Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/PEMS-SF
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 512
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
267
173
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_PEMS-SF_MambaSingleLayer_UEA_ftM_sl144_ll0_pl0_dm512_ds1_expand1_dc4_nk3_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
173
loading model
test shape: torch.Size([173, 7]) torch.Size([173, 1])
accuracy:0.8554913294797688
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: PenDigits Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/PenDigits
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 64
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
7494
3498
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_PenDigits_MambaSingleLayer_UEA_ftM_sl8_ll0_pl0_dm64_ds1_expand1_dc4_nk3_tvdt0_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
3498
loading model
test shape: torch.Size([3498, 10]) torch.Size([3498, 1])
accuracy:0.9925671812464265
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: PhonemeSpectra Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/PhonemeSpectra
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 5
Enc In: 7 Dec In: 7
C Out: 7 d model: 256
n heads: 8 e layers: 2
d layers: 1 d FF: 4
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
3315
3353
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_PhonemeSpectra_MambaSingleLayer_UEA_ftM_sl217_ll0_pl0_dm256_ds4_expand1_dc4_nk5_tvdt1_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
3353
loading model
test shape: torch.Size([3353, 39]) torch.Size([3353, 1])
accuracy:0.3033104682373993
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: RacketSports Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/RacketSports
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 4
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
151
152
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_RacketSports_MambaSingleLayer_UEA_ftM_sl30_ll0_pl0_dm1024_ds4_expand1_dc4_nk3_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
152
loading model
test shape: torch.Size([152, 4]) torch.Size([152, 1])
accuracy:0.9276315789473685
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: SelfRegulationSCP1 Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/SelfRegulationSCP1
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 18
Enc In: 7 Dec In: 7
C Out: 7 d model: 256
n heads: 8 e layers: 2
d layers: 1 d FF: 16
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
268
293
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_SelfRegulationSCP1_MambaSingleLayer_UEA_ftM_sl896_ll0_pl0_dm256_ds16_expand1_dc4_nk18_tvdt1_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
293
loading model
test shape: torch.Size([293, 2]) torch.Size([293, 1])
accuracy:0.9249146757679181
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: SelfRegulationSCP2 Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/SelfRegulationSCP2
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 24
Enc In: 7 Dec In: 7
C Out: 7 d model: 256
n heads: 8 e layers: 2
d layers: 1 d FF: 16
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
200
180
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_SelfRegulationSCP2_MambaSingleLayer_UEA_ftM_sl1152_ll0_pl0_dm256_ds16_expand1_dc4_nk24_tvdt1_tvB1_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
180
loading model
test shape: torch.Size([180, 2]) torch.Size([180, 1])
accuracy:0.65
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: SpokenArabicDigits Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/SpokenArabicDigits
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 3
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 8
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
6599
2199
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_SpokenArabicDigits_MambaSingleLayer_UEA_ftM_sl93_ll0_pl0_dm1024_ds8_expand1_dc4_nk3_tvdt0_tvB1_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
2199
loading model
test shape: torch.Size([2199, 10]) torch.Size([2199, 1])
accuracy:0.9995452478399273
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: StandWalkJump Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/StandWalkJump
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 50
Enc In: 7 Dec In: 7
C Out: 7 d model: 32
n heads: 8 e layers: 2
d layers: 1 d FF: 1
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
12
15
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_StandWalkJump_MambaSingleLayer_UEA_ftM_sl2500_ll0_pl0_dm32_ds1_expand1_dc4_nk50_tvdt1_tvB0_tvC0_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
15
loading model
test shape: torch.Size([15, 3]) torch.Size([15, 1])
accuracy:0.7333333333333333
Using GPU
Args in experiment:
[1mBasic Config[0m
Task Name: classification Is Training: 0
Model ID: UWaveGestureLibrary Model: MambaSingleLayer
[1mData Loader[0m
Data: UEA Root Path: ./dataset/UWaveGestureLibrary
Data Path: ETTh1.csv Features: M
Target: OT Freq: h
Checkpoints: ./checkpoints_best/MambaSL
[1mModel Parameters[0m
Top k: 5 Num Kernels: 7
Enc In: 7 Dec In: 7
C Out: 7 d model: 1024
n heads: 8 e layers: 2
d layers: 1 d FF: 2
Moving Avg: 25 Factor: 1
Distil: 1 Dropout: 0.1
Embed: timeF Activation: gelu
[1mRun Parameters[0m
Num Workers: 10 Itr: 1
Train Epochs: 100 Batch Size: 16
Patience: 10 Learning Rate: 0.001
Des: gating4proposed Loss: MSE
Lradj: type1 Use Amp: 0
[1mGPU[0m
Use GPU: 1 GPU: 0
Use Multi GPU: 0 Devices: 0,1,2,3
[1mDe-stationary Projector Params[0m
P Hidden Dims: 128, 128 P Hidden Layers: 2
Use GPU: cuda:0
120
320
🚀 Lazy Loading: MambaSingleLayer ...
>>>>>>>testing : classification_CLS_UWaveGestureLibrary_MambaSingleLayer_UEA_ftM_sl315_ll0_pl0_dm1024_ds2_expand1_dc4_nk7_tvdt0_tvB0_tvC1_useD0_gating4proposed_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
320
loading model
test shape: torch.Size([320, 8]) torch.Size([320, 1])
accuracy:0.934375
================================================
FILE: scripts/classification/MambaSL.sh
================================================
# This script reproduces the MambaSL classification results on the 30 UEA datasets.
# Download the checkpoints from
# https://drive.google.com/drive/folders/1dJx_rpB7UnkMuxrCEoHJcXXzhaACS5Sx?usp=share_link (checkpoint_best/MambaSL.zip)
# and set the `checkpoint_dir` variable below to the path of the downloaded checkpoints.
# To reproduce the other baseline results reported in the MambaSL paper (https://openreview.net/pdf?id=YDl4vqQqGP),
# please refer to the official MambaSL repo: https://github.com/yoom618/MambaSL
# ---- Global settings ----
# model_name     : value passed to run.py as --model
# gpu_id         : value passed to run.py as --gpu
# resource_dir   : base directory that holds the dataset and checkpoint folders
# data_dir       : parent directory of the per-dataset folders (used for --root_path)
# checkpoint_dir : directory passed to run.py as --checkpoints
model_name='MambaSingleLayer'
gpu_id=0
resource_dir='.'
data_dir="${resource_dir}/dataset"
checkpoint_dir="${resource_dir}/checkpoints_best/MambaSL"
# Launch run.py once for a single UEA dataset with --is_training 0.
#
# Positional arguments:
#   $1  dataset name; used as the sub-directory of ${data_dir} and as --model_id
#   $2  --d_model
#   $3  --d_ff
#   $4  --tv_dt
#   $5  --tv_B
#   $6  --tv_C
#   $7  --num_kernels
#   $8  --batch_size (optional, defaults to 16)
#
# Reads the script-level variables gpu_id, data_dir, checkpoint_dir and model_name.
# Returns 2 without launching anything when fewer than 7 arguments are supplied,
# instead of handing run.py a command line with empty option values.
run_model() {
  if [ "$#" -lt 7 ]; then
    echo "usage: run_model <dataset> <d_model> <d_ff> <tv_dt> <tv_B> <tv_C> <num_kernels> [batch_size]" >&2
    return 2
  fi

  local dn="$1"
  local dm="$2"
  local df="$3"
  local dt="$4"
  local tb="$5"
  local tc="$6"
  local nk="$7"
  local bs="${8:-16}"

  # Every expansion is quoted so that paths containing spaces survive word splitting.
  python run.py \
    --use_gpu --gpu_type cuda --gpu "${gpu_id}" \
    --task_name classification --data UEA \
    --root_path "${data_dir}/${dn}" \
    --checkpoints "${checkpoint_dir}" \
    --model "${model_name}" \
    --model_id "${dn}" \
    --d_model "${dm}" --d_ff "${df}" --expand 1 --d_conv 4 \
    --tv_dt "${dt}" --tv_B "${tb}" --tv_C "${tc}" --use_D 0 \
    --num_kernels "${nk}" \
    --is_training 0 --pred_len 0 --label_len 0 --batch_size "${bs}" \
    --des gating4proposed --itr 1 --dropout 0.1 \
    --learning_rate 0.001 --train_epochs 100 --patience 10
}
# One run_model call per UEA dataset, executed top to bottom.
# Columns: dataset                  d_model d_ff tv_dt tv_B tv_C num_kernels batch_size
run_model ArticularyWordRecognition  128     8    0     0    1    3           16
run_model AtrialFibrillation         32      16   1     0    0    13          16
run_model BasicMotions               32      1    0     0    0    3           16
run_model CharacterTrajectories      128     1    1     0    0    4           16
run_model Cricket                    32      4    0     1    0    24          16
run_model DuckDuckGeese              1024    2    0     0    1    6           16
run_model EigenWorms                 32      1    1     1    0    360         4
run_model Epilepsy                   32      1    1     1    0    5           16
run_model ERing                      128     8    1     0    1    3           16
run_model EthanolConcentration       512     4    0     0    0    36          16
run_model FaceDetection              256     16   1     0    1    3           16
run_model FingerMovements            32      1    0     1    1    3           16
run_model HandMovementDirection      256     16   1     0    1    8           16
run_model Handwriting                1024    4    1     0    1    4           16
run_model Heartbeat                  64      16   0     0    0    9           16
run_model InsectWingbeat             1024    8    0     0    0    3           16
run_model JapaneseVowels             128     8    1     1    0    3           16
run_model Libras                     1024    4    1     1    1    3           16
run_model LSST                       1024    4    1     1    1    3           16
run_model MotorImagery               32      8    0     0    0    60          16
run_model NATOPS                     512     2    0     1    0    3           16
run_model PEMS-SF                    512     1    1     1    0    3           16
run_model PenDigits                  64      1    0     1    1    3           16
run_model PhonemeSpectra             256     4    1     1    0    5           16
run_model RacketSports               1024    4    1     0    1    3           16
run_model SelfRegulationSCP1         256     16   1     0    1    18          16
run_model SelfRegulationSCP2         256     16   1     1    1    24          16
run_model SpokenArabicDigits         1024    8    0     1    0    3           16
run_model StandWalkJump              32      1    1     0    0    50          16
run_model UWaveGestureLibrary        1024    2    0     0    1    7           16
================================================
FILE: scripts/classification/PatchTST.sh
================================================
# Runs PatchTST on ten UEA classification datasets (--is_training 1), sequentially.
export CUDA_VISIBLE_DEVICES=3

model_name=PatchTST

# All datasets share the same hyperparameters; only the dataset name
# (used for --root_path and --model_id) changes between runs.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --model_id "${dataset}" \
    --model "${model_name}" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/Pyraformer.sh
================================================
# Runs Pyraformer on ten UEA classification datasets (--is_training 1), sequentially.
export CUDA_VISIBLE_DEVICES=5

model_name=Pyraformer

# All datasets share the same hyperparameters; only the dataset name
# (used for --root_path and --model_id) changes between runs.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --model_id "${dataset}" \
    --model "${model_name}" \
    --data UEA \
    --e_layers 3 \
    --batch_size 4 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/Reformer.sh
================================================
# Runs Reformer on ten UEA classification datasets (--is_training 1), sequentially.
export CUDA_VISIBLE_DEVICES=5

model_name=Reformer

# All datasets share the same hyperparameters; only the dataset name
# (used for --root_path and --model_id) changes between runs.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --model_id "${dataset}" \
    --model "${model_name}" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/TimesNet.sh
================================================
# Runs TimesNet on ten UEA classification datasets (--is_training 1), sequentially.
export CUDA_VISIBLE_DEVICES=3

# Launch one TimesNet classification run.
#
# Usage: run_timesnet <dataset> <e_layers> <d_model> <d_ff> <top_k> <train_epochs> [extra run.py options...]
#
# The dataset name is used for both --root_path (./dataset/<dataset>/) and --model_id.
# Any arguments after the sixth are appended verbatim to the run.py command line.
# Returns 2 without launching anything when fewer than 6 arguments are supplied.
#
# Every run goes through `python -u` so that output is unbuffered for all datasets;
# previously the Handwriting run was the only one launched without -u.
run_timesnet() {
  if [ "$#" -lt 6 ]; then
    echo "usage: run_timesnet <dataset> <e_layers> <d_model> <d_ff> <top_k> <train_epochs> [run.py options...]" >&2
    return 2
  fi

  dataset="$1"
  e_layers="$2"
  d_model="$3"
  d_ff="$4"
  top_k="$5"
  train_epochs="$6"
  shift 6

  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --model_id "${dataset}" \
    --model TimesNet \
    --data UEA \
    --e_layers "${e_layers}" \
    --batch_size 16 \
    --d_model "${d_model}" \
    --d_ff "${d_ff}" \
    --top_k "${top_k}" \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs "${train_epochs}" \
    --patience 10 \
    "$@"
}

# Columns:   dataset              e_layers d_model d_ff top_k train_epochs [extra options]
run_timesnet EthanolConcentration 2        16      32   3     30
run_timesnet FaceDetection        2        64      256  3     30 --num_kernels 4
run_timesnet Handwriting          2        32      64   3     30
run_timesnet Heartbeat            3        16      32   1     30
run_timesnet JapaneseVowels       2        16      32   3     60
run_timesnet PEMS-SF              6        64      64   3     30
run_timesnet SelfRegulationSCP1   3        16      32   3     30
run_timesnet SelfRegulationSCP2   3        32      32   3     30
run_timesnet SpokenArabicDigits   2        32      32   2     30
run_timesnet UWaveGestureLibrary  2        32      64   3     30
================================================
FILE: scripts/classification/Transformer.sh
================================================
# Runs Transformer on ten UEA classification datasets (--is_training 1), sequentially.
export CUDA_VISIBLE_DEVICES=4

model_name=Transformer

# All datasets share the same hyperparameters; only the dataset name
# (used for --root_path and --model_id) changes between runs.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --model_id "${dataset}" \
    --model "${model_name}" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10
done
================================================
FILE: scripts/classification/iTransformer.sh
================================================
# Runs iTransformer on ten UEA classification datasets (--is_training 1), sequentially.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

# NOTE(review): --enc_in 3 is passed for every dataset regardless of its channel
# count; presumably the classification pipeline derives enc_in from the data — confirm.
for dataset in \
  EthanolConcentration \
  FaceDetection \
  Handwriting \
  Heartbeat \
  JapaneseVowels \
  PEMS-SF \
  SelfRegulationSCP1 \
  SelfRegulationSCP2 \
  SpokenArabicDigits \
  UWaveGestureLibrary
do
  # EthanolConcentration is the only dataset run with a wider model.
  case "${dataset}" in
    EthanolConcentration) d_model=2048 ;;
    *)                    d_model=128 ;;
  esac

  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --model_id "${dataset}" \
    --model "${model_name}" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model "${d_model}" \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10 \
    --enc_in 3
done
================================================
FILE: scripts/exogenous_forecast/ECL/TimeXer.sh
================================================
# Runs TimeXer long-term forecasting (features MS) on the electricity dataset
# for four prediction lengths, sequentially.
export CUDA_VISIBLE_DEVICES=1

model_name=TimeXer
des='Timexer-MS'

# Launch one run with a 96-step input window.
#
# Usage: run_ecl <pred_len> <e_layers> [extra run.py options...]
#
# pred_len is used for --pred_len and as the suffix of --model_id (ECL_96_<pred_len>).
# Any arguments after the second are appended verbatim to the run.py command line.
run_ecl() {
  pred_len="$1"
  e_layers="$2"
  shift 2

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "${model_name}" \
    --data custom \
    --features MS \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "${pred_len}" \
    --e_layers "${e_layers}" \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des "${des}" \
    --itr 1 \
    "$@"
}

# Columns: pred_len e_layers [extra options]
run_ecl    96       1        --batch_size 4
run_ecl    192      1        --batch_size 32
run_ecl    336      1        --batch_size 32
run_ecl    720      3        --d_model 512
================================================
FILE: scripts/exogenous_forecast/EPF/TimeXer.sh
================================================
# TimeXer on the EPF market files (NP, PJM, BE, FR, DE) with --features MS.
# One long_term_forecast run per market, seq_len 168 -> pred_len 24.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer
des='Timexer-MS'
patch_len=24

# run_epf <market> [extra run.py options...]
# <market> selects both the CSV file (<market>.csv) and the model id
# (<market>_168_24). Options after it are the per-market hyperparameters.
run_epf() {
  market=$1
  shift
  python -u run.py \
    --is_training 1 \
    --task_name long_term_forecast \
    --root_path ./dataset/EPF/ \
    --data_path "${market}.csv" \
    --model_id "${market}_168_24" \
    --model "$model_name" \
    --data custom \
    --features MS \
    --seq_len 168 \
    --pred_len 24 \
    --enc_in 3 \
    --dec_in 3 \
    --c_out 1 \
    --des "$des" \
    --patch_len "$patch_len" \
    --d_model 512 \
    --itr 1 \
    "$@"
}

run_epf NP --e_layers 3 --d_ff 512 --batch_size 4
run_epf PJM --e_layers 3 --batch_size 16
run_epf BE --e_layers 2 --d_ff 512 --batch_size 16
run_epf FR --e_layers 2 --batch_size 16
run_epf DE --e_layers 1 --batch_size 4
================================================
FILE: scripts/exogenous_forecast/ETTh1/TimeXer.sh
================================================
# TimeXer on ETTh1.csv with --features MS.
# Launches four long_term_forecast runs (seq_len 96, pred_len 96/192/336/720).
export CUDA_VISIBLE_DEVICES=1

model_name=TimeXer
des='Timexer-MS'

# run_etth1 <pred_len> [extra run.py options...]
# Every horizon shares the dataset settings and --e_layers 2; the model width
# and batch size options are supplied per call.
run_etth1() {
  horizon=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features MS \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des "$des" \
    --itr 1 \
    "$@"
}

# No --batch_size is passed for the 96 horizon, and no --d_ff for 720.
run_etth1 96 --d_model 512 --d_ff 512
run_etth1 192 --d_model 128 --d_ff 128 --batch_size 4
run_etth1 336 --d_model 512 --d_ff 512 --batch_size 32
run_etth1 720 --d_model 512 --batch_size 128
================================================
FILE: scripts/exogenous_forecast/ETTh2/TimeXer.sh
================================================
# TimeXer on ETTh2.csv with --features MS.
# Launches four long_term_forecast runs (seq_len 96, pred_len 96/192/336/720).
export CUDA_VISIBLE_DEVICES=3

model_name=TimeXer
des='Timexer-MS'

# run_etth2 <pred_len> [extra run.py options...]
# Holds the options common to all horizons; depth, width and batch size are
# passed by the caller because they change from horizon to horizon.
run_etth2() {
  horizon=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${horizon}" \
    --model "$model_name" \
    --data ETTh2 \
    --features MS \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des "$des" \
    --itr 1 \
    "$@"
}

run_etth2 96 --e_layers 1 --d_model 128 --d_ff 128 --batch_size 128
run_etth2 192 --e_layers 1 --d_model 128 --d_ff 512 --batch_size 128
run_etth2 336 --e_layers 2 --d_model 128 --d_ff 256 --batch_size 16
# No --batch_size is passed for the 720 horizon.
run_etth2 720 --e_layers 1 --d_model 256 --d_ff 512
================================================
FILE: scripts/exogenous_forecast/ETTm1/TimeXer.sh
================================================
# TimeXer on ETTm1.csv with --features MS.
# Launches four long_term_forecast runs (seq_len 96, pred_len 96/192/336/720).
export CUDA_VISIBLE_DEVICES=2

model_name=TimeXer
des='Timexer-MS'

for pred_len in 96 192 336 720; do
  # Only the model width changes with the horizon: 256 for 96, 128 otherwise.
  case "$pred_len" in
    96) d_model=256 ;;
    *)  d_model=128 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features MS \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model "$d_model" \
    --batch_size 128 \
    --des "$des" \
    --itr 1
done
================================================
FILE: scripts/exogenous_forecast/ETTm2/TimeXer.sh
================================================
# TimeXer on ETTm2.csv with --features MS.
# Launches four long_term_forecast runs (seq_len 96, pred_len 96/192/336/720).
export CUDA_VISIBLE_DEVICES=2

model_name=TimeXer
des='Timexer-MS'

for pred_len in 96 192 336 720; do
  # Model width and batch size are the only horizon-specific settings.
  case "$pred_len" in
    96)  d_model=512; batch_size=16 ;;
    192) d_model=256; batch_size=4 ;;
    *)   d_model=128; batch_size=128 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features MS \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model "$d_model" \
    --batch_size "$batch_size" \
    --des "$des" \
    --itr 1
done
================================================
FILE: scripts/exogenous_forecast/Traffic/TimeXer.sh
================================================
# TimeXer on traffic.csv with --features MS.
# Launches four long_term_forecast runs (seq_len 96, pred_len 96/192/336/720).
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer
des='Timexer-MS'

# All horizons use the same hyperparameters, so a single loop drives them.
# Every run is tagged with $des; previously the 192 horizon alone passed the
# literal 'Exp', so its description tag differed from the other three runs
# launched by this script.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features MS \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --des "$des" \
    --batch_size 4 \
    --itr 1
done
================================================
FILE: scripts/exogenous_forecast/Weather/TimeXer.sh
================================================
# TimeXer on weather.csv with --features MS.
# Launches four long_term_forecast runs (seq_len 96, pred_len 96/192/336/720).
export CUDA_VISIBLE_DEVICES=3

model_name=TimeXer
des='Timexer-MS'

# The four runs differ only in the forecast horizon (and the model id that
# is derived from it).
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id "weather_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features MS \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des "$des" \
    --d_model 128 \
    --itr 1
done
================================================
FILE: scripts/imputation/ECL_script/Autoformer.sh
================================================
# Autoformer imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=Autoformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/DLinear.sh
================================================
# DLinear imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=DLinear

# Channel count for electricity.csv with --features M. The other ECL
# imputation scripts all declare 321 for this file; this script previously
# passed 7, which did not match the dataset it loads.
n_channels=321

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  # NOTE(review): --pred_len stays at 96 as in the original script, although
  # most sibling imputation scripts pass 0 - confirm whether that is intended.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 96 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in "$n_channels" \
    --dec_in "$n_channels" \
    --c_out "$n_channels" \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/ETSformer.sh
================================================
# ETSformer imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=ETSformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 2 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/FEDformer.sh
================================================
# FEDformer imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=FEDformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/Informer.sh
================================================
# Informer imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=Informer

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/LightTS.sh
================================================
# LightTS imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=LightTS

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  # This script passes --pred_len 96 for every mask rate.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 96 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/Pyraformer.sh
================================================
# Pyraformer imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=Pyraformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/Reformer.sh
================================================
# Reformer imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=Reformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/TimesNet.sh
================================================
# TimesNet imputation on electricity.csv.
# One run per mask rate (0.125, 0.25, 0.375, 0.5); everything else is fixed.
# NOTE(review): this script pins GPU index 7 - adjust for the local machine.
export CUDA_VISIBLE_DEVICES=7

model_name=TimesNet

for mask_rate in 0.125 0.25 0.375 0.5; do
  # The mask rate is also embedded in the model id, e.g. ECL_mask_0.125.
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" \
    --mask_rate "$mask_rate" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --batch_size 16 \
    --d_model 64 \
    --d_ff 64 \
    --des 'Exp' \
    --itr 1 \
    --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/Transformer.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

# Launch one imputation run of run.py on electricity.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/electricity/ --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data custom --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 321 --dec_in 321 --c_out 321 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ECL_script/iTransformer.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

# Launch one imputation run of run.py on electricity.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/electricity/ --data_path electricity.csv \
    --model_id "ECL_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data custom --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 321 --dec_in 321 --c_out 321 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Autoformer_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Autoformer

# Launch one imputation run of run.py on ETTh1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh1 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Autoformer_ETTh2.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Autoformer

# Launch one imputation run of run.py on ETTh2.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh2.csv \
    --model_id "ETTh2_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh2 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Autoformer_ETTm1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Autoformer

# Launch one imputation run of run.py on ETTm1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTm1.csv \
    --model_id "ETTm1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTm1 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Autoformer_ETTm2.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Autoformer

# Launch one imputation run of run.py on ETTm2.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTm2.csv \
    --model_id "ETTm2_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTm2 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Crossformer_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Crossformer

# Launch one imputation run of run.py on ETTh1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh1 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 64 --d_ff 64 \
    --des 'Exp' --itr 1 --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/DLinear_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=DLinear

# Launch one imputation run of run.py on ETTh1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
# Note: this script passes --pred_len 96 on every run.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh1 --features M \
    --seq_len 96 --label_len 0 --pred_len 96 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/FiLM_ETTh1.sh
================================================
# Restricts CUDA to device index 6; change this if your machine has fewer GPUs.
export CUDA_VISIBLE_DEVICES=6

model_name=FiLM

# Launch one imputation run of run.py on ETTh1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
# Note: this script passes --pred_len 96 on every run.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh1 --features M \
    --seq_len 96 --label_len 0 --pred_len 96 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 64 --d_ff 64 \
    --des 'Exp' --itr 1 --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/MICN_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=MICN

# Launch one imputation run of run.py on ETTh1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh1 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 64 --d_ff 64 \
    --des 'Exp' --itr 1 --top_k 3 \
    --conv_kernel 12 16 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Nonstationary_Transformer_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=Nonstationary_Transformer

# Launch one imputation run of run.py on ETTh1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh1 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 128 --d_ff 128 \
    --des 'Exp' --itr 1 --top_k 5 \
    --learning_rate 0.001 \
    --p_hidden_dims 256 256 \
    --p_hidden_layers 2
done
================================================
FILE: scripts/imputation/ETT_script/TiDE_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=TiDE

# Mask ratio 0.125.
# The last argument must NOT end with a line-continuation backslash: previously
# a trailing "\" after "--train_epochs 10" glued the following
# "python -u run.py ..." line onto this command's argument list, so the 0.25
# run was never started as a command of its own.
python -u run.py \
  --task_name imputation \
  --is_training 1 \
  --root_path ./dataset/ETT-small/ \
  --data_path ETTh1.csv \
  --model_id ETTh1_mask_0.125 \
  --mask_rate 0.125 \
  --model $model_name \
  --data ETTh1 \
  --features M \
  --seq_len 96 \
  --label_len 0 \
  --pred_len 0 \
  --e_layers 2 \
  --d_layers 2 \
  --enc_in 7 \
  --dec_in 7 \
  --c_out 7 \
  --batch_size 16 \
  --d_model 256 \
  --d_ff 256 \
  --dropout 0.3 \
  --learning_rate 0.1 \
  --patience 5 \
  --train_epochs 10

# Mask ratios 0.25, 0.375 and 0.5 share one set of flags; only --model_id and
# --mask_rate change between iterations.
# NOTE(review): these runs use different hyperparameters from the 0.125 run
# above (d_layers 1, d_model 16, d_ff 32, learning_rate 0.001, no --dropout /
# --patience / --train_epochs). Values are kept exactly as they were; confirm
# whether the difference is intentional.
for mask_rate in 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" \
    --mask_rate "${mask_rate}" \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --batch_size 16 \
    --d_model 16 \
    --d_ff 32 \
    --des 'Exp' \
    --itr 1 \
    --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/TimesNet_ETTh1.sh
================================================
export CUDA_VISIBLE_DEVICES=0

model_name=TimesNet

# Launch one imputation run of run.py on ETTh1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh1 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 16 --d_ff 32 \
    --des 'Exp' --itr 1 --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/TimesNet_ETTh2.sh
================================================
# Restricts CUDA to device index 1; change this if your machine differs.
export CUDA_VISIBLE_DEVICES=1

model_name=TimesNet

# Launch one imputation run of run.py on ETTh2.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTh2.csv \
    --model_id "ETTh2_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTh2 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 32 --d_ff 32 \
    --des 'Exp' --itr 1 --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/TimesNet_ETTm1.sh
================================================
# Restricts CUDA to device index 2; change this if your machine differs.
export CUDA_VISIBLE_DEVICES=2

model_name=TimesNet

# Launch one imputation run of run.py on ETTm1.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTm1.csv \
    --model_id "ETTm1_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTm1 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 64 --d_ff 64 \
    --des 'Exp' --itr 1 --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/TimesNet_ETTm2.sh
================================================
# Restricts CUDA to device index 5; change this if your machine differs.
export CUDA_VISIBLE_DEVICES=5

model_name=TimesNet

# Launch one imputation run of run.py on ETTm2.csv per mask ratio.
# Only --model_id and --mask_rate change between iterations.
for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation --is_training 1 \
    --root_path ./dataset/ETT-small/ --data_path ETTm2.csv \
    --model_id "ETTm2_mask_${mask_rate}" --mask_rate "${mask_rate}" \
    --model "${model_name}" --data ETTm2 --features M \
    --seq_len 96 --label_len 0 --pred_len 0 \
    --e_layers 2 --d_layers 1 --factor 3 \
    --enc_in 7 --dec_in 7 --c_out 7 \
    --batch_size 16 --d_model 64 --d_ff 64 \
    --des 'Exp' --itr 1 --top_k 3 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Transformer_ETTh1.sh
================================================
# Imputation runs for Transformer on ETTh1. The four launches differ only in
# the mask rate (and the model_id derived from it), so they share one loop.
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_mask_${mask_rate}" \
    --mask_rate "${mask_rate}" \
    --model "${model_name}" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Transformer_ETTh2.sh
================================================
# Imputation runs for Transformer on ETTh2, one launch per mask rate.
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

# Launch a single imputation run; $1 is the mask rate, which is also embedded
# in the model_id.
run_with_mask_rate() {
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_mask_$1" \
    --mask_rate "$1" \
    --model "${model_name}" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
}

run_with_mask_rate 0.125
run_with_mask_rate 0.25
run_with_mask_rate 0.375
run_with_mask_rate 0.5
================================================
FILE: scripts/imputation/ETT_script/Transformer_ETTm1.sh
================================================
# Imputation runs for Transformer on ETTm1: the same run.py command is issued
# for each mask rate in turn.
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

for mask_rate in 0.125 0.25 0.375 0.5
do
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_mask_${mask_rate}" \
    --mask_rate "${mask_rate}" \
    --model "${model_name}" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/Transformer_ETTm2.sh
================================================
# Imputation runs for Transformer on ETTm2. Only --mask_rate and the matching
# --model_id change between launches.
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_mask_${mask_rate}" \
    --mask_rate "${mask_rate}" \
    --model "${model_name}" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/ETT_script/iTransformer_ETTh2.sh
================================================
# Imputation runs for iTransformer on ETTh2, sweeping the mask rate while
# keeping all remaining arguments fixed.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

for mask_rate in 0.125 0.25 0.375 0.5
do
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_mask_${mask_rate}" \
    --mask_rate "${mask_rate}" \
    --model "${model_name}" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/Weather_script/Autoformer.sh
================================================
# Imputation runs for Autoformer on the weather dataset: one run.py launch
# per mask rate, all other arguments identical.
export CUDA_VISIBLE_DEVICES=0

model_name=Autoformer

for mask_rate in 0.125 0.25 0.375 0.5; do
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id "weather_mask_${mask_rate}" \
    --mask_rate "${mask_rate}" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/imputation/Weather_script/TimesNet.sh
================================================
# Imputation runs for TimesNet on the weather dataset, one launch per mask
# rate.
export CUDA_VISIBLE_DEVICES=6

model_name=TimesNet

# Launch a single imputation run; $1 is the mask rate, which is also embedded
# in the model_id.
run_with_mask_rate() {
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id "weather_mask_$1" \
    --mask_rate "$1" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --batch_size 16 \
    --d_model 64 \
    --d_ff 64 \
    --des 'Exp' \
    --itr 1 \
    --top_k 3 \
    --learning_rate 0.001
}

run_with_mask_rate 0.125
run_with_mask_rate 0.25
run_with_mask_rate 0.375
run_with_mask_rate 0.5
================================================
FILE: scripts/imputation/Weather_script/Transformer.sh
================================================
# Imputation runs for Transformer on the weather dataset. The mask rate is
# the only value that varies between the four launches.
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

for mask_rate in 0.125 0.25 0.375 0.5
do
  python -u run.py \
    --task_name imputation \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id "weather_mask_${mask_rate}" \
    --mask_rate "${mask_rate}" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 0 \
    --pred_len 0 \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 128 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5 \
    --learning_rate 0.001
done
================================================
FILE: scripts/long_term_forecast/AugmentSample/Classification/PatchTST.sh
================================================
# Classification runs for PatchTST on EthanolConcentration: run.py is launched
# once per augmentation name, and the name is passed as a bare "--<name>" flag.
export CUDA_VISIBLE_DEVICES=0

model_name=PatchTST

augmentations="jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp wdba discdtw discsdtw"

# Left unquoted on purpose so the shell splits the list into separate names.
for aug in ${augmentations}; do
  printf 'using augmentation: %s\n' "${aug}"

  python -u run.py \
    --task_name classification \
    --is_training 1 \
    --root_path ./dataset/EthanolConcentration/ \
    --model_id EthanolConcentration \
    --model "${model_name}" \
    --data UEA \
    --e_layers 3 \
    --batch_size 16 \
    --d_model 128 \
    --d_ff 256 \
    --top_k 3 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --train_epochs 100 \
    --patience 10 \
    --augmentation_ratio 1 \
    "--${aug}"
done
================================================
FILE: scripts/long_term_forecast/AugmentSample/Forecasting/PatchTST.sh
================================================
# Long-term forecasting runs for PatchTST on exchange_rate: every augmentation
# name is combined with every prediction length, augmentation being the outer
# loop. The name is passed to run.py as a bare "--<name>" flag.
export CUDA_VISIBLE_DEVICES=0

model_name=PatchTST

for aug in \
  jitter scaling permutation magwarp timewarp windowslice windowwarp \
  rotation spawner dtwwarp shapedtwwarp discdtw discsdtw
do
  for pred_len in 96 192 336 720; do
    printf 'using augmentation: %s\n' "${aug}"

    python -u run.py \
      --task_name long_term_forecast \
      --is_training 1 \
      --root_path ./dataset/exchange_rate/ \
      --data_path exchange_rate.csv \
      --model_id "Exchange_96_${pred_len}" \
      --model "${model_name}" \
      --data custom \
      --features M \
      --seq_len 96 \
      --label_len 48 \
      --pred_len "${pred_len}" \
      --e_layers 2 \
      --d_layers 1 \
      --factor 3 \
      --enc_in 8 \
      --dec_in 8 \
      --c_out 8 \
      --des 'Exp' \
      --itr 1 \
      --augmentation_ratio 1 \
      "--${aug}"
  done
done
================================================
FILE: scripts/long_term_forecast/AugmentSample/ReadMe.md
================================================
# Augmentation Feature Roadbook
Hi there! This guide is for those who are interested in testing
augmentation techniques in `Time-Series-Library`.
For now, we have embedded several augmentation methods
in this repo. We are still collecting publicly available
augmentation algorithms, and we appreciate your valuable
advice!
```
The Implemented Augmentation Methods
1. jitter
2. scaling
3. permutation
4. magwarp
5. timewarp
6. windowslice
7. windowwarp
8. rotation
9. spawner
10. dtwwarp
11. shapedtwwarp
12. wdba (Specially Designed for Classification tasks)
13. discdtw
14. discsdtw
```
## Usage
In this folder, we present two sample shell scripts
that apply augmentation in the `Forecasting` and
`Classification` tasks.
Take the `Classification` task for example: we test multiple
augmentation algorithms on the `EthanolConcentration` dataset
(a subset of the popular classification benchmark `UEA`)
using the `PatchTST` model.
```shell
export CUDA_VISIBLE_DEVICES=0
model_name=PatchTST
for aug in jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp wdba discdtw discsdtw
do
echo using augmentation: ${aug}
python -u run.py \
--task_name classification \
--is_training 1 \
--root_path ./dataset/EthanolConcentration/ \
--model_id EthanolConcentration \
--model $model_name \
--data UEA \
--e_layers 3 \
--batch_size 16 \
--d_model 128 \
--d_ff 256 \
--top_k 3 \
--des 'Exp' \
--itr 1 \
--learning_rate 0.001 \
--train_epochs 100 \
--patience 10 \
--augmentation_ratio 1 \
--${aug}
done
```
Here, the parameter `augmentation_ratio` specifies how many
times we want to apply the augmentation method.
The parameter `${aug}` is a string holding the augmentation
type label.
The example here only performs augmentation once, so we
can set `augmentation_ratio` to `1`, followed by one
augmentation type label. Similarly, you can set
`augmentation_ratio` to an integer `num` followed by
`num` augmentation type labels.
The augmentation code follows the same prototype as
`Time-Series-Library`. If you want to adjust other
training parameters, feel free to add arguments to the
shell scripts and play around. The full list of parameters
can be seen in `run.py`.
## Contact Us!
This piece of code is written and maintained by
[Yunzhong Qiu](https://github.com/DigitalLifeYZQiu).
We thank [Haixu Wu](https://github.com/wuhaixu2016) and
[Jiaxiang Dong](https://github.com/dongjiaxiang) for
insightful discussion and solid support.
If you have difficulties or find bugs in our code, please
contact us:
- Email: qiuyz24@mails.tsinghua.edu.cn
================================================
FILE: scripts/long_term_forecast/ECL_script/Autoformer.sh
================================================
# Long-term forecasting runs for Autoformer on the electricity dataset: one
# launch per prediction length, with the length also embedded in the model_id.
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "${pred_len}" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/Crossformer.sh
================================================
# Long-term forecasting runs for Crossformer on the electricity dataset. Only
# the prediction length (and the model_id built from it) varies per launch.
export CUDA_VISIBLE_DEVICES=7

model_name=Crossformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "${pred_len}" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --batch_size 16 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/DLinear.sh
================================================
# Long-term forecasting runs for DLinear on the electricity dataset, one
# launch per prediction length.
export CUDA_VISIBLE_DEVICES=1

model_name=DLinear

# Launch a single forecasting run; $1 is the prediction length, which is also
# embedded in the model_id.
run_with_pred_len() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_$1" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$1" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
}

run_with_pred_len 96
run_with_pred_len 192
run_with_pred_len 336
run_with_pred_len 720
================================================
FILE: scripts/long_term_forecast/ECL_script/ETSformer.sh
================================================
# Long-term forecasting runs for ETSformer on the electricity dataset,
# sweeping the prediction length. Note that --d_layers is 2 here.
export CUDA_VISIBLE_DEVICES=1

model_name=ETSformer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "${pred_len}" \
    --e_layers 2 \
    --d_layers 2 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/FEDformer.sh
================================================
# Long-term forecasting runs for FEDformer on the electricity dataset: the
# same run.py command is issued for each prediction length.
export CUDA_VISIBLE_DEVICES=1

model_name=FEDformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "${pred_len}" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/FiLM.sh
================================================
# Long-term forecasting runs for FiLM on the electricity dataset, one launch
# per prediction length.
#
# NOTE(review): the model_id keeps the "ECL_96_<pred_len>" pattern although
# --seq_len is 192, and --features is S while enc_in/dec_in/c_out are 321.
# Both are carried over unchanged; confirm they are intentional.
export CUDA_VISIBLE_DEVICES=3

model_name=FiLM

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model "${model_name}" \
    --data custom \
    --features S \
    --seq_len 192 \
    --label_len 48 \
    --pred_len "${pred_len}" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --batch_size 4 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/Informer.sh
================================================
# Long-term forecasting runs for Informer on the electricity dataset, one
# launch per prediction length.
export CUDA_VISIBLE_DEVICES=1

model_name=Informer

# Launch a single forecasting run; $1 is the prediction length, which is also
# embedded in the model_id.
run_with_pred_len() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_$1" \
    --model "${model_name}" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$1" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
}

run_with_pred_len 96
run_with_pred_len 192
run_with_pred_len 336
run_with_pred_len 720
================================================
FILE: scripts/long_term_forecast/ECL_script/Koopa.sh
================================================
# Koopa on the Electricity (ECL) data: long-term forecasting where the input
# window is always twice the prediction horizon (96/48, 192/96, 288/144, 384/192).
export CUDA_VISIBLE_DEVICES=1

model_name=Koopa

for pred_len in 48 96 144 192; do
  # Look-back window is derived from the horizon: seq_len = 2 * pred_len.
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_${seq_len}_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/LightTS.sh
================================================
# LightTS on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=1

model_name=LightTS

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/MICN.sh
================================================
# MICN on the Electricity (ECL) data: long-term forecasting from a 96-step
# input window (label_len 96), one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=4

model_name=MICN

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/Mamba.sh
================================================
# Mamba on the Electricity (ECL) data: long-term forecasting, one training run
# per horizon. The input window is set equal to the prediction horizon
# (--seq_len and --pred_len both receive $pred_len).
model_name=Mamba

# To run only the longer horizons, restrict the list to: 336 720
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_${pred_len}_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 321 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 321 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
  # The last argument must NOT end with a backslash: a trailing "\" would join
  # "done" onto the python command line and leave the for-loop unterminated.
done
================================================
FILE: scripts/long_term_forecast/ECL_script/MultiPatchFormer.sh
================================================
# MultiPatchFormer on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=MultiPatchFormer

# Horizon 96 uses its own configuration (1 encoder layer, 8 heads, batch 32).
python -u run.py \
  --task_name long_term_forecast \
  --is_training 1 \
  --root_path ./dataset/electricity/ \
  --data_path electricity.csv \
  --model_id ECL_96_96 \
  --model $model_name \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 1 \
  --enc_in 321 \
  --dec_in 321 \
  --c_out 321 \
  --d_model 256 \
  --d_ff 512 \
  --des 'Exp' \
  --n_heads 8 \
  --batch_size 32 \
  --itr 1

# The remaining horizons share a second configuration.
# NOTE(review): these runs differ from the 96-step run (e_layers 2, extra
# --d_layers/--factor/--top_k, no --n_heads/--batch_size) -- confirm that the
# difference is intentional.
for pred_len in 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/Nonstationary_Transformer.sh
================================================
# Nonstationary Transformer on the Electricity (ECL) data: long-term
# forecasting from a 96-step input window, one training run per horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=Nonstationary_Transformer

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims 256 256 \
    --p_hidden_layers 2 \
    --d_model 2048
done
================================================
FILE: scripts/long_term_forecast/ECL_script/PatchTST.sh
================================================
# PatchTST on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window with batch size 16, one training run per horizon.
export CUDA_VISIBLE_DEVICES=6

model_name=PatchTST

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --batch_size 16 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/Pyraformer.sh
================================================
# Pyraformer on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=1

model_name=Pyraformer

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/Reformer.sh
================================================
# Reformer on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=1

model_name=Reformer

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/SegRNN.sh
================================================
# SegRNN on the Electricity (ECL) data: long-term forecasting from a fixed
# input window, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=SegRNN
seq_len=96

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_${seq_len}_${horizon}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$horizon" \
    --seg_len 24 \
    --enc_in 321 \
    --d_model 512 \
    --dropout 0 \
    --learning_rate 0.001 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/TSMixer.sh
================================================
# TSMixer on the Electricity (ECL) data: long-term forecasting from a 96-step
# input window (label_len 96), one training run per prediction horizon.
model_name=TSMixer

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/TimeMixer.sh
================================================
# TimeMixer on the Electricity (ECL) data: long-term forecasting, one training
# run per prediction horizon with shared hyperparameters.
#export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

# Hyperparameters common to every horizon.
seq_len=96
e_layers=3
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32
batch_size=32
train_epochs=20
patience=10

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_${seq_len}_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --batch_size "$batch_size" \
    --learning_rate "$learning_rate" \
    --train_epochs "$train_epochs" \
    --patience "$patience" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done
================================================
FILE: scripts/long_term_forecast/ECL_script/TimeXer.sh
================================================
# TimeXer on the Electricity (ECL) data: long-term forecasting from a 96-step
# input window. The encoder depth varies per horizon, and the 96-step run
# additionally sets --d_ff 512.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# run_timexer HORIZON E_LAYERS [EXTRA_ARGS...]
# Launches one training run; EXTRA_ARGS are forwarded verbatim to run.py just
# before --batch_size, matching where the original per-run flags appeared.
run_timexer() {
  horizon=$1
  depth=$2
  shift 2

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${horizon}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers "$depth" \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    "$@" \
    --batch_size 4 \
    --itr 1
}

run_timexer 96 4 --d_ff 512
run_timexer 192 3
run_timexer 336 4
run_timexer 720 3
================================================
FILE: scripts/long_term_forecast/ECL_script/TimesNet.sh
================================================
# TimesNet on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=4

model_name=TimesNet

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/Transformer.sh
================================================
# Transformer on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window, one training run per prediction horizon.
# NOTE(review): this script runs with --features S and a single channel
# (enc_in/dec_in/c_out = 1), whereas the sibling ECL scripts use --features M
# with 321 channels -- confirm this is intentional.
export CUDA_VISIBLE_DEVICES=2

model_name=Transformer

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features S \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ECL_script/WPMixer.sh
================================================
# WPMixer on the Electricity (ECL) data: long-term forecasting, one training
# run per prediction horizon with per-horizon hyperparameters.
# Requires bash (indexed arrays).
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset directory / CSV base name under ./dataset/
dataset=electricity

# Per-horizon settings: element i of every array belongs to pred_lens[i].
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.00328086 0.000493286 0.002505375 0.001977516)
batches=(32 32 32 32)
epochs=(100 100 100 100)
dropouts=(0.1 0.1 0.2 0.1)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(32 32 32 32)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script
# (they are recorded here for reference only and are not passed to run.py).
wavelets=(sym3 coif5 sym4 db2)
levels=(2 3 1 2)
tfactors=(3 7 5 7)
dfactors=(5 5 7 8)
strides=(8 8 8 8)

# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  # Data arguments follow the other ECL scripts in this directory: the CSV
  # lives under ./dataset/electricity/, is loaded with the generic "custom"
  # loader, and has 321 variables. The previous values (./data/electricity/
  # and --data electricity, with no channel counts) did not match them.
  python -u run.py \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --data_path "${dataset}.csv" \
    --model_id wpmixer \
    --model $model_name \
    --task_name long_term_forecast \
    --data custom \
    --features M \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done
================================================
FILE: scripts/long_term_forecast/ECL_script/iTransformer.sh
================================================
# iTransformer on the Electricity (ECL) data: long-term forecasting from a
# 96-step input window, one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

# Only the horizon (and the model_id derived from it) changes between runs.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_96_${pred_len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 3 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --des 'Exp' \
    --d_model 512 \
    --d_ff 512 \
    --batch_size 16 \
    --learning_rate 0.0005 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTh1.sh
================================================
# Autoformer on ETTh1: long-term forecasting from a 96-step input window,
# one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=Autoformer
dataset=ETTh1

# The dataset name is used for the CSV file, the model_id prefix and --data.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path "${dataset}.csv" \
    --model_id "${dataset}_96_${pred_len}" \
    --model $model_name \
    --data "$dataset" \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTh2.sh
================================================
# Autoformer on ETTh2: long-term forecasting from a 96-step input window,
# one training run per prediction horizon.
export CUDA_VISIBLE_DEVICES=2

model_name=Autoformer
dataset=ETTh2

# The dataset name is used for the CSV file, the model_id prefix and --data.
for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path "${dataset}.csv" \
    --model_id "${dataset}_96_${pred_len}" \
    --model $model_name \
    --data "$dataset" \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTm1.sh
================================================
# Autoformer on ETTm1: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

# Only --pred_len and the --model_id suffix differ between the four runs.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Autoformer_ETTm2.sh
================================================
# Autoformer on ETTm2: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=2

model_name=Autoformer

# Only --pred_len and the --model_id suffix differ between the four runs.
# This script passes --factor 1 for every horizon.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTh1.sh
================================================
# Crossformer on ETTh1: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=0

model_name=Crossformer

# Only --pred_len and the --model_id suffix differ between the four runs.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTh2.sh
================================================
# Crossformer on ETTh2: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=1

model_name=Crossformer

# Only --pred_len and the --model_id suffix differ between the four runs.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTm1.sh
================================================
# Crossformer on ETTm1: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=2

model_name=Crossformer

# Only --pred_len and the --model_id suffix differ between the four runs.
# NOTE(review): no --factor option is passed in this script, so run.py is
# presumably falling back to its own default; confirm that is intended.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Crossformer_ETTm2.sh
================================================
# Crossformer on ETTm2: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
# The script pins itself to CUDA device index 4.
export CUDA_VISIBLE_DEVICES=4

model_name=Crossformer

# Only --pred_len and the --model_id suffix differ between the four runs.
# This script passes --factor 1 for every horizon.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/DLinear_ETTh1.sh
================================================
# DLinear on ETTh1: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=2

model_name=DLinear

# Only --pred_len and the --model_id suffix differ between the four runs.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/ETSformer_ETTh1.sh
================================================
# ETSformer on ETTh1: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=2

model_name=ETSformer

# Only --pred_len and the --model_id suffix differ between the four runs.
# Both --e_layers and --d_layers are set to 2 in this script.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 2 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/FEDformer_ETTh1.sh
================================================
# FEDformer on ETTh1: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=2

model_name=FEDformer

# Only --pred_len and the --model_id suffix differ between the four runs.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTh1.sh
================================================
# FiLM on ETTh1: long_term_forecast runs (--is_training 1) with a
# 336-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=0

model_name=FiLM

# Only --pred_len and the --model_id suffix differ between the four runs.
# NOTE(review): --model_id carries a "96" tag although --seq_len is 336;
# the tag is reproduced unchanged so the run identifiers stay the same.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 336 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTh2.sh
================================================
# FiLM on ETTh2: long_term_forecast runs (--is_training 1) with a
# 168-step input window, one run per prediction length.
# The script pins itself to CUDA device index 6.
export CUDA_VISIBLE_DEVICES=6

model_name=FiLM

# Only --pred_len and the --model_id suffix differ between the four runs.
# NOTE(review): --model_id carries a "96" tag although --seq_len is 168;
# the tag is reproduced unchanged so the run identifiers stay the same.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 168 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTm1.sh
================================================
# FiLM on ETTm1: long_term_forecast runs (--is_training 1) with a
# 720-step input window, one run per prediction length.
# The script pins itself to CUDA device index 6.
export CUDA_VISIBLE_DEVICES=6

model_name=FiLM

# Only --pred_len and the --model_id suffix differ between the four runs.
# NOTE(review): --model_id carries a "96" tag although --seq_len is 720;
# the tag is reproduced unchanged so the run identifiers stay the same.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 720 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/FiLM_ETTm2.sh
================================================
# FiLM on ETTm2: long_term_forecast runs (--is_training 1) with a
# 720-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=3

model_name=FiLM

# Only --pred_len and the --model_id suffix differ between the four runs.
# NOTE(review): --model_id carries a "96" tag although --seq_len is 720;
# the tag is reproduced unchanged so the run identifiers stay the same.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 720 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Informer_ETTh1.sh
================================================
# Informer on ETTh1: long_term_forecast runs (--is_training 1) with a
# 96-step input window, one run per prediction length.
export CUDA_VISIBLE_DEVICES=2

model_name=Informer

# Only --pred_len and the --model_id suffix differ between the four runs.
for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTh1.sh
================================================
# Koopa on ETTh1: long_term_forecast runs (--is_training 1) for four
# (seq_len, pred_len) pairs: 96/48, 192/96, 288/144 and 384/192.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

# In every run the input window is exactly twice the prediction length,
# so seq_len is derived from pred_len instead of being listed separately.
for pred_len in 48 96 144 192
do
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTh2.sh
================================================
# Koopa on ETTh2: long_term_forecast runs (--is_training 1) for four
# (seq_len, pred_len) pairs: 96/48, 192/96, 288/144 and 384/192.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

# In every run the input window is exactly twice the prediction length,
# so seq_len is derived from pred_len instead of being listed separately.
for pred_len in 48 96 144 192
do
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTm1.sh
================================================
# Koopa on ETTm1: long_term_forecast runs (--is_training 1) for four
# (seq_len, pred_len) pairs: 96/48, 192/96, 288/144 and 384/192.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

# In every run the input window is exactly twice the prediction length,
# so seq_len is derived from pred_len instead of being listed separately.
for pred_len in 48 96 144 192
do
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Koopa_ETTm2.sh
================================================
# Koopa on ETTm2: long_term_forecast runs (--is_training 1) for four
# (seq_len, pred_len) pairs: 96/48, 192/96, 288/144 and 384/192.
export CUDA_VISIBLE_DEVICES=2

model_name=Koopa

# In every run the input window is exactly twice the prediction length,
# so seq_len is derived from pred_len instead of being listed separately.
for pred_len in 48 96 144 192
do
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/LTSM.sh
================================================
# Chronos2 on the four ETT-small datasets: zero_shot_forecast runs
# (--is_training 0) with a 2048-step input window, one run per dataset and
# prediction length.
export CUDA_VISIBLE_DEVICES=2

model_name=Chronos2
seq_len=2048

# run_zero_shot DATASET D_MODEL
# Launch one zero-shot run per prediction length for ./dataset/ETT-small/DATASET.csv.
# The horizon list lives here, in one place, so every dataset is evaluated
# on the same four horizons. The ETTm1 loop previously started at 192 and
# skipped 96, unlike the other three datasets.
run_zero_shot() {
  dataset=$1
  d_model=$2

  for pred_len in 96 192 336 720
  do
    python -u run.py \
      --task_name zero_shot_forecast \
      --is_training 0 \
      --root_path ./dataset/ETT-small/ \
      --data_path "${dataset}.csv" \
      --model_id "${dataset}_${seq_len}_${pred_len}" \
      --model "$model_name" \
      --data "$dataset" \
      --features M \
      --seq_len "$seq_len" \
      --pred_len "$pred_len" \
      --seg_len 24 \
      --enc_in 7 \
      --d_model "$d_model" \
      --dropout 0.5 \
      --learning_rate 0.0001 \
      --des 'Exp' \
      --itr 1
  done
}

# Per-dataset --d_model values are kept exactly as before (ETTh2 uses 256).
run_zero_shot ETTh1 512
run_zero_shot ETTh2 256
run_zero_shot ETTm1 512
run_zero_shot ETTm2 512
================================================
FILE: scripts/long_term_forecast/ETT_script/LightTS_ETTh1.sh
================================================
# Launch run.py for LightTS on ETTh1 (long_term_forecast, --is_training 1),
# once per prediction length, always with --seq_len 96 and --label_len 48.
export CUDA_VISIBLE_DEVICES=2  # expose only GPU 2 to the runs

model_name=LightTS

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTh1.sh
================================================
# Launch run.py for MICN on ETTh1 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96 and --label_len 96.
export CUDA_VISIBLE_DEVICES=2  # expose only GPU 2 to the runs

model_name=MICN

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTh2.sh
================================================
# Launch run.py for MICN on ETTh2 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96 and --label_len 96.
export CUDA_VISIBLE_DEVICES=2  # expose only GPU 2 to the runs

model_name=MICN

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${horizon}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTm1.sh
================================================
# Launch run.py for MICN on ETTm1 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96, --label_len 96, --top_k 5.
export CUDA_VISIBLE_DEVICES=2  # expose only GPU 2 to the runs

model_name=MICN

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${horizon}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --top_k 5 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/MICN_ETTm2.sh
================================================
# Launch run.py for MICN on ETTm2 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96, --label_len 96, --top_k 5.
export CUDA_VISIBLE_DEVICES=2  # expose only GPU 2 to the runs

model_name=MICN

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${horizon}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --top_k 5 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/MambaSimple_ETTh1.sh
================================================
# Launch run.py for MambaSimple on ETTh1 (long_term_forecast,
# --is_training 1). Each run uses the same value for --seq_len and --pred_len.
model_name=MambaSimple

for pred_len in 96 192 336 720; do
  # The last option line must NOT end in a backslash: a continuation there
  # would join `done` onto the python command and leave the loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${pred_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETT_all.sh
================================================
# Run the Mamba long-term forecasting scripts for the four ETT datasets in
# sequence, copying their stdout into mamba_ett.txt.
# The first tee truncates the log; the following ones append to it (-a).
# The option is placed before the file operand so that tee implementations
# without GNU-style argument permutation do not treat "-a" as a file name.
./scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh | tee mamba_ett.txt
./scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh | tee -a mamba_ett.txt
./scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh | tee -a mamba_ett.txt
./scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh | tee -a mamba_ett.txt
================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh
================================================
# Launch run.py for Mamba on ETTh1 (long_term_forecast, --is_training 1).
# Each run uses the same value for --seq_len and --pred_len.
model_name=Mamba

for pred_len in 96 192 336 720; do
  # The last option line must NOT end in a backslash: a continuation there
  # would join `done` onto the python command and leave the loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${pred_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh
================================================
# Launch run.py for Mamba on ETTh2 (long_term_forecast, --is_training 1).
# Each run uses the same value for --seq_len and --pred_len.
model_name=Mamba

for pred_len in 96 192 336 720; do
  # The last option line must NOT end in a backslash: a continuation there
  # would join `done` onto the python command and leave the loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_${pred_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh
================================================
# Launch run.py for Mamba on ETTm1 (long_term_forecast, --is_training 1).
# Each run uses the same value for --seq_len and --pred_len.
model_name=Mamba

for pred_len in 96 192 336 720; do
  # The last option line must NOT end in a backslash: a continuation there
  # would join `done` onto the python command and leave the loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_${pred_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh
================================================
# Launch run.py for Mamba on ETTm2 (long_term_forecast, --is_training 1).
# Each run uses the same value for --seq_len and --pred_len.
model_name=Mamba

for pred_len in 96 192 336 720; do
  # The last option line must NOT end in a backslash: a continuation there
  # would join `done` onto the python command and leave the loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_${pred_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --enc_in 7 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 7 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTh1.sh
================================================
# Launch run.py for MultiPatchFormer on ETTh1 (long_term_forecast,
# --is_training 1), once per prediction length, with --seq_len 96.
export CUDA_VISIBLE_DEVICES=0  # expose only GPU 0 to the runs

model_name=MultiPatchFormer

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 256 \
    --d_ff 512 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTm1.sh
================================================
# Launch run.py for MultiPatchFormer on ETTm1 (long_term_forecast,
# --is_training 1), once per prediction length, with --seq_len 96,
# --n_heads 8 and --batch_size 32.
export CUDA_VISIBLE_DEVICES=0  # expose only GPU 0 to the runs

model_name=MultiPatchFormer

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${horizon}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 256 \
    --d_ff 512 \
    --des 'Exp' \
    --n_heads 8 \
    --batch_size 32 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh1.sh
================================================
# Launch run.py for Nonstationary_Transformer on ETTh1 (long_term_forecast,
# --is_training 1), once per prediction length. Every run passes
# --p_hidden_dims 256 256, --p_hidden_layers 2 and --d_model 128.
export CUDA_VISIBLE_DEVICES=2  # expose only GPU 2 to the runs

model_name=Nonstationary_Transformer

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims 256 256 \
    --p_hidden_layers 2 \
    --d_model 128
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh2.sh
================================================
# Launch run.py for Nonstationary_Transformer on ETTh2 (long_term_forecast,
# --is_training 1), once per prediction length, with --seq_len 96.
export CUDA_VISIBLE_DEVICES=7  # expose only GPU 7 to the runs

model_name=Nonstationary_Transformer

# Run a single prediction length.
#   $1      - value for --pred_len (also the model_id suffix)
#   $2      - value for --p_hidden_layers
#   $3...   - values for --p_hidden_dims
# The final option carries no trailing backslash, so each horizon is its own
# run.py invocation instead of being spliced onto the previous command line.
run_horizon() {
  local pred_len=$1 p_hidden_layers=$2
  shift 2
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims "$@" \
    --p_hidden_layers "$p_hidden_layers"
}

run_horizon 96 2 256 256
run_horizon 192 2 256 256
run_horizon 336 4 256 256 256 256
run_horizon 720 2 256 256
================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm1.sh
================================================
# Launch run.py for Nonstationary_Transformer on ETTm1 (long_term_forecast,
# --is_training 1), once per prediction length. Every run passes
# --p_hidden_dims 16 16 16 16 and --p_hidden_layers 4.
export CUDA_VISIBLE_DEVICES=3  # expose only GPU 3 to the runs

model_name=Nonstationary_Transformer

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${horizon}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims 16 16 16 16 \
    --p_hidden_layers 4
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm2.sh
================================================
# Launch run.py for Nonstationary_Transformer on ETTm2 (long_term_forecast,
# --is_training 1), once per prediction length, with --seq_len 96.
export CUDA_VISIBLE_DEVICES=4  # expose only GPU 4 to the runs

model_name=Nonstationary_Transformer

# Run a single prediction length.
#   $1 - value for --pred_len (also the model_id suffix)
#   $2 - width repeated four times as the --p_hidden_dims values
run_horizon() {
  local horizon=$1 width=$2
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${horizon}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims "$width" "$width" "$width" "$width" \
    --p_hidden_layers 4
}

run_horizon 96 256
run_horizon 192 256
run_horizon 336 16
run_horizon 720 16
================================================
FILE: scripts/long_term_forecast/ETT_script/PAttn_ETTh1.sh
================================================
# Launch run.py for PAttn on ETTh1 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96. Only --pred_len and
# --n_heads differ between the runs.
export CUDA_VISIBLE_DEVICES=0  # expose only GPU 0 to the runs

model_name=PAttn

# Run a single prediction length.
#   $1 - value for --pred_len (also the model_id suffix)
#   $2 - value for --n_heads
run_horizon() {
  local horizon=$1 heads=$2
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads "$heads" \
    --itr 1
}

run_horizon 96 2
run_horizon 192 8
run_horizon 336 8
run_horizon 720 16
================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTh1.sh
================================================
# Launch run.py for PatchTST on ETTh1 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96. Only --pred_len and
# --n_heads differ between the runs.
export CUDA_VISIBLE_DEVICES=0  # expose only GPU 0 to the runs

model_name=PatchTST

# Run a single prediction length.
#   $1 - value for --pred_len (also the model_id suffix)
#   $2 - value for --n_heads
run_horizon() {
  local horizon=$1 heads=$2
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 1 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads "$heads" \
    --itr 1
}

run_horizon 96 2
run_horizon 192 8
run_horizon 336 8
run_horizon 720 16
================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTh2.sh
================================================
# Launch run.py for PatchTST on ETTh2 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96, --e_layers 3, --n_heads 4.
export CUDA_VISIBLE_DEVICES=1  # expose only GPU 1 to the runs

model_name=PatchTST

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${horizon}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 3 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads 4 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTm1.sh
================================================
# Launch run.py for PatchTST on ETTm1 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96. --e_layers, --n_heads and
# --batch_size are set individually for each prediction length.
export CUDA_VISIBLE_DEVICES=4  # expose only GPU 4 to the runs

model_name=PatchTST

# Run a single prediction length.
#   $1 - value for --pred_len (also the model_id suffix)
#   $2 - value for --e_layers
#   $3 - value for --n_heads
#   $4 - value for --batch_size
run_horizon() {
  local horizon=$1 enc_layers=$2 heads=$3 batch=$4
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${horizon}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers "$enc_layers" \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads "$heads" \
    --batch_size "$batch" \
    --itr 1
}

run_horizon 96 1 2 32
run_horizon 192 3 2 128
run_horizon 336 1 4 128
run_horizon 720 3 4 128
================================================
FILE: scripts/long_term_forecast/ETT_script/PatchTST_ETTm2.sh
================================================
# Launch run.py for PatchTST on ETTm2 (long_term_forecast, --is_training 1),
# once per prediction length, with --seq_len 96. --e_layers, --n_heads and
# --batch_size are set individually for each prediction length.
export CUDA_VISIBLE_DEVICES=5  # expose only GPU 5 to the runs

model_name=PatchTST

# Run a single prediction length.
#   $1 - value for --pred_len (also the model_id suffix)
#   $2 - value for --e_layers
#   $3 - value for --n_heads
#   $4 - value for --batch_size
run_horizon() {
  local horizon=$1 enc_layers=$2 heads=$3 batch=$4
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${horizon}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers "$enc_layers" \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads "$heads" \
    --batch_size "$batch" \
    --itr 1
}

run_horizon 96 3 16 32
run_horizon 192 3 2 128
run_horizon 336 1 4 32
run_horizon 720 3 4 128
================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTh1.sh
================================================
# Pyraformer long-term forecasting on ETTh1 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id.
export CUDA_VISIBLE_DEVICES=0

model_name=Pyraformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTh2.sh
================================================
# Pyraformer long-term forecasting on ETTh2 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id.
export CUDA_VISIBLE_DEVICES=2

model_name=Pyraformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTm1.sh
================================================
# Pyraformer long-term forecasting on ETTm1 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id.
export CUDA_VISIBLE_DEVICES=1

model_name=Pyraformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Pyraformer_ETTm2.sh
================================================
# Pyraformer long-term forecasting on ETTm2 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id. This script passes --factor 1.
export CUDA_VISIBLE_DEVICES=2

model_name=Pyraformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Reformer_ETTh1.sh
================================================
# Reformer long-term forecasting on ETTh1 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id.
export CUDA_VISIBLE_DEVICES=2

model_name=Reformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/SegRNN_ETTh1.sh
================================================
# SegRNN long-term forecasting on ETTh1.
# One run.py launch per prediction length, all with --seg_len 24.
export CUDA_VISIBLE_DEVICES=2

model_name=SegRNN
seq_len=96

# Usage: train_one PRED_LEN
train_one() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${seq_len}_$1" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$1" \
    --seg_len 24 \
    --enc_in 7 \
    --d_model 512 \
    --dropout 0.5 \
    --learning_rate 0.0001 \
    --des 'Exp' \
    --itr 1
}

for horizon in 96 192 336 720; do
  train_one "$horizon"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/SegRNN_ETTh2.sh
================================================
# SegRNN long-term forecasting on ETTh2.
# One run.py launch per prediction length, all with --seg_len 24.
export CUDA_VISIBLE_DEVICES=2

model_name=SegRNN
seq_len=96

# Usage: train_one PRED_LEN
train_one() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_${seq_len}_$1" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$1" \
    --seg_len 24 \
    --enc_in 7 \
    --d_model 512 \
    --dropout 0.5 \
    --learning_rate 0.0001 \
    --des 'Exp' \
    --itr 1
}

for horizon in 96 192 336 720; do
  train_one "$horizon"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/SegRNN_ETTm1.sh
================================================
# SegRNN long-term forecasting on ETTm1.
# One run.py launch per prediction length, all with --seg_len 48.
export CUDA_VISIBLE_DEVICES=2

model_name=SegRNN
seq_len=96

# Usage: train_one PRED_LEN
train_one() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_${seq_len}_$1" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$1" \
    --seg_len 48 \
    --enc_in 7 \
    --d_model 512 \
    --dropout 0.5 \
    --learning_rate 0.0001 \
    --des 'Exp' \
    --itr 1
}

for horizon in 96 192 336 720; do
  train_one "$horizon"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/SegRNN_ETTm2.sh
================================================
# SegRNN long-term forecasting on ETTm2.
# One run.py launch per prediction length, all with --seg_len 48.
export CUDA_VISIBLE_DEVICES=2

model_name=SegRNN
seq_len=96

# Usage: train_one PRED_LEN
train_one() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_${seq_len}_$1" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$1" \
    --seg_len 48 \
    --enc_in 7 \
    --d_model 512 \
    --dropout 0.5 \
    --learning_rate 0.0001 \
    --des 'Exp' \
    --itr 1
}

for horizon in 96 192 336 720; do
  train_one "$horizon"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTh1.sh
================================================
# TSMixer long-term forecasting on ETTh1 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id. No GPU is pinned by this script.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTh2.sh
================================================
# TSMixer long-term forecasting on ETTh2 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id. No GPU is pinned by this script.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTm1.sh
================================================
# TSMixer long-term forecasting on ETTm1 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id. No GPU is pinned by this script.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TSMixer_ETTm2.sh
================================================
# TSMixer long-term forecasting on ETTm2 (input length 96).
# The four runs share every option except the prediction length, which
# also forms the suffix of --model_id. This script passes --factor 1 and
# does not pin a GPU.
model_name=TSMixer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TiDE_ETTh1.sh
================================================
# TiDE long-term forecasting on ETTh1 (input length 96).
# Launches run.py once per prediction length; every other option is shared.
#
# NOTE: the final option of each command must NOT end with a backslash.
# A trailing "\" continues the line, so the shell would splice the next
# "python -u run.py ..." line into the argument list of the current command
# instead of starting a new run.
#
# NOTE(review): --c_out is 8 while --enc_in/--dec_in are 7; the value is kept
# as found - confirm it is what TiDE expects.
export CUDA_VISIBLE_DEVICES=2

model_name=TiDE

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 2 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 8 \
    --d_model 256 \
    --d_ff 256 \
    --dropout 0.3 \
    --batch_size 512 \
    --learning_rate 0.1 \
    --patience 5 \
    --train_epochs 10
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTh1.sh
================================================
# TimeMixer long-term forecasting on ETTh1.
# Launches run.py once per prediction length; every other option is shared
# and taken from the variables below.
#
# Every continued line keeps a space before its trailing backslash: a
# backslash-newline is removed entirely by the shell, so without the space
# the last word of one line would fuse with the first word of the next
# (e.g. "./dataset/ETT-small/--data_path").
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32
train_epochs=10
patience=10
# Value actually passed to run.py by every run of this script.
batch_size=128

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --learning_rate "$learning_rate" \
    --train_epochs "$train_epochs" \
    --patience "$patience" \
    --batch_size "$batch_size" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTh2.sh
================================================
# TimeMixer long-term forecasting on ETTh2.
# Launches run.py once per prediction length; every other option is shared
# and taken from the variables below. No --batch_size is passed, so run.py
# uses its own default for that option.
#
# Every continued line keeps a space before its trailing backslash: a
# backslash-newline is removed entirely by the shell, so without the space
# the last word of one line would fuse with the first word of the next
# (e.g. "./dataset/ETT-small/--data_path").
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --learning_rate "$learning_rate" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTm1.sh
================================================
# TimeMixer long-term forecasting on ETTm1.
# Launches run.py once per prediction length; every other option is shared
# and taken from the variables below.
#
# Every continued line keeps a space before its trailing backslash: a
# backslash-newline is removed entirely by the shell, so without the space
# the last word of one line would fuse with the first word of the next
# (e.g. "./dataset/ETT-small/--data_path").
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=16
d_ff=32
batch_size=16

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --batch_size "$batch_size" \
    --learning_rate "$learning_rate" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeMixer_ETTm2.sh
================================================
# TimeMixer long-term forecasting on ETTm2.
# Launches run.py once per prediction length; every other option is shared
# and taken from the variables below.
#
# Every continued line keeps a space before its trailing backslash: a
# backslash-newline is removed entirely by the shell, so without the space
# the last word of one line would fuse with the first word of the next
# (e.g. "./dataset/ETT-small/--data_path").
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

seq_len=96
e_layers=2
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=32
d_ff=32
# Value actually passed to run.py by every run of this script.
batch_size=128

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --enc_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --batch_size "$batch_size" \
    --learning_rate "$learning_rate" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTh1.sh
================================================
# TimeXer long-term forecasting on ETTh1 (input length 96).
# Launches run.py once per prediction length. The encoder depth and the
# width/batch options differ per horizon; everything else is shared.
#
# All four runs use the same --des tag ('Exp') so the experiment description
# is consistent across horizons.
export CUDA_VISIBLE_DEVICES=1

model_name=TimeXer

# Usage: run_horizon PRED_LEN E_LAYERS [extra run.py options...]
# The extra options are forwarded verbatim, just before --itr.
run_horizon() {
  horizon=$1
  depth=$2
  shift 2
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${horizon}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers "$depth" \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    "$@" \
    --itr 1
}

# The 96 and 192 runs pass no --d_ff, so run.py's default applies there.
run_horizon 96  1 --d_model 256 --batch_size 4
run_horizon 192 2 --d_model 128 --batch_size 4
run_horizon 336 1 --d_model 512 --d_ff 1024 --batch_size 16
run_horizon 720 1 --d_model 256 --d_ff 1024 --batch_size 16
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTh2.sh
================================================
# TimeXer long-term forecasting sweep on ETTh2 (seq_len 96; pred_len 96, 192,
# 336 and 720 are run one after another).
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# run_horizon PRED_LEN [extra run.py options...]
# Runs one job for the given prediction length. Everything that is identical
# across the four jobs is fixed here; the remaining options (layer count,
# model width, batch size) are supplied by the caller.
run_horizon() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --d_ff 1024 \
    --itr 1 \
    "$@"
}

run_horizon 96 --e_layers 1 --d_model 256 --batch_size 16
# The 192 job passes no --batch_size, so run.py's own default applies.
run_horizon 192 --e_layers 1 --d_model 256
run_horizon 336 --e_layers 2 --d_model 512 --batch_size 4
run_horizon 720 --e_layers 2 --d_model 256 --batch_size 16
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTm1.sh
================================================
# TimeXer long-term forecasting sweep on ETTm1 (seq_len 96; one job per
# prediction length 96 / 192 / 336 / 720).
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# run_horizon PRED_LEN [extra run.py options...]
# All four jobs share the same depth, width and batch size; only the
# feed-forward width (--d_ff) differs, and it is passed in by the caller.
run_horizon() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 256 \
    --batch_size 4 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

# The 96 job sets no --d_ff, leaving run.py's default in effect.
run_horizon 96
run_horizon 192 --d_ff 256
run_horizon 336 --d_ff 1024
run_horizon 720 --d_ff 512
================================================
FILE: scripts/long_term_forecast/ETT_script/TimeXer_ETTm2.sh
================================================
# TimeXer long-term forecasting sweep on ETTm2 (seq_len 96; one job per
# prediction length 96 / 192 / 336 / 720).
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

# run_horizon PRED_LEN [extra run.py options...]
# Fixes the options common to every job and forwards the per-horizon model
# size / batch size options given by the caller.
run_horizon() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

run_horizon 96 --d_model 256
run_horizon 192 --d_model 256 --d_ff 1024 --batch_size 16
run_horizon 336 --d_model 512 --d_ff 1024
run_horizon 720 --d_model 512
================================================
FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh
================================================
# TimesNet long-term forecasting sweep on ETTh1. The four jobs are identical
# except for the prediction length, so they are driven by a single loop.
export CUDA_VISIBLE_DEVICES=2

model_name=TimesNet

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 16 \
    --d_ff 32 \
    --des 'Exp' \
    --itr 1 \
    --top_k 5
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTh2.sh
================================================
# TimesNet long-term forecasting sweep on ETTh2. Every horizon uses the same
# hyper-parameters; only --pred_len and the matching --model_id suffix change.
export CUDA_VISIBLE_DEVICES=2

model_name=TimesNet

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${horizon}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTm1.sh
================================================
# TimesNet long-term forecasting sweep on ETTm1 (seq_len 96; one job per
# prediction length 96 / 192 / 336 / 720).
export CUDA_VISIBLE_DEVICES=2

model_name=TimesNet

# run_horizon PRED_LEN [extra run.py options...]
# Shared options live here; model width (--d_model / --d_ff) and any epoch
# override are passed in per horizon.
run_horizon() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --top_k 5 \
    --itr 1 \
    "$@"
}

run_horizon 96 --d_model 64 --d_ff 64
run_horizon 192 --d_model 64 --d_ff 64
# Only the 336 job overrides the epoch count.
run_horizon 336 --d_model 16 --d_ff 32 --train_epochs 3
run_horizon 720 --d_model 16 --d_ff 32
================================================
FILE: scripts/long_term_forecast/ETT_script/TimesNet_ETTm2.sh
================================================
# TimesNet long-term forecasting sweep on ETTm2 (seq_len 96; one job per
# prediction length 96 / 192 / 336 / 720).
export CUDA_VISIBLE_DEVICES=1

model_name=TimesNet

# run_horizon PRED_LEN [extra run.py options...]
# Shared options live here; model width (--d_model / --d_ff) and any epoch
# override are passed in per horizon.
run_horizon() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --top_k 5 \
    --itr 1 \
    "$@"
}

run_horizon 96 --d_model 32 --d_ff 32
# The 192 and 720 jobs train for a single epoch.
run_horizon 192 --d_model 32 --d_ff 32 --train_epochs 1
run_horizon 336 --d_model 32 --d_ff 32
run_horizon 720 --d_model 16 --d_ff 32 --train_epochs 1
================================================
FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTh1.sh
================================================
# Transformer long-term forecasting sweep on ETTh1. The jobs differ only in
# prediction length, which also forms the suffix of --model_id.
export CUDA_VISIBLE_DEVICES=2

model_name=Transformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTh2.sh
================================================
# Transformer long-term forecasting sweep on ETTh2: same settings for every
# job, iterated over the four prediction lengths.
export CUDA_VISIBLE_DEVICES=2

model_name=Transformer

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${horizon}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTm1.sh
================================================
# Transformer long-term forecasting sweep on ETTm1. No --factor is passed
# here, so run.py's default is used for all four prediction lengths.
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_96_${pred_len}" \
    --model "$model_name" \
    --data ETTm1 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/Transformer_ETTm2.sh
================================================
# Transformer long-term forecasting sweep on ETTm2 (--factor 1), iterated
# over the four prediction lengths.
export CUDA_VISIBLE_DEVICES=2

model_name=Transformer

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm2.csv \
    --model_id "ETTm2_96_${horizon}" \
    --model "$model_name" \
    --data ETTm2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 1 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTh1.sh
================================================
# WPMixer long-term forecasting on ETTh1: one run.py job per prediction
# length, with per-horizon hyper-parameters taken from the arrays below
# (index i of every array belongs to pred_lens[i]). Requires bash (arrays).

# Set the GPU to use
export CUDA_VISIBLE_DEVICES=0
# Model name
model_name=WPMixer
# Dataset name and data directory. The directory is the same one used by the
# other ETT scripts in this folder.
dataset=ETTh1
root_path=./dataset/ETT-small/
# Per-horizon settings passed to run.py
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.000242438 0.000201437 0.000132929 0.000239762)
batches=(256 256 256 256)
epochs=(30 30 30 30)
dropouts=(0.4 0.05 0.0 0.2)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(256 256 256 128)
patiences=(12 12 12 12)
# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are NOT passed to run.py by the loop below; they are kept here only as
# a record of the per-horizon values.
wavelets=(db2 db3 db2 db2)
levels=(2 2 1 1)
tfactors=(5 5 3 5)
dfactors=(8 5 3 3)
strides=(8 8 8 8)
# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTh1.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done
================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTh2.sh
================================================
# WPMixer long-term forecasting on ETTh2: one run.py job per prediction
# length, with per-horizon hyper-parameters taken from the arrays below
# (index i of every array belongs to pred_lens[i]). Requires bash (arrays).

# Set the GPU to use
export CUDA_VISIBLE_DEVICES=0
# Model name
model_name=WPMixer
# Dataset name and data directory. The directory is the same one used by the
# other ETT scripts in this folder.
dataset=ETTh2
root_path=./dataset/ETT-small/
# Per-horizon settings passed to run.py
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.000466278 0.000294929 0.000617476 0.000810205)
batches=(256 256 256 256)
epochs=(30 30 30 30)
dropouts=(0.0 0.0 0.1 0.4)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(256 256 128 128)
patiences=(12 12 12 12)
# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are NOT passed to run.py by the loop below; they are kept here only as
# a record of the per-horizon values.
wavelets=(db2 db2 db2 db2)
levels=(2 3 5 5)
tfactors=(5 3 5 5)
dfactors=(5 8 3 5)
strides=(8 8 8 8)
# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTh2.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done
================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTm1.sh
================================================
# WPMixer long-term forecasting on ETTm1: one run.py job per prediction
# length, with per-horizon hyper-parameters taken from the arrays below
# (index i of every array belongs to pred_lens[i]). Requires bash (arrays).

# Set the GPU to use
export CUDA_VISIBLE_DEVICES=0
# Model name
model_name=WPMixer
# Dataset name and data directory. The directory is the same one used by the
# other ETT scripts in this folder.
dataset=ETTm1
root_path=./dataset/ETT-small/
# Per-horizon settings passed to run.py
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.001277976 0.002415901 0.001594735 0.002011441)
batches=(256 256 256 256)
epochs=(80 80 80 80)
dropouts=(0.4 0.4 0.4 0.4)
patch_lens=(48 48 48 48)
lradjs=(type3 type3 type3 type3)
d_models=(256 128 256 128)
patiences=(12 12 12 12)
# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are NOT passed to run.py by the loop below; they are kept here only as
# a record of the per-horizon values.
wavelets=(db2 db3 db5 db5)
levels=(1 1 1 4)
tfactors=(5 3 7 3)
dfactors=(3 7 7 8)
strides=(24 24 24 24)
# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTm1.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done
================================================
FILE: scripts/long_term_forecast/ETT_script/WPMixer_ETTm2.sh
================================================
# WPMixer long-term forecasting on ETTm2: one run.py job per prediction
# length, with per-horizon hyper-parameters taken from the arrays below
# (index i of every array belongs to pred_lens[i]). Requires bash (arrays).

# Set the GPU to use
export CUDA_VISIBLE_DEVICES=0
# Model name
model_name=WPMixer
# Dataset name and data directory. The directory is the same one used by the
# other ETT scripts in this folder.
dataset=ETTm2
root_path=./dataset/ETT-small/
# Per-horizon settings passed to run.py
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.00076587 0.000275775 0.000234608 0.001039536)
batches=(256 256 256 256)
epochs=(80 80 80 80)
dropouts=(0.4 0.2 0.4 0.4)
patch_lens=(48 48 48 48)
lradjs=(type3 type3 type3 type3)
d_models=(256 256 256 256)
patiences=(12 12 12 12)
# Model params below need to be set in WPMixer.py Line 15, instead of this script.
# They are NOT passed to run.py by the loop below; they are kept here only as
# a record of the per-horizon values.
wavelets=(bior3.1 db2 db2 db2)
levels=(1 1 1 1)
tfactors=(3 3 3 3)
dfactors=(8 7 5 8)
strides=(24 24 24 24)
# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "$root_path" \
    --data_path ETTm2.csv \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data "$dataset" \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done
================================================
FILE: scripts/long_term_forecast/ETT_script/iTransformer_ETTh2.sh
================================================
# iTransformer long-term forecasting sweep on ETTh2. All four jobs share one
# configuration (d_model = d_ff = 128); only the prediction length varies.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh2.csv \
    --model_id "ETTh2_96_${pred_len}" \
    --model "$model_name" \
    --data ETTh2 \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --d_model 128 \
    --d_ff 128 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/Autoformer.sh
================================================
# Autoformer long-term forecasting sweep on the exchange-rate dataset
# (seq_len 96; one job per prediction length 96 / 192 / 336 / 720).
export CUDA_VISIBLE_DEVICES=7

model_name=Autoformer

# run_horizon PRED_LEN [extra run.py options...]
# Runs one job; anything after the prediction length is appended verbatim to
# the run.py command line.
run_horizon() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

run_horizon 96
# The two middle horizons train for a single epoch.
run_horizon 192 --train_epochs 1
run_horizon 336 --train_epochs 1
run_horizon 720
================================================
FILE: scripts/long_term_forecast/Exchange_script/Crossformer.sh
================================================
# Crossformer long-term forecasting sweep on the exchange-rate dataset
# (seq_len 96, label_len 96; one job per prediction length).
export CUDA_VISIBLE_DEVICES=4

model_name=Crossformer

# run_horizon PRED_LEN [extra run.py options...]
# Shared options are fixed here; model width and any epoch override are
# supplied per horizon by the caller.
run_horizon() {
  pred_len=$1
  shift
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

run_horizon 96 --d_model 64 --d_ff 64
run_horizon 192 --d_model 64 --d_ff 64
# The two longest horizons use a narrower model and a single training epoch.
run_horizon 336 --d_model 32 --d_ff 32 --train_epochs 1
run_horizon 720 --d_model 32 --d_ff 32 --train_epochs 1
================================================
FILE: scripts/long_term_forecast/Exchange_script/FiLM.sh
================================================
# FiLM long-term forecasting sweep on the exchange-rate dataset. Unlike the
# sibling scripts, the input window grows with the horizon (384 for pred_len
# 96 / 192, 720 for pred_len 336 / 720).
export CUDA_VISIBLE_DEVICES=5

model_name=FiLM

# run_film SEQ_LEN PRED_LEN
# Runs one job with the given input window and prediction length.
# NOTE(review): --model_id keeps the literal "96" prefix even though seq_len
# is 384 or 720 — confirm whether the id is meant to encode seq_len.
run_film() {
  seq_len=$1
  pred_len=$2
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --itr 1
}

run_film 384 96
run_film 384 192
run_film 720 336
run_film 720 720
================================================
FILE: scripts/long_term_forecast/Exchange_script/Koopa.sh
================================================
# Koopa long-term forecasting runs on the exchange-rate dataset.
# Every run uses an input window twice as long as the forecast horizon and
# labels the experiment "Exchange_<seq_len>_<pred_len>".
export CUDA_VISIBLE_DEVICES=7

model_name=Koopa

for pred_len in 48 96 144 192; do
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/MICN.sh
================================================
# MICN long-term forecasting runs on the exchange-rate dataset
# (seq_len 96, label_len 96).
# Horizons 96/192 use d_model = d_ff = 64; horizons 336/720 use 32 and are
# additionally limited to a single training epoch.
export CUDA_VISIBLE_DEVICES=7

model_name=MICN

for pred_len in 96 192 336 720; do
  case "$pred_len" in
    96|192) width=64; epoch_args="" ;;
    *)      width=32; epoch_args="--train_epochs 1" ;;
  esac

  # $epoch_args is deliberately unquoted: it must expand to either zero
  # words or the two words "--train_epochs 1".
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --d_model "$width" \
    --d_ff "$width" \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/Mamba.sh
================================================
# Mamba long-term forecasting runs on the exchange-rate dataset.
# For each horizon the input window equals the forecast horizon
# (seq_len == pred_len) and the run is labelled
# "Exchange_<pred_len>_<pred_len>".
model_name=Mamba

for pred_len in 96 192 336 720; do
  # The last argument must NOT end with a backslash: a trailing line
  # continuation would glue the loop's `done` keyword onto the python
  # command line and leave the `for` loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_${pred_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$pred_len" \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 8 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 8 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/Nonstationary_Transformer.sh
================================================
# Nonstationary_Transformer long-term forecasting runs on the exchange-rate
# dataset (seq_len 96, label_len 48).
# Projector settings per horizon:
#   96, 336, 720 -> --p_hidden_dims 256 256      --p_hidden_layers 2
#   192          -> --p_hidden_dims 64 64 64 64  --p_hidden_layers 4
# The 336 horizon is additionally limited to a single training epoch.
export CUDA_VISIBLE_DEVICES=4

model_name=Nonstationary_Transformer

for pred_len in 96 192 336 720; do
  p_hidden_dims="256 256"
  p_hidden_layers=2
  epoch_args=""

  case "$pred_len" in
    192) p_hidden_dims="64 64 64 64"; p_hidden_layers=4 ;;
    336) epoch_args="--train_epochs 1" ;;
  esac

  # $epoch_args and $p_hidden_dims are deliberately unquoted so that they
  # split into separate command-line words (or vanish when empty).
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args \
    --p_hidden_dims $p_hidden_dims \
    --p_hidden_layers "$p_hidden_layers"
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/PatchTST.sh
================================================
# PatchTST long-term forecasting runs on the exchange-rate dataset
# (seq_len 96, label_len 48). Only the 336 horizon is limited to a single
# training epoch; the other horizons use run.py's default epoch count.
export CUDA_VISIBLE_DEVICES=5

model_name=PatchTST

for pred_len in 96 192 336 720; do
  if [ "$pred_len" -eq 336 ]; then
    epoch_args="--train_epochs 1"
  else
    epoch_args=""
  fi

  # $epoch_args is deliberately unquoted: zero words or "--train_epochs 1".
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/Pyraformer.sh
================================================
# Pyraformer long-term forecasting runs on the exchange-rate dataset
# (seq_len 96, label_len 48). The 192 and 336 horizons are limited to a
# single training epoch; 96 and 720 do not override the epoch count.
export CUDA_VISIBLE_DEVICES=4

model_name=Pyraformer

for pred_len in 96 192 336 720; do
  case "$pred_len" in
    192|336) epoch_args="--train_epochs 1" ;;
    *)       epoch_args="" ;;
  esac

  # $epoch_args is deliberately unquoted: zero words or "--train_epochs 1".
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/TimesNet.sh
================================================
# TimesNet long-term forecasting runs on the exchange-rate dataset
# (seq_len 96, label_len 48).
# Horizons 96/192 use d_model = d_ff = 64; horizons 336/720 use 32 and are
# additionally limited to a single training epoch.
export CUDA_VISIBLE_DEVICES=7

model_name=TimesNet

for pred_len in 96 192 336 720; do
  case "$pred_len" in
    96|192) width=64; epoch_args="" ;;
    *)      width=32; epoch_args="--train_epochs 1" ;;
  esac

  # $epoch_args is deliberately unquoted: zero words or "--train_epochs 1".
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --d_model "$width" \
    --d_ff "$width" \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Exchange_script/Transformer.sh
================================================
# Transformer long-term forecasting runs on the exchange-rate dataset
# (seq_len 96, label_len 48). Only the 336 horizon is limited to a single
# training epoch; the other horizons do not override the epoch count.
export CUDA_VISIBLE_DEVICES=4

model_name=Transformer

for pred_len in 96 192 336 720; do
  if [ "$pred_len" -eq 336 ]; then
    epoch_args="--train_epochs 1"
  else
    epoch_args=""
  fi

  # $epoch_args is deliberately unquoted: zero words or "--train_epochs 1".
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/exchange_rate/ \
    --data_path exchange_rate.csv \
    --model_id "Exchange_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 8 \
    --dec_in 8 \
    --c_out 8 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/ILI_script/Autoformer.sh
================================================
# Autoformer long-term forecasting runs on the national-illness (ILI)
# dataset: seq_len 36, label_len 18, one run per forecast horizon.
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

for pred_len in 24 36 48 60; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 36 \
    --label_len 18 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ILI_script/Crossformer.sh
================================================
# Crossformer long-term forecasting runs on the national-illness (ILI)
# dataset: seq_len 36, label_len 18, d_model = d_ff = 768, dropout 0.6,
# one run per forecast horizon.
export CUDA_VISIBLE_DEVICES=5

model_name=Crossformer

for pred_len in 24 36 48 60; do
  # The dropout flag must be spelled with two dashes (`--dropout`) like
  # every other option; a single-dash `-dropout` is not a registered option.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 36 \
    --label_len 18 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 768 \
    --d_ff 768 \
    --top_k 5 \
    --des 'Exp' \
    --dropout 0.6 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ILI_script/FiLM.sh
================================================
# FiLM long-term forecasting runs on the national-illness (ILI) dataset.
# Every run uses a 60-step input window (label_len 18) while the model_id
# keeps the "ili_36_<pred_len>" label.
export CUDA_VISIBLE_DEVICES=5

for pred_len in 24 36 48 60; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model FiLM \
    --data custom \
    --features M \
    --seq_len 60 \
    --label_len 18 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ILI_script/Koopa.sh
================================================
# Koopa long-term forecasting runs on the national-illness (ILI) dataset.
# Every run uses an input window twice as long as the forecast horizon and
# labels the experiment "ili_<seq_len>_<pred_len>".
export CUDA_VISIBLE_DEVICES=1

model_name=Koopa

for pred_len in 24 36 48 60; do
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ILI_script/MICN.sh
================================================
# MICN long-term forecasting runs on the national-illness (ILI) dataset:
# seq_len 36, label_len 36, d_model = d_ff = 768, one run per horizon.
export CUDA_VISIBLE_DEVICES=4

model_name=MICN

for pred_len in 24 36 48 60; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 36 \
    --label_len 36 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 768 \
    --d_ff 768 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ILI_script/Nonstationary_Transformer.sh
================================================
# Nonstationary_Transformer long-term forecasting runs on the
# national-illness (ILI) dataset (seq_len 36, label_len 18).
# The projector always has 2 hidden layers; their width depends on the
# horizon: 32 for 24/36, 16 for 48, 8 for 60.
export CUDA_VISIBLE_DEVICES=3

model_name=Nonstationary_Transformer

for pred_len in 24 36 48 60; do
  case "$pred_len" in
    24|36) hidden_dim=32 ;;
    48)    hidden_dim=16 ;;
    60)    hidden_dim=8 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 36 \
    --label_len 18 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1 \
    --p_hidden_dims "$hidden_dim" "$hidden_dim" \
    --p_hidden_layers 2
done
================================================
FILE: scripts/long_term_forecast/ILI_script/PatchTST.sh
================================================
# PatchTST long-term forecasting runs on the national-illness (ILI)
# dataset (seq_len 36, label_len 18, 4 encoder layers).
# Per-horizon settings:
#   24 -> n_heads 4,  d_model 1024
#   36 -> n_heads 4,  d_model 2048
#   48 -> n_heads 4,  d_model 2048
#   60 -> n_heads 16, d_model 2048
export CUDA_VISIBLE_DEVICES=1

model_name=PatchTST

for pred_len in 24 36 48 60; do
  n_heads=4
  d_model=2048

  case "$pred_len" in
    24) d_model=1024 ;;
    60) n_heads=16 ;;
  esac

  # Keep a space between every value and the continuation backslash:
  # without it the value can fuse with the option on the next line.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 36 \
    --label_len 18 \
    --pred_len "$pred_len" \
    --e_layers 4 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --n_heads "$n_heads" \
    --d_model "$d_model" \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ILI_script/TimesNet.sh
================================================
# TimesNet long-term forecasting runs on the national-illness (ILI)
# dataset: seq_len 36, label_len 18, d_model = d_ff = 768, top_k 5,
# one run per forecast horizon.
export CUDA_VISIBLE_DEVICES=4

model_name=TimesNet

for pred_len in 24 36 48 60; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 36 \
    --label_len 18 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --d_model 768 \
    --d_ff 768 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/ILI_script/Transformer.sh
================================================
# Transformer long-term forecasting runs on the national-illness (ILI)
# dataset: seq_len 36, label_len 18, one run per forecast horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=Transformer

for pred_len in 24 36 48 60; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/illness/ \
    --data_path national_illness.csv \
    --model_id "ili_36_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 36 \
    --label_len 18 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Mamba_all.sh
================================================
# Run the Mamba long-term forecasting script of each dataset in turn.
# Paths are relative, so this must be launched from the repository root.
for dataset in ECL Traffic Exchange Weather; do
  "./scripts/long_term_forecast/${dataset}_script/Mamba.sh"
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/Autoformer.sh
================================================
# Autoformer long-term forecasting runs on the traffic dataset:
# seq_len 96, label_len 48, 3 training epochs, one run per horizon.
export CUDA_VISIBLE_DEVICES=6

model_name=Autoformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --itr 1 \
    --train_epochs 3
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/Crossformer.sh
================================================
# Crossformer long-term forecasting runs on traffic.csv.
# The four jobs differ only in the forecast horizon (and the matching
# model_id suffix), so they are generated from a single loop.
export CUDA_VISIBLE_DEVICES=6

model_name=Crossformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --top_k 5 \
    --des 'Exp' \
    --n_heads 2 \
    --batch_size 4 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/FiLM.sh
================================================
# FiLM long-term forecasting runs on traffic.csv.
# NOTE: the model_id keeps the "traffic_96_<horizon>" label although the
# input window passed to run.py is 720 steps; the label is left untouched so
# result/checkpoint names stay the same.
export CUDA_VISIBLE_DEVICES=3

model_name=FiLM

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 720 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --itr 1 \
    --batch_size 2
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/Koopa.sh
================================================
# Koopa long-term forecasting runs on traffic.csv.
# Horizons are 48, 96, 144 and 192 steps; each run uses an input window that
# is exactly twice the horizon (96, 192, 288, 384).
export CUDA_VISIBLE_DEVICES=6

model_name=Koopa

for pred_len in 48 96 144 192; do
  seq_len=$((pred_len * 2))

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/MICN.sh
================================================
# MICN long-term forecasting runs on traffic.csv.
# A single loop launches one job per forecast horizon; all remaining
# hyper-parameters are identical across the four runs.
export CUDA_VISIBLE_DEVICES=0

model_name=MICN

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/Mamba.sh
================================================
# Mamba long-term forecasting runs on traffic.csv.
# Each run uses an input window equal to the forecast horizon, so the same
# loop variable feeds --seq_len, --pred_len and both halves of the model_id.
model_name=Mamba

for pred_len in 96 192 336 720
do
  # The last option must NOT end with a line continuation: a trailing
  # backslash would join `done` onto the python command and leave the loop
  # unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id traffic_$pred_len'_'$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len $pred_len \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 862 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 862 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/MultiPatchFormer.sh
================================================
# MultiPatchFormer long-term forecasting runs on traffic.csv.
# Only the forecast horizon changes between the four jobs.
export CUDA_VISIBLE_DEVICES=0

model_name=MultiPatchFormer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 1 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 256 \
    --d_ff 512 \
    --des 'Exp' \
    --n_heads 8 \
    --batch_size 32 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/Nonstationary_Transformer.sh
================================================
# Nonstationary_Transformer long-term forecasting runs on traffic.csv.
# One job per forecast horizon. The 336-step run passes
# "--p_hidden_dims 16 16" while the other horizons pass "128 128".
export CUDA_VISIBLE_DEVICES=5

model_name=Nonstationary_Transformer

for pred_len in 96 192 336 720; do
  # Select the per-horizon value that is repeated in --p_hidden_dims.
  case "$pred_len" in
    336) p_hidden_dim=16 ;;
    *)   p_hidden_dim=128 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --itr 1 \
    --train_epochs 3 \
    --p_hidden_dims "$p_hidden_dim" "$p_hidden_dim" \
    --p_hidden_layers 2
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/PatchTST.sh
================================================
# PatchTST long-term forecasting runs on traffic.csv.
# The forecast horizon is the only setting that varies between jobs.
export CUDA_VISIBLE_DEVICES=6

model_name=PatchTST

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --batch_size 4 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/Pyraformer.sh
================================================
# Pyraformer long-term forecasting runs on traffic.csv.
# Launches one 3-epoch training job per forecast horizon.
export CUDA_VISIBLE_DEVICES=6

model_name=Pyraformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --itr 1 \
    --train_epochs 3
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/SegRNN.sh
================================================
# SegRNN long-term forecasting runs on traffic.csv.
# The input window is fixed; the loop sweeps the forecast horizon.
export CUDA_VISIBLE_DEVICES=0

model_name=SegRNN
input_len=96

for horizon in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_${input_len}_${horizon}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$input_len" \
    --pred_len "$horizon" \
    --seg_len 24 \
    --enc_in 862 \
    --d_model 512 \
    --dropout 0 \
    --learning_rate 0.001 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/TSMixer.sh
================================================
# TSMixer long-term forecasting runs on traffic.csv.
# One job per forecast horizon with a shared learning rate.
model_name=TSMixer
learning_rate=0.001

for pred_len in 96 192 336 720; do
  # The final option deliberately has no trailing backslash, so every
  # iteration is a separate, complete python command.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --d_ff 32 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate "$learning_rate"
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/TimeMixer.sh
================================================
# TimeMixer long-term forecasting runs on traffic.csv.
# GPU pinning is left disabled; uncomment the next line to restrict the run.
#export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

# Hyper-parameters shared by every forecast horizon.
seq_len=96
e_layers=3
down_sampling_layers=3
down_sampling_window=2
learning_rate=0.01
d_model=32
d_ff=64
batch_size=8

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "Traffic_${seq_len}_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len "$seq_len" \
    --label_len 0 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --itr 1 \
    --d_model "$d_model" \
    --d_ff "$d_ff" \
    --batch_size "$batch_size" \
    --learning_rate "$learning_rate" \
    --down_sampling_layers "$down_sampling_layers" \
    --down_sampling_method avg \
    --down_sampling_window "$down_sampling_window"
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/TimeXer.sh
================================================
# TimeXer long-term forecasting runs on traffic.csv.
# One job per forecast horizon. The 96- and 192-step runs use 3 encoder
# layers (--e_layers); the 336- and 720-step runs use 2.
export CUDA_VISIBLE_DEVICES=3

model_name=TimeXer

for pred_len in 96 192 336 720; do
  case "$pred_len" in
    96|192) e_layers=3 ;;
    *)      e_layers=2 ;;
  esac

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers "$e_layers" \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --d_ff 512 \
    --des 'Exp' \
    --batch_size 16 \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/TimesNet.sh
================================================
# TimesNet long-term forecasting runs on traffic.csv.
# The loop sweeps the forecast horizon; everything else is held constant.
export CUDA_VISIBLE_DEVICES=0

model_name=TimesNet

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model 512 \
    --d_ff 512 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/Transformer.sh
================================================
# Transformer long-term forecasting runs on traffic.csv.
# Launches one 3-epoch training job per forecast horizon.
export CUDA_VISIBLE_DEVICES=5

model_name=Transformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --itr 1 \
    --train_epochs 3
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/WPMixer.sh
================================================
# WPMixer long-term forecasting runs on traffic.csv.
# Per-horizon hyper-parameters live in parallel arrays indexed by the
# position of the horizon in pred_lens (requires bash for array support).
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset name; used to build the data directory and CSV file name.
dataset=traffic

# Per-horizon settings (index i corresponds to pred_lens[i]).
seq_lens=(1200 1200 1200 1200)
pred_lens=(96 192 336 720)
learning_rates=(0.0010385 0.000567053 0.001026715 0.001496217)
batches=(16 16 16 16)
epochs=(60 60 50 60)
dropouts=(0.05 0.05 0.0 0.05)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(16 32 32 32)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script
# (they are recorded here for reference only and are not passed to run.py).
wavelets=(db3 db3 bior3.1 db3)
levels=(1 1 1 1)
tfactors=(3 3 7 7)
dfactors=(5 5 7 3)
strides=(8 8 8 8)

# Loop over prediction lengths.
# The data location, loader name and channel counts follow the other Traffic
# scripts in this directory: ./dataset/traffic/, --data custom, 862 channels.
for i in "${!pred_lens[@]}"; do
  python -u run.py \
    --is_training 1 \
    --root_path "./dataset/${dataset}/" \
    --data_path "${dataset}.csv" \
    --model_id wpmixer \
    --model "$model_name" \
    --task_name long_term_forecast \
    --data custom \
    --features M \
    --seq_len "${seq_lens[$i]}" \
    --pred_len "${pred_lens[$i]}" \
    --label_len 0 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --d_model "${d_models[$i]}" \
    --patch_len "${patch_lens[$i]}" \
    --batch_size "${batches[$i]}" \
    --learning_rate "${learning_rates[$i]}" \
    --lradj "${lradjs[$i]}" \
    --dropout "${dropouts[$i]}" \
    --patience "${patiences[$i]}" \
    --train_epochs "${epochs[$i]}" \
    --use_amp
done
================================================
FILE: scripts/long_term_forecast/Traffic_script/iTransformer.sh
================================================
# iTransformer long-term forecasting runs on traffic.csv.
# The forecast horizon is the only setting that varies between jobs.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

for pred_len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/traffic/ \
    --data_path traffic.csv \
    --model_id "traffic_96_${pred_len}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$pred_len" \
    --e_layers 4 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 862 \
    --dec_in 862 \
    --c_out 862 \
    --des 'Exp' \
    --d_model 512 \
    --d_ff 512 \
    --batch_size 16 \
    --learning_rate 0.001 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Weather_script/Autoformer.sh
================================================
# Autoformer long-term forecasting runs on weather.csv.
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

# Launch one training job.
#   $1    forecast horizon (used for --pred_len and the model_id suffix)
#   $2..  optional extra run.py options, appended after the shared ones
run_forecast() {
  horizon=$1
  shift

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id "weather_96_${horizon}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

# Only the 96-step run overrides the number of training epochs.
run_forecast 96 --train_epochs 2
run_forecast 192
run_forecast 336
run_forecast 720
================================================
FILE: scripts/long_term_forecast/Weather_script/Crossformer.sh
================================================
# Crossformer long-term forecasting runs on weather.csv.
export CUDA_VISIBLE_DEVICES=7

model_name=Crossformer

# Launch one training job.
#   $1    forecast horizon (used for --pred_len and the model_id suffix)
#   $2..  optional extra run.py options, appended after the shared ones
run_forecast() {
  horizon=$1
  shift

  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id "weather_96_${horizon}" \
    --model "$model_name" \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len "$horizon" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    "$@"
}

# The 192- and 720-step runs are limited to a single training epoch.
run_forecast 96
run_forecast 192 --train_epochs 1
run_forecast 336
run_forecast 720 --train_epochs 1
================================================
FILE: scripts/long_term_forecast/Weather_script/FiLM.sh
================================================
# FiLM long-term forecasting on the Weather dataset.
# Each run uses the same value for the lookback (--seq_len) and the horizon
# (--pred_len); the runs are launched in the order 720, 96, 192, 336.
export CUDA_VISIBLE_DEVICES=6

for span in 720 96 192 336
do
  # NOTE: the model id keeps the literal "96" prefix for every run, even
  # though the lookback actually equals $span.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$span \
    --model FiLM \
    --data custom \
    --features M \
    --seq_len $span \
    --label_len 48 \
    --pred_len $span \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Weather_script/MICN.sh
================================================
# MICN long-term forecasting on the Weather dataset:
# lookback 96, label length 96, horizons 96 / 192 / 336 / 720.
export CUDA_VISIBLE_DEVICES=5

model_name=MICN

for horizon in 96 192 336 720
do
  # Only the 192- and 720-step runs pass --train_epochs 1; the remaining
  # horizons leave the epoch count to run.py's default.
  case $horizon in
    192|720) epoch_args="--train_epochs 1" ;;
    *)       epoch_args="" ;;
  esac

  # $epoch_args is intentionally unquoted so it expands to zero or two words.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 96 \
    --pred_len $horizon \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Weather_script/Mamba.sh
================================================
# Mamba long-term forecasting on the Weather dataset.
# Each run uses the same value for the lookback (--seq_len) and the horizon
# (--pred_len).
model_name=Mamba

for pred_len in 96 192 336 720
do
  # The final argument must NOT end with a backslash: a trailing "\" would
  # join the following "done" onto the python command line and leave the
  # for-loop unterminated.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_$pred_len'_'$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len $pred_len \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --enc_in 21 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 21 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Weather_script/MultiPatchFormer.sh
================================================
# MultiPatchFormer long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720.
# Every horizon uses the same hyper-parameters.
export CUDA_VISIBLE_DEVICES=0

model_name=MultiPatchFormer

for horizon in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $horizon \
    --e_layers 1 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --d_model 256 \
    --d_ff 512 \
    --des 'Exp' \
    --n_heads 8 \
    --batch_size 32 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Weather_script/Nonstationary_Transformer.sh
================================================
# Non-stationary Transformer long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720.
export CUDA_VISIBLE_DEVICES=6

model_name=Nonstationary_Transformer

for horizon in 96 192 336 720
do
  # The 96-step run passes --train_epochs 3 and 256-wide projector layers;
  # all longer horizons pass 128-wide projector layers and no epoch flag.
  case $horizon in
    96) tail_args="--train_epochs 3 --p_hidden_dims 256 256" ;;
    *)  tail_args="--p_hidden_dims 128 128" ;;
  esac

  # $tail_args is intentionally unquoted so it splits into separate words.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $horizon \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    $tail_args \
    --p_hidden_layers 2
done
================================================
FILE: scripts/long_term_forecast/Weather_script/PatchTST.sh
================================================
# PatchTST long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720,
# three training epochs per run.
export CUDA_VISIBLE_DEVICES=0

model_name=PatchTST

for horizon in 96 192 336 720
do
  # Per-horizon settings: 4 attention heads except for the 192-step run
  # (16 heads); the 336- and 720-step runs additionally pass --batch_size 128.
  heads=4
  batch_args=""
  case $horizon in
    192)     heads=16 ;;
    336|720) batch_args="--batch_size 128" ;;
  esac

  # $batch_args is intentionally unquoted so it expands to zero or two words.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $horizon \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    --n_heads $heads \
    $batch_args \
    --train_epochs 3
done
================================================
FILE: scripts/long_term_forecast/Weather_script/Pyraformer.sh
================================================
# Pyraformer long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720.
export CUDA_VISIBLE_DEVICES=7

model_name=Pyraformer

for horizon in 96 192 336 720
do
  # Only the 96-step run passes --train_epochs 2; the longer horizons leave
  # the epoch count to run.py's default.
  if [ "$horizon" = 96 ]; then
    epoch_args="--train_epochs 2"
  else
    epoch_args=""
  fi

  # $epoch_args is intentionally unquoted so it expands to zero or two words.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $horizon \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Weather_script/SegRNN.sh
================================================
# SegRNN long-term forecasting on the Weather dataset:
# lookback 96, segment length 48, horizons 96 / 192 / 336 / 720.
export CUDA_VISIBLE_DEVICES=0

model_name=SegRNN
seq_len=96

# Launch one training run; $1 is the prediction horizon.
run_horizon() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id "weather_${seq_len}_$1" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len $seq_len \
    --pred_len "$1" \
    --seg_len 48 \
    --enc_in 21 \
    --d_model 512 \
    --dropout 0.5 \
    --learning_rate 0.0001 \
    --des 'Exp' \
    --itr 1
}

for horizon in 96 192 336 720
do
  run_horizon "$horizon"
done
================================================
FILE: scripts/long_term_forecast/Weather_script/TSMixer.sh
================================================
# TSMixer long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720.
# Every horizon uses the same hyper-parameters, so the four runs are driven
# by one loop. The last argument of the command carries no trailing
# backslash: a stray "\" there would splice the next command line onto this
# one as extra arguments, so the next run would never start.
model_name=TSMixer

for pred_len in 96 192 336 720
do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Weather_script/TimeMixer.sh
================================================
#export CUDA_VISIBLE_DEVICES=0

# TimeMixer long-term forecasting on the Weather dataset:
# label length 0, horizons 96 / 192 / 336 / 720.
model_name=TimeMixer

# Hyper-parameters shared by all four runs.
seq_len=96
e_layers=3
d_model=16
d_ff=32
learning_rate=0.01
train_epochs=20
patience=10
down_sampling_layers=3
down_sampling_window=2
# NOTE(review): batch_size is assigned but never referenced; every run below
# passes a literal --batch_size 128. Confirm which value is intended.
batch_size=16

for horizon in 96 192 336 720
do
  # The 192-step run does not pass --d_layers; every other horizon passes
  # --d_layers 1.
  case $horizon in
    192) decoder_args="" ;;
    *)   decoder_args="--d_layers 1" ;;
  esac

  # $decoder_args is intentionally unquoted so it expands to zero or two words.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len $seq_len \
    --label_len 0 \
    --pred_len $horizon \
    --e_layers $e_layers \
    $decoder_args \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    --d_model $d_model \
    --d_ff $d_ff \
    --batch_size 128 \
    --learning_rate $learning_rate \
    --train_epochs $train_epochs \
    --patience $patience \
    --down_sampling_layers $down_sampling_layers \
    --down_sampling_method avg \
    --down_sampling_window $down_sampling_window
done
================================================
FILE: scripts/long_term_forecast/Weather_script/TimeXer.sh
================================================
# TimeXer long-term forecasting on the Weather dataset:
# lookback 96, label length 48, batch size 4, horizons 96 / 192 / 336 / 720.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeXer

for pred_len in 96 192 336 720
do
  # Per-horizon model size. The 336- and 720-step runs do not pass --d_ff
  # and therefore use run.py's default for it.
  case $pred_len in
    96)  e_layers=1; width_args="--d_model 256 --d_ff 512" ;;
    192) e_layers=3; width_args="--d_model 128 --d_ff 1024" ;;
    336) e_layers=1; width_args="--d_model 256" ;;
    720) e_layers=1; width_args="--d_model 128" ;;
  esac

  # $width_args is intentionally unquoted so it splits into separate words.
  # The last argument carries no trailing backslash: a stray "\" after
  # "--itr 1" would splice the next command line onto this one as arguments.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers $e_layers \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    $width_args \
    --batch_size 4 \
    --itr 1
done
================================================
FILE: scripts/long_term_forecast/Weather_script/TimesNet.sh
================================================
# TimesNet long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720.
export CUDA_VISIBLE_DEVICES=5

model_name=TimesNet

for horizon in 96 192 336 720
do
  # The 192- and 720-step runs pass --train_epochs 1; the other horizons
  # leave the epoch count to run.py's default.
  epoch_args=""
  if [ "$horizon" = 192 ] || [ "$horizon" = 720 ]; then
    epoch_args="--train_epochs 1"
  fi

  # $epoch_args is intentionally unquoted so it expands to zero or two words.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $horizon \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --d_model 32 \
    --d_ff 32 \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Weather_script/Transformer.sh
================================================
# Vanilla Transformer long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720.
export CUDA_VISIBLE_DEVICES=7

model_name=Transformer

for horizon in 96 192 336 720
do
  # Only the 96-step run passes --train_epochs 3; the longer horizons leave
  # the epoch count to run.py's default.
  case $horizon in
    96) epoch_args="--train_epochs 3" ;;
    *)  epoch_args="" ;;
  esac

  # $epoch_args is intentionally unquoted so it expands to zero or two words.
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$horizon \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $horizon \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --itr 1 \
    $epoch_args
done
================================================
FILE: scripts/long_term_forecast/Weather_script/WPMixer.sh
================================================
# WPMixer long-term forecasting on the Weather dataset, one run per horizon.
# Per-horizon hyper-parameters live in the parallel arrays below; index i of
# every array belongs to the same run.
export CUDA_VISIBLE_DEVICES=0

# Model name
model_name=WPMixer

# Dataset (used for the directory and CSV file name) and prediction lengths
dataset=weather
seq_lens=(512 512 512 512)
pred_lens=(96 192 336 720)
learning_rates=(0.000913333 0.001379042 0.000607991 0.001470479)
batches=(32 64 32 128)
epochs=(60 60 60 60)
dropouts=(0.4 0.4 0.4 0.4)
patch_lens=(16 16 16 16)
lradjs=(type3 type3 type3 type3)
d_models=(256 128 128 128)
patiences=(12 12 12 12)

# Model params below need to be set in WPMixer.py Line 15, instead of this script
# (they are not passed on the command line).
wavelets=(db3 db3 db3 db2)
levels=(2 1 2 1)
tfactors=(3 3 7 7)
dfactors=(7 7 7 5)
strides=(8 8 8 8)

# Loop over prediction lengths
for i in "${!pred_lens[@]}"; do
  # Data arguments follow the other Weather scripts: the CSV lives under
  # ./dataset/weather/, is loaded through the generic "custom" dataset, and
  # is treated as 21-channel multivariate data.
  # NOTE(review): previously this passed --root_path ./data/weather/ and
  # --data weather with no channel counts — confirm against data_provider.
  python -u run.py \
    --is_training 1 \
    --root_path ./dataset/$dataset/ \
    --data_path $dataset.csv \
    --model_id wpmixer \
    --model $model_name \
    --task_name long_term_forecast \
    --data custom \
    --features M \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --seq_len ${seq_lens[$i]} \
    --pred_len ${pred_lens[$i]} \
    --label_len 0 \
    --d_model ${d_models[$i]} \
    --patch_len ${patch_lens[$i]} \
    --batch_size ${batches[$i]} \
    --learning_rate ${learning_rates[$i]} \
    --lradj ${lradjs[$i]} \
    --dropout ${dropouts[$i]} \
    --patience ${patiences[$i]} \
    --train_epochs ${epochs[$i]} \
    --use_amp
done
================================================
FILE: scripts/long_term_forecast/Weather_script/iTransformer.sh
================================================
# iTransformer long-term forecasting on the Weather dataset:
# lookback 96, label length 48, horizons 96 / 192 / 336 / 720.
# Every horizon uses the same hyper-parameters, so the four runs are driven
# by one loop.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

for pred_len in 96 192 336 720
do
  # Each continuation backslash is preceded by a space so the value before it
  # stays a separate word, and the final argument has no trailing backslash
  # (a stray "\" after "--itr 1" would splice the next command line onto
  # this one as extra arguments).
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/weather/ \
    --data_path weather.csv \
    --model_id weather_96_$pred_len \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len 96 \
    --label_len 48 \
    --pred_len $pred_len \
    --e_layers 3 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 21 \
    --dec_in 21 \
    --c_out 21 \
    --des 'Exp' \
    --d_model 512 \
    --d_ff 512 \
    --itr 1
done
================================================
FILE: scripts/short_term_forecast/Autoformer_M4.sh
================================================
# Autoformer short-term forecasting on M4, one run per seasonal pattern.
# All patterns share the same hyper-parameters and train with the SMAPE loss.
export CUDA_VISIBLE_DEVICES=1

model_name=Autoformer

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly
do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id m4_$pattern \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/Crossformer_M4.sh
================================================
# Crossformer short-term forecasting on M4, one run per seasonal pattern
# (launched in the order Monthly, Yearly, Quarterly, Daily, Weekly, Hourly).
export CUDA_VISIBLE_DEVICES=5

model_name=Crossformer

for pattern in Monthly Yearly Quarterly Daily Weekly Hourly
do
  # Model width differs per pattern; Monthly, Weekly and Hourly use 32/32.
  case $pattern in
    Yearly)    d_model=16; d_ff=32 ;;
    Quarterly) d_model=64; d_ff=64 ;;
    Daily)     d_model=16; d_ff=16 ;;
    *)         d_model=32; d_ff=32 ;;
  esac

  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id m4_$pattern \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model $d_model \
    --d_ff $d_ff \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/DLinear_M4.sh
================================================
# DLinear short-term forecasting on M4, one run per seasonal pattern.
# All patterns share the same hyper-parameters and train with the SMAPE loss.
export CUDA_VISIBLE_DEVICES=1

model_name=DLinear

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly
do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id m4_$pattern \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/ETSformer_M4.sh
================================================
# ETSformer on M4: one short_term_forecast run of run.py per seasonal pattern.
# run.py and the dataset directory are addressed relative to the current
# directory, so launch this script from the directory that contains run.py.

# Expose only GPU 1 to the runs below.
export CUDA_VISIBLE_DEVICES=1

model_name=ETSformer

# All six runs share the same hyper-parameters; only the seasonal pattern and
# the model_id derived from it change. Every run reads from ./dataset/m4
# (the Monthly run used to point at ../dataset/m4, unlike the other five).
for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_${pattern}" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 2 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/FEDformer_M4.sh
================================================
# FEDformer on M4: invokes run.py once per seasonal pattern with identical
# hyper-parameters. Paths are relative to the directory holding run.py.

# Expose only GPU 1 to the runs below.
export CUDA_VISIBLE_DEVICES=1

model_name=FEDformer

# Patterns are processed in this fixed order; model_id is "m4_<pattern>".
for season in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$season" \
    --model_id "m4_$season" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/FiLM_M4.sh
================================================
# FiLM on M4: one short_term_forecast run per seasonal pattern. The runs
# differ only in the pattern name and in the d_model / d_ff widths.

# Expose only GPU 3 to the runs below.
export CUDA_VISIBLE_DEVICES=3

model_name=FiLM

# run_m4 PATTERN D_MODEL D_FF
#   Launches run.py for a single seasonal pattern with the given widths;
#   every other argument is fixed.
run_m4() {
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$1" \
    --model_id "m4_$1" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model "$2" \
    --d_ff "$3" \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
}

#      pattern    d_model d_ff
run_m4 Monthly    32      32
run_m4 Yearly     16      32
run_m4 Quarterly  64      64
run_m4 Daily      16      16
run_m4 Weekly     32      32
run_m4 Hourly     32      32
================================================
FILE: scripts/short_term_forecast/Informer_M4.sh
================================================
# Informer on M4: runs run.py for the short_term_forecast task once for each
# seasonal pattern, with the same hyper-parameters every time.

# Expose only GPU 1 to the runs below.
export CUDA_VISIBLE_DEVICES=1

model_name=Informer

# forecast_each PATTERN...
#   Runs run.py sequentially for every pattern given on the command line.
forecast_each() {
  for m4_subset in "$@"; do
    python -u run.py \
      --task_name short_term_forecast \
      --is_training 1 \
      --root_path ./dataset/m4 \
      --seasonal_patterns "$m4_subset" \
      --model_id "m4_${m4_subset}" \
      --model "$model_name" \
      --data m4 \
      --features M \
      --e_layers 2 \
      --d_layers 1 \
      --factor 3 \
      --enc_in 1 \
      --dec_in 1 \
      --c_out 1 \
      --batch_size 16 \
      --d_model 512 \
      --des 'Exp' \
      --itr 1 \
      --learning_rate 0.001 \
      --loss 'SMAPE'
  done
}

forecast_each Monthly Yearly Quarterly Weekly Daily Hourly
================================================
FILE: scripts/short_term_forecast/LightTS_M4.sh
================================================
# LightTS on M4: six back-to-back invocations of run.py, one per seasonal
# pattern, all sharing the same settings.

# Expose only GPU 1 to the runs below.
export CUDA_VISIBLE_DEVICES=1

model_name=LightTS

for freq in Monthly Yearly Quarterly Weekly Daily Hourly; do
  # The run identifier mirrors the pattern name, e.g. m4_Monthly.
  run_id="m4_${freq}"

  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$freq" \
    --model_id "$run_id" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/MICN_M4.sh
================================================
# MICN on M4: one short_term_forecast run per seasonal pattern. Only the
# pattern name and the d_model / d_ff widths vary between runs.

# Expose only GPU 4 to the runs below.
export CUDA_VISIBLE_DEVICES=4

model_name=MICN

for pattern in Monthly Yearly Quarterly Daily Weekly Hourly; do
  # Per-pattern widths; Monthly, Weekly and Hourly share the 32/32 setting.
  case "$pattern" in
    Yearly)    width=16; ff_width=32 ;;
    Quarterly) width=64; ff_width=64 ;;
    Daily)     width=16; ff_width=16 ;;
    *)         width=32; ff_width=32 ;;
  esac

  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_${pattern}" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model "$width" \
    --d_ff "$ff_width" \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/Mamba_M4.sh
================================================
# Mamba on M4: runs run.py once per seasonal pattern with a shared set of
# hyper-parameters (including the --expand / --d_conv options).

# GPU selection is left to the caller; uncomment to restrict the runs to GPU 1.
# export CUDA_VISIBLE_DEVICES=1

model_name=Mamba

for season in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$season" \
    --model_id "m4_$season" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --enc_in 1 \
    --expand 2 \
    --d_ff 16 \
    --d_conv 4 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 128 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/Nonstationary_Transformer_M4.sh
================================================
# Nonstationary_Transformer on M4: one short_term_forecast run per seasonal
# pattern. Each run also passes the --p_hidden_dims / --p_hidden_layers
# options in addition to the usual arguments.

# Expose only GPU 1 to the runs below.
export CUDA_VISIBLE_DEVICES=1

model_name=Nonstationary_Transformer

# run_pattern PATTERN
#   Launches run.py for the given seasonal pattern; all other arguments are
#   identical across patterns.
run_pattern() {
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$1" \
    --model_id "m4_$1" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE' \
    --p_hidden_dims 256 256 \
    --p_hidden_layers 2
}

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  run_pattern "$pattern"
done
================================================
FILE: scripts/short_term_forecast/Pyraformer_M4.sh
================================================
# Pyraformer on M4: iterates over the six seasonal patterns and launches
# run.py for each one with the same hyper-parameters.

# Expose only GPU 1 to the runs below.
export CUDA_VISIBLE_DEVICES=1

model_name=Pyraformer
seasonal_sets="Monthly Yearly Quarterly Weekly Daily Hourly"

# $seasonal_sets is deliberately unquoted so it splits into one word per run.
for subset in $seasonal_sets; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$subset" \
    --model_id "m4_${subset}" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/Reformer_M4.sh
================================================
# Reformer on M4: one short_term_forecast run of run.py per seasonal pattern,
# with identical settings for every pattern.

# Expose only GPU 1 to the runs below.
export CUDA_VISIBLE_DEVICES=1

model_name=Reformer

# launch PATTERN
#   Starts a single run.py invocation for the named seasonal pattern.
launch() {
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$1" \
    --model_id "m4_$1" \
    --model "$model_name" \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
}

launch Monthly
launch Yearly
launch Quarterly
launch Weekly
launch Daily
launch Hourly
================================================
FILE: scripts/short_term_forecast/TSMixer_M4.sh
================================================
# TSMixer on M4: one short_term_forecast run of run.py per seasonal pattern,
# with identical hyper-parameters for every pattern.

# GPU selection is left to the caller; uncomment to restrict the runs to GPU 1.
#export CUDA_VISIBLE_DEVICES=1

# Must match the model module shipped in models/ (TSMixer.py). The previous
# value "MTSMixer" does not correspond to any model file in the repository.
model_name=TSMixer

for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_${pattern}" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 16 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/TimeMixer_M4.sh
================================================
# TimeMixer on M4: one short_term_forecast run of run.py per seasonal pattern.
# The runs share every setting except the pattern name and --d_ff.

# Expose only GPU 0 to the runs below.
export CUDA_VISIBLE_DEVICES=0

model_name=TimeMixer

# Settings shared by all six runs.
e_layers=4
down_sampling_layers=1
down_sampling_window=2
learning_rate=0.01
d_model=32
# The runs have always been launched with a batch size of 128; the variable
# now reflects that value and is actually used (it was previously declared as
# 16 and ignored). The unused d_ff variable was dropped because --d_ff is
# chosen per pattern below.
batch_size=128

# run_m4 PATTERN D_FF
#   Launches run.py for one seasonal pattern with the given feed-forward width.
run_m4() {
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$1" \
    --model_id "m4_$1" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers $e_layers \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size $batch_size \
    --d_model $d_model \
    --d_ff "$2" \
    --des 'Exp' \
    --itr 1 \
    --learning_rate $learning_rate \
    --train_epochs 50 \
    --patience 20 \
    --down_sampling_layers $down_sampling_layers \
    --down_sampling_method avg \
    --down_sampling_window $down_sampling_window \
    --loss 'SMAPE'
}

#      pattern    d_ff
run_m4 Monthly    32
run_m4 Yearly     32
run_m4 Quarterly  64
run_m4 Daily      16
run_m4 Weekly     32
run_m4 Hourly     32
================================================
FILE: scripts/short_term_forecast/TimesNet_M4.sh
================================================
# Runs run.py (short_term_forecast, --is_training 1) with TimesNet on the six
# M4 seasonal subsets. Only the seasonal pattern and the d_model / d_ff widths
# differ between runs; every other flag is shared.
export CUDA_VISIBLE_DEVICES=0

model_name=TimesNet

# run_m4 <seasonal_pattern> <d_model> <d_ff>
# Launches one run.py invocation for the given M4 subset and layer widths.
run_m4() {
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$1" \
    --model_id "m4_$1" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model "$2" \
    --d_ff "$3" \
    --top_k 5 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
}

# pattern   d_model d_ff
run_m4 Monthly   32 32
run_m4 Yearly    16 32
run_m4 Quarterly 64 64
run_m4 Daily     16 16
run_m4 Weekly    32 32
run_m4 Hourly    32 32
================================================
FILE: scripts/short_term_forecast/Transformer_M4.sh
================================================
# Runs run.py (short_term_forecast, --is_training 1) with the Transformer model
# on the six M4 seasonal subsets. All hyper-parameters are identical across
# runs; only the seasonal pattern (and the model_id derived from it) changes.
export CUDA_VISIBLE_DEVICES=1

model_name=Transformer

# Patterns are listed in the order the runs are executed.
for pattern in Monthly Yearly Quarterly Weekly Daily Hourly; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$pattern" \
    --model_id "m4_${pattern}" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: scripts/short_term_forecast/iTransformer_M4.sh
================================================
# Runs run.py (short_term_forecast, --is_training 1) with iTransformer on the
# six M4 seasonal subsets, one run per subset, using the same hyper-parameters
# for every run.
export CUDA_VISIBLE_DEVICES=0

model_name=iTransformer

# Space-separated list of M4 subsets, in execution order.
seasonal_patterns="Monthly Yearly Quarterly Weekly Daily Hourly"

for season in $seasonal_patterns; do
  python -u run.py \
    --task_name short_term_forecast \
    --is_training 1 \
    --root_path ./dataset/m4 \
    --seasonal_patterns "$season" \
    --model_id "m4_$season" \
    --model $model_name \
    --data m4 \
    --features M \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 1 \
    --dec_in 1 \
    --c_out 1 \
    --batch_size 16 \
    --d_model 512 \
    --des 'Exp' \
    --itr 1 \
    --learning_rate 0.001 \
    --loss 'SMAPE'
done
================================================
FILE: tutorial/TimesNet_tutorial.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TimesNet Tutorial\n",
"**Set-up instructions:** this notebook gives a tutorial on the learning tasks supported by `TimesNet`.\n",
"\n",
"`TimesNet` supports five tasks: long-term forecasting, short-term forecasting, imputation, anomaly detection, and classification."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Install Python 3.8. For convenience, execute the following command."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"pip install -r requirements.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Package Import"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch \n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.fft\n",
"from layers.Embed import DataEmbedding\n",
"from layers.Conv_Blocks import Inception_Block_V1 \n",
" #convolution block used for convoluting the 2D time data, changeable"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. TimesBlock Construction\n",
" The core idea of `TimesNet` lies in the construction of `TimesBlock`, which first finds the dominant frequencies by applying an FFT to the data, then reshapes the time series into a 2D variation for each of these frequencies (periods), applies a 2D convolution, and finally reshapes the outputs back and sums them with weights to form the final output.\n",
"\n",
" In the following section, we will take a detailed look at `TimesBlock`.\n",
"\n",
" TimesBlock has 2 members. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class TimesBlock(nn.Module):\n",
" def __init__(self, configs):\n",
" ...\n",
" \n",
" def forward(self, x):\n",
" ..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's focus on ```__init__(self, configs):```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def __init__(self, configs): ##configs is the configuration defined for TimesBlock\n",
" super(TimesBlock, self).__init__() \n",
" self.seq_len = configs.seq_len ##sequence length \n",
" self.pred_len = configs.pred_len ##prediction length\n",
" self.k = configs.top_k ##k denotes how many top frequencies are \n",
" #taken into consideration\n",
" # parameter-efficient design\n",
" self.conv = nn.Sequential(\n",
" Inception_Block_V1(configs.d_model, configs.d_ff,\n",
" num_kernels=configs.num_kernels),\n",
" nn.GELU(),\n",
" Inception_Block_V1(configs.d_ff, configs.d_model,\n",
" num_kernels=configs.num_kernels)\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, have a look at ```forward(self, x)```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def forward(self, x):\n",
" B, T, N = x.size()\n",
" #B: batch size T: length of time series N:number of features\n",
" period_list, period_weight = FFT_for_Period(x, self.k)\n",
" #FFT_for_Period() will be shown later. Here, period_list([top_k]) denotes \n",
" #the top_k-significant period and period_weight([B, top_k]) denotes its weight(amplitude)\n",
"\n",
" res = []\n",
" for i in range(self.k):\n",
" period = period_list[i]\n",
"\n",
" # padding : to form a 2D map, we need total length of the sequence, plus the part \n",
" # to be predicted, to be divisible by the period, so padding is needed\n",
" if (self.seq_len + self.pred_len) % period != 0:\n",
" length = (\n",
" ((self.seq_len + self.pred_len) // period) + 1) * period\n",
" padding = torch.zeros([x.shape[0], (length - (self.seq_len + self.pred_len)), x.shape[2]]).to(x.device)\n",
" out = torch.cat([x, padding], dim=1)\n",
" else:\n",
" length = (self.seq_len + self.pred_len)\n",
" out = x\n",
"\n",
" # reshape: we need each channel of a single piece of data to be a 2D variable,\n",
" # Also, in order to implement the 2D conv later on, we need to adjust the 2 dimensions \n",
" # to be convolutioned to the last 2 dimensions, by calling the permute() func.\n",
" # Whereafter, to make the tensor contiguous in memory, call contiguous()\n",
" out = out.reshape(B, length // period, period,\n",
" N).permute(0, 3, 1, 2).contiguous()\n",
" \n",
" #2D convolution to grap the intra- and inter- period information\n",
" out = self.conv(out)\n",
"\n",
" # reshape back, similar to reshape\n",
" out = out.permute(0, 2, 3, 1).reshape(B, -1, N)\n",
" \n",
" #truncating down the padded part of the output and put it to result\n",
" res.append(out[:, :(self.seq_len + self.pred_len), :])\n",
" res = torch.stack(res, dim=-1) #res: 4D [B, length , N, top_k]\n",
"\n",
" # adaptive aggregation\n",
" #First, use softmax to get the normalized weight from amplitudes --> 2D [B,top_k]\n",
" period_weight = F.softmax(period_weight, dim=1) \n",
"\n",
" #after two unsqueeze(1),shape -> [B,1,1,top_k],so repeat the weight to fit the shape of res\n",
" period_weight = period_weight.unsqueeze(\n",
" 1).unsqueeze(1).repeat(1, T, N, 1)\n",
" \n",
" #add by weight the top_k periods' result, getting the result of this TimesBlock\n",
" res = torch.sum(res * period_weight, -1)\n",
"\n",
" # residual connection\n",
" res = res + x\n",
" return res"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The ```FFT_for_Period``` above is given by:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def FFT_for_Period(x, k=2):\n",
" # xf shape [B, T, C], denoting the amplitude of frequency(T) given the datapiece at B,N\n",
" xf = torch.fft.rfft(x, dim=1) \n",
"\n",
" # find period by amplitudes: here we assume that the periodic features are basically constant\n",
" # in different batch and channel, so we mean out these two dimensions, getting a list frequency_list with shape[T] \n",
" # each element at pos t of frequency_list denotes the overall amplitude at frequency (t)\n",
" frequency_list = abs(xf).mean(0).mean(-1) \n",
" frequency_list[0] = 0\n",
"\n",
" #by torch.topk(),we can get the biggest k elements of frequency_list, and its positions(i.e. the k-main frequencies in top_list)\n",
" _, top_list = torch.topk(frequency_list, k)\n",
"\n",
" #Returns a new Tensor 'top_list', detached from the current graph.\n",
" #The result will never require gradient.Convert to a numpy instance\n",
" top_list = top_list.detach().cpu().numpy()\n",
" \n",
" #period:a list of shape [top_k], recording the periods of mean frequencies respectively\n",
" period = x.shape[1] // top_list\n",
"\n",
" #Here,the 2nd item returned has a shape of [B, top_k],representing the biggest top_k amplitudes \n",
" # for each piece of data, with N features being averaged.\n",
" return period, abs(xf).mean(-1)[:, top_list] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make it clearer, please see the figures below.\n",
"\n",
"\n",
"\n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For more details, please read our paper \n",
"(link: https://openreview.net/pdf?id=ju_Uqw384Oq)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4. TimesNet\n",
"\n",
"So far we've got `TimesBlock`, which excels at capturing intra- and inter-period temporal information. We are now able to build `TimesNet`, which is proficient in multiple tasks, including short- and long-term forecasting, imputation, classification, and anomaly detection.\n",
"\n",
"In this section, we'll take a detailed look at how `TimesNet` gains its power in these tasks."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class Model(nn.Module):\n",
" def __init__(self, configs):\n",
" ...\n",
" \n",
" def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):\n",
" ...\n",
"\n",
" def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n",
" ...\n",
"\n",
" def anomaly_detection(self, x_enc):\n",
" ...\n",
" \n",
" def classification(self, x_enc, x_mark_enc):\n",
" ...\n",
"\n",
" def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n",
" ..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First of all, let's focus on ```__init__(self, configs):```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def __init__(self, configs):\n",
" super(Model, self).__init__()\n",
" #params init\n",
" self.configs = configs\n",
" self.task_name = configs.task_name\n",
" self.seq_len = configs.seq_len\n",
" self.label_len = configs.label_len\n",
" self.pred_len = configs.pred_len\n",
"\n",
" #stack TimesBlock for e_layers times to form the main part of TimesNet, named model\n",
" self.model = nn.ModuleList([TimesBlock(configs)\n",
" for _ in range(configs.e_layers)])\n",
" \n",
" #embedding & normalization\n",
" # enc_in is the encoder input size, the number of features for a piece of data\n",
" # d_model is the dimension of embedding\n",
" self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,\n",
" configs.dropout)\n",
" self.layer = configs.e_layers # num of encoder layers\n",
" self.layer_norm = nn.LayerNorm(configs.d_model)\n",
"\n",
" #define the some layers for different tasks\n",
" if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':\n",
" self.predict_linear = nn.Linear(\n",
" self.seq_len, self.pred_len + self.seq_len)\n",
" self.projection = nn.Linear(\n",
" configs.d_model, configs.c_out, bias=True)\n",
" if self.task_name == 'imputation' or self.task_name == 'anomaly_detection':\n",
" self.projection = nn.Linear(\n",
" configs.d_model, configs.c_out, bias=True)\n",
" if self.task_name == 'classification':\n",
" self.act = F.gelu\n",
" self.dropout = nn.Dropout(configs.dropout)\n",
" self.projection = nn.Linear(\n",
" configs.d_model * configs.seq_len, configs.num_class)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 4.1 Forecast\n",
"\n",
"The basic idea of forecasting is to lengthen the known sequence to (seq_len+pred_len), which is the total length after forecasting. Several TimesBlock layers, each followed by layer normalization, then extract the underlying intra- and inter-period information. With this information, we project the representation to the output space. Finally, de-normalization (the normalization scheme is borrowed from Non-stationary Transformer) gives the final output."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):\n",
" # Normalization from Non-stationary Transformer at temporal dimension\n",
" means = x_enc.mean(1, keepdim=True).detach() #[B,T]\n",
" x_enc = x_enc - means\n",
" stdev = torch.sqrt(\n",
" torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)\n",
" x_enc /= stdev\n",
"\n",
" # embedding: projecting a number to a C-channel vector\n",
" enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] C is d_model\n",
" enc_out = self.predict_linear(enc_out.permute(0, 2, 1)).permute(\n",
" 0, 2, 1) # align temporal dimension [B,pred_len+seq_len,C]\n",
" \n",
" # TimesNet: pass through TimesBlock for self.layer times each with layer normalization\n",
" for i in range(self.layer):\n",
" enc_out = self.layer_norm(self.model[i](enc_out))\n",
"\n",
" # project back #[B,T,d_model]-->[B,T,c_out]\n",
" dec_out = self.projection(enc_out) \n",
"\n",
" # De-Normalization from Non-stationary Transformer\n",
" dec_out = dec_out * \\\n",
" (stdev[:, 0, :].unsqueeze(1).repeat(\n",
" 1, self.pred_len + self.seq_len, 1)) #lengthen the stdev to fit the dec_out\n",
" dec_out = dec_out + \\\n",
" (means[:, 0, :].unsqueeze(1).repeat(\n",
" 1, self.pred_len + self.seq_len, 1)) #lengthen the mean to fit the dec_out\n",
" return dec_out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 4.2 Imputation\n",
"\n",
"Imputation aims to fill in missing values in a time series, so to some degree it is similar to forecasting. We can therefore handle it with similar steps."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n",
" # Normalization from Non-stationary Transformer\n",
" means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)\n",
" means = means.unsqueeze(1).detach()\n",
" x_enc = x_enc - means\n",
" x_enc = x_enc.masked_fill(mask == 0, 0)\n",
" stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) /\n",
" torch.sum(mask == 1, dim=1) + 1e-5)\n",
" stdev = stdev.unsqueeze(1).detach()\n",
" x_enc /= stdev\n",
"\n",
" # embedding\n",
" enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C]\n",
" # TimesNet\n",
" for i in range(self.layer):\n",
" enc_out = self.layer_norm(self.model[i](enc_out))\n",
" # project back\n",
" dec_out = self.projection(enc_out)\n",
"\n",
" # De-Normalization from Non-stationary Transformer\n",
" dec_out = dec_out * \\\n",
" (stdev[:, 0, :].unsqueeze(1).repeat(\n",
" 1, self.pred_len + self.seq_len, 1))\n",
" dec_out = dec_out + \\\n",
" (means[:, 0, :].unsqueeze(1).repeat(\n",
" 1, self.pred_len + self.seq_len, 1))\n",
" return dec_out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 4.3 Anomaly Detection\n",
"\n",
"Similar to Imputation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def anomaly_detection(self, x_enc):\n",
" # Normalization from Non-stationary Transformer\n",
" means = x_enc.mean(1, keepdim=True).detach()\n",
" x_enc = x_enc - means\n",
" stdev = torch.sqrt(\n",
" torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)\n",
" x_enc /= stdev\n",
" # embedding\n",
" enc_out = self.enc_embedding(x_enc, None) # [B,T,C]\n",
" # TimesNet\n",
" for i in range(self.layer):\n",
" enc_out = self.layer_norm(self.model[i](enc_out))\n",
" # project back\n",
" dec_out = self.projection(enc_out)\n",
" # De-Normalization from Non-stationary Transformer\n",
" dec_out = dec_out * \\\n",
" (stdev[:, 0, :].unsqueeze(1).repeat(\n",
" 1, self.pred_len + self.seq_len, 1))\n",
" dec_out = dec_out + \\\n",
" (means[:, 0, :].unsqueeze(1).repeat(\n",
" 1, self.pred_len + self.seq_len, 1))\n",
" return dec_out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 4.4 Classification"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def classification(self, x_enc, x_mark_enc):\n",
" # embedding\n",
" enc_out = self.enc_embedding(x_enc, None) # [B,T,C]\n",
" # TimesNet\n",
" for i in range(self.layer):\n",
" enc_out = self.layer_norm(self.model[i](enc_out))\n",
"\n",
" # Output\n",
" # the output transformer encoder/decoder embeddings don't include non-linearity\n",
" output = self.act(enc_out)\n",
" output = self.dropout(output)\n",
"\n",
" # zero-out padding embeddings:The primary role of x_mark_enc in the code is to \n",
" # zero out the embeddings for padding positions in the output tensor through \n",
" # element-wise multiplication, helping the model to focus on meaningful data \n",
" # while disregarding padding.\n",
" output = output * x_mark_enc.unsqueeze(-1)\n",
" \n",
" # (batch_size, seq_length * d_model)\n",
" output = output.reshape(output.shape[0], -1)\n",
" output = self.projection(output) # (batch_size, num_classes)\n",
" return output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, with all the tasks above in place, we can complete `forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):`. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n",
" if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':\n",
" dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)\n",
" return dec_out[:, -self.pred_len:, :] # [B, L, D] return the predicted part of sequence\n",
" if self.task_name == 'imputation':\n",
" dec_out = self.imputation(\n",
" x_enc, x_mark_enc, x_dec, x_mark_dec, mask)\n",
" return dec_out # [B, L, D] return the whole sequence with missing value estimated\n",
" if self.task_name == 'anomaly_detection':\n",
" dec_out = self.anomaly_detection(x_enc)\n",
" return dec_out # [B, L, D] return the sequence that should be correct\n",
" if self.task_name == 'classification':\n",
" dec_out = self.classification(x_enc, x_mark_enc)\n",
" return dec_out # [B, N] return the classification result\n",
" return None"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 5. Training and Settings\n",
"\n",
"By now we've successfully built `TimesNet`. The next question is how to train and test this neural network. Training, validation and testing are implemented in the __*exp*__ part, where the code for the different tasks is gathered. These experiments are not specific to `TimesNet`; they work for any other time series representation model as well. Here, we simply use `TimesNet` as the example.\n",
"\n",
"`TimesNet` is state-of-the-art on multiple tasks, but here we only introduce its training for the long-term forecasting task, since the backbone of the training process for the other tasks is similar. The test and validation code is also easy to understand once you know how the training process works. So first of all, we focus on training `TimesNet` for long-term forecasting.\n",
"\n",
"We will discuss many aspects, including the training process, training loss etc."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 5.1 Training for Long-term Forecast Task\n",
"\n",
"The following code shows the process of training a model for the long-term forecasting task. We'll take a detailed look at it. In brief, the training procedure can be divided into several parts: Data Preparation, Creating Save Path, Initialization, Optimizer and Loss Function Selection, Using Mixed Precision Training, Training Loop, Validation and Early Stopping, Learning Rate Adjustment, and Loading the Best Model.\n",
"\n",
"For more details, please see the code below. The `train` process is defined in the experiment __class Exp_Long_Term_Forecast__."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train(self, setting): #setting is the args for this model training\n",
" #get train dataloader\n",
" train_data, train_loader = self._get_data(flag='train')\n",
" vali_data, vali_loader = self._get_data(flag='val')\n",
" test_data, test_loader = self._get_data(flag='test')\n",
"\n",
" # set path of checkpoint for saving and loading model\n",
" path = os.path.join(self.args.checkpoints, setting)\n",
" if not os.path.exists(path):\n",
" os.makedirs(path)\n",
" time_now = time.time()\n",
"\n",
" train_steps = len(train_loader)\n",
"\n",
" # EarlyStopping is typically a custom class or function that monitors the performance \n",
" # of a model during training, usually by tracking a certain metric (commonly validation \n",
" # loss or accuracy).It's a common technique used in deep learning to prevent overfitting \n",
" # during the training\n",
" early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)\n",
"\n",
" #Optimizer and Loss Function Selection\n",
" model_optim = self._select_optimizer()\n",
" criterion = self._select_criterion()\n",
"\n",
" # AMP training is a technique that uses lower-precision data types (e.g., float16) \n",
" # for certain computations to accelerate training and reduce memory usage.\n",
" if self.args.use_amp: \n",
" scaler = torch.cuda.amp.GradScaler()\n",
" for epoch in range(self.args.train_epochs):\n",
" iter_count = 0\n",
" train_loss = []\n",
" self.model.train()\n",
" epoch_time = time.time()\n",
"\n",
" #begin training in this epoch\n",
" for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):\n",
" iter_count += 1\n",
" model_optim.zero_grad()\n",
" batch_x = batch_x.float().to(self.device) #input features\n",
" batch_y = batch_y.float().to(self.device) #target features\n",
"\n",
" # _mark holds information about time-related features. Specifically, it is a \n",
" # tensor that encodes temporal information and is associated with the \n",
" # input data batch_x.\n",
" batch_x_mark = batch_x_mark.float().to(self.device)\n",
" batch_y_mark = batch_y_mark.float().to(self.device)\n",
" # decoder input(didn't use in TimesNet case)\n",
" dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n",
" dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n",
" # encoder - decoder\n",
" if self.args.use_amp: #in the case of TimesNet, use_amp should be False\n",
" with torch.cuda.amp.autocast():\n",
" # whether to output attention in ecoder,in TimesNet case is no\n",
" if self.args.output_attention: \n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n",
" # model the input\n",
" else:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n",
"\n",
" # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, \n",
" # S:univariate predict univariate, MS:multivariate predict univariate'\n",
" #if multivariate predict univariate',then output should be the last column of the decoder\n",
" # output, so f_dim = -1 to only contain the last column, else is all columns\n",
" f_dim = -1 if self.args.features == 'MS' else 0 \n",
" outputs = outputs[:, -self.args.pred_len:, f_dim:]\n",
" batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n",
"\n",
" # calc loss\n",
" loss = criterion(outputs, batch_y)\n",
" train_loss.append(loss.item())\n",
" else: #similar to when use_amp is True\n",
" if self.args.output_attention:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n",
" else:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n",
" f_dim = -1 if self.args.features == 'MS' else 0\n",
" outputs = outputs[:, -self.args.pred_len:, f_dim:]\n",
" batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n",
" loss = criterion(outputs, batch_y)\n",
" train_loss.append(loss.item())\n",
"\n",
" # When train rounds attain some 100-multiple, print speed, left time, loss. etc feedback\n",
" if (i + 1) % 100 == 0:\n",
" print(\"\\titers: {0}, epoch: {1} | loss: {2:.7f}\".format(i + 1, epoch + 1, loss.item()))\n",
" speed = (time.time() - time_now) / iter_count\n",
" left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)\n",
" print('\\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))\n",
" iter_count = 0\n",
" time_now = time.time()\n",
"\n",
" #BP\n",
" if self.args.use_amp:\n",
" scaler.scale(loss).backward()\n",
" scaler.step(model_optim)\n",
" scaler.update()\n",
" else:\n",
" loss.backward()\n",
" model_optim.step()\n",
" \n",
" #This epoch comes to end, print information\n",
" print(\"Epoch: {} cost time: {}\".format(epoch + 1, time.time() - epoch_time))\n",
" train_loss = np.average(train_loss)\n",
"\n",
" #run test and validation on current model\n",
" vali_loss = self.vali(vali_data, vali_loader, criterion)\n",
" test_loss = self.vali(test_data, test_loader, criterion)\n",
"\n",
" #print train, test, vali loss information\n",
" print(\"Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}\".format(\n",
" epoch + 1, train_steps, train_loss, vali_loss, test_loss))\n",
" \n",
" #Decide whether to trigger Early Stopping. if early_stop is true, it means that \n",
" #this epoch's training is now at a flat slope, so stop further training for this epoch.\n",
" early_stopping(vali_loss, self.model, path)\n",
" if early_stopping.early_stop:\n",
" print(\"Early stopping\")\n",
" break\n",
"\n",
" #adjust learning keys\n",
" adjust_learning_rate(model_optim, epoch + 1, self.args)\n",
" best_model_path = path + '/' + 'checkpoint.pth'\n",
"\n",
" # loading the trained model's state dictionary from a saved checkpoint file \n",
" # located at best_model_path.\n",
" self.model.load_state_dict(torch.load(best_model_path))\n",
" return self.model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to learn more, please see it at exp/exp_long_term_forecasting.py"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 5.2 Early Stopping Mechanism\n",
"\n",
"__EarlyStopping__ is typically a custom class or function that monitors the performance of a model during training, usually by tracking a certain metric (commonly validation loss or accuracy). It's a common technique used in deep learning to prevent overfitting during training.\n",
"\n",
"Let's see the code below(original code is in `tools.py`)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class EarlyStopping:\n",
" def __init__(self, patience=7, verbose=False, delta=0):\n",
" self.patience = patience # how many times will you tolerate for loss not being on decrease\n",
" self.verbose = verbose # whether to print tip info\n",
" self.counter = 0 # now how many times loss not on decrease\n",
" self.best_score = None\n",
" self.early_stop = False\n",
" self.val_loss_min = np.inf\n",
" self.delta = delta\n",
"\n",
" def __call__(self, val_loss, model, path):\n",
" score = -val_loss\n",
" if self.best_score is None:\n",
" self.best_score = score\n",
" self.save_checkpoint(val_loss, model, path)\n",
"\n",
" # meaning: current score is not 'delta' better than best_score, representing that \n",
" # further training may not bring remarkable improvement in loss. \n",
" elif score < self.best_score + self.delta: \n",
" self.counter += 1\n",
" print(f'EarlyStopping counter: {self.counter} out of {self.patience}')\n",
" # 'No Improvement' times become higher than patience --> Stop Further Training\n",
" if self.counter >= self.patience:\n",
" self.early_stop = True\n",
"\n",
" else: #model's loss is still on decrease, save the now best model and go on training\n",
" self.best_score = score\n",
" self.save_checkpoint(val_loss, model, path)\n",
" self.counter = 0\n",
"\n",
" def save_checkpoint(self, val_loss, model, path):\n",
" ### used for saving the current best model\n",
" if self.verbose:\n",
" print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')\n",
" torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')\n",
" self.val_loss_min = val_loss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 5.3 Optimizer and Criterion\n",
"\n",
"The optimizer and criterion are defined in __class Exp_Long_Term_Forecast__ and called in the training process by function `self._select_optimizer()` and `self._select_criterion()`. Here, for the long-term forecasting task, we simply adopt the Adam optimizer and MSELoss to measure the loss between real data and predicted ones."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def _select_optimizer(self):\n",
" model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)\n",
" return model_optim\n",
"\n",
"def _select_criterion(self):\n",
" criterion = nn.MSELoss()\n",
" return criterion"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 5.4 Automatic Mixed Precision(AMP)\n",
"\n",
"AMP is a technique used in deep learning to improve training speed and reduce memory usage. AMP achieves this by mixing calculations in half-precision (16-bit floating-point) and single-precision (32-bit floating-point).\n",
"\n",
"Let's have a closer look on this snippet:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#in forward process:\n",
"with torch.cuda.amp.autocast():\n",
"\n",
"...\n",
"\n",
"#in BP process:\n",
"if self.args.use_amp:\n",
" scaler.scale(loss).backward()\n",
" scaler.step(model_optim)\n",
" scaler.update()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"` with torch.cuda.amp.autocast():` : The purpose of using torch.cuda.amp.autocast() is to take advantage of the speed and memory efficiency benefits of mixed-precision training while maintaining numerical stability. Some deep learning models can benefit significantly from this technique, especially on modern GPUs with hardware support for half-precision arithmetic. It allows you to perform certain calculations more quickly while still ensuring that critical calculations (e.g., gradient updates) are performed with sufficient precision to avoid loss of accuracy.\n",
"\n",
"`scaler.scale(loss).backward()`: If AMP is enabled, it uses a scaler object created with torch.cuda.amp.GradScaler() to automatically scale the loss and perform backward propagation. This is a crucial part of AMP, ensuring numerical stability. Before backpropagation, the loss is multiplied by a scale factor so that small gradient values do not underflow to zero in half precision.\n",
"\n",
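"`scaler.step(model_optim)`: Next, the scaler calls the step method, which first unscales the gradients and then calls the step of the model's optimizer (model_optim); if the gradients contain infs or NaNs, the optimizer step is skipped for this iteration. This is used to update the model's weights to minimize the loss function.\n",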
"\n",
"`scaler.update()`: Finally, the scaler calls the update method, which updates the scaling factor to ensure correct scaling of the loss for the next iteration. This step helps dynamically adjust the scaling of gradients to adapt to different training scenarios."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 5.5 Learning Rate Adjustment\n",
"\n",
"While the optimizer is responsible for adapting the learning rate with epochs, we would still like to do some adjustment on it manually, as indicated in the function `adjust_learning_rate(model_optim, epoch + 1, self.args)`, whose code is shown below (original code is in `tools.py`): "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def adjust_learning_rate(optimizer, epoch, args):\n",
"\n",
" #first type: learning rate decrease with epoch by exponential\n",
" if args.lradj == 'type1':\n",
" lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}\n",
"\n",
" #second type: learning rate decrease manually\n",
" elif args.lradj == 'type2':\n",
" lr_adjust = {\n",
" 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,\n",
" 10: 5e-7, 15: 1e-7, 20: 5e-8\n",
" }\n",
"\n",
" #1st type: update in each epoch\n",
" #2nd type: only update in epochs that are written in Dict lr_adjust\n",
" if epoch in lr_adjust.keys():\n",
" lr = lr_adjust[epoch]\n",
" \n",
" # change the learning rate for different parameter groups within the optimizer\n",
" for param_group in optimizer.param_groups:\n",
" param_group['lr'] = lr\n",
" print('Updating learning rate to {}'.format(lr))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6. Validation and Testing\n",
"\n",
"During training, the model continuously adjusts its weights and parameters to minimize training error. However, this may not reflect the model's performance on unseen data. Validation allows us to periodically assess the model's performance on data that is different from the training data, providing insights into the model's generalization ability.\n",
"\n",
"By comparing performance on the validation set, we can identify whether the model is overfitting. Overfitting occurs when a model performs well on training data but poorly on unseen data. Monitoring performance on the validation set helps detect overfitting early and take measures to prevent it, such as early stopping or adjusting hyperparameters.\n",
"\n",
"Here, we still take long-term forecasting as an example, similar to train process:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def vali(self, vali_data, vali_loader, criterion):\n",
" total_loss = []\n",
"\n",
" #evaluation mode\n",
" self.model.eval()\n",
" with torch.no_grad():\n",
" for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):\n",
" batch_x = batch_x.float().to(self.device)\n",
" batch_y = batch_y.float()\n",
"\n",
" batch_x_mark = batch_x_mark.float().to(self.device)\n",
" batch_y_mark = batch_y_mark.float().to(self.device)\n",
"\n",
" # decoder input\n",
" dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n",
" dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n",
" # encoder - decoder\n",
" if self.args.use_amp:\n",
" with torch.cuda.amp.autocast():\n",
" if self.args.output_attention:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n",
" else:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n",
" else:\n",
" if self.args.output_attention:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n",
" else:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n",
" f_dim = -1 if self.args.features == 'MS' else 0\n",
" outputs = outputs[:, -self.args.pred_len:, f_dim:]\n",
" batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n",
"\n",
" pred = outputs.detach().cpu()\n",
" true = batch_y.detach().cpu()\n",
"\n",
" loss = criterion(pred, true)\n",
"\n",
" total_loss.append(loss)\n",
" total_loss = np.average(total_loss)\n",
" self.model.train()\n",
" return total_loss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing is similar to validation, but its purpose is to examine how well the model behaves, so it's common to add some visualization with __matplotlib.pyplot__. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"def visual(true, preds=None, name='./pic/test.pdf'):\n",
" \"\"\"\n",
" Results visualization\n",
" \"\"\"\n",
" plt.figure()\n",
" plt.plot(true, label='GroundTruth', linewidth=2)\n",
" if preds is not None:\n",
" plt.plot(preds, label='Prediction', linewidth=2)\n",
" plt.legend()\n",
" plt.savefig(name, bbox_inches='tight')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def test(self, setting, test=0):\n",
" test_data, test_loader = self._get_data(flag='test')\n",
" if test:\n",
" print('loading model')\n",
" self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))\n",
"\n",
" preds = []\n",
" trues = []\n",
" folder_path = './test_results/' + setting + '/'\n",
" if not os.path.exists(folder_path):\n",
" os.makedirs(folder_path)\n",
"\n",
" self.model.eval()\n",
" with torch.no_grad():\n",
" for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):\n",
" batch_x = batch_x.float().to(self.device)\n",
" batch_y = batch_y.float().to(self.device)\n",
"\n",
" batch_x_mark = batch_x_mark.float().to(self.device)\n",
" batch_y_mark = batch_y_mark.float().to(self.device)\n",
"\n",
" # decoder input\n",
" dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n",
" dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n",
" # encoder - decoder\n",
" if self.args.use_amp:\n",
" with torch.cuda.amp.autocast():\n",
" if self.args.output_attention:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n",
" else:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n",
" else:\n",
" if self.args.output_attention:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n",
"\n",
" else:\n",
" outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n",
"\n",
" f_dim = -1 if self.args.features == 'MS' else 0\n",
" outputs = outputs[:, -self.args.pred_len:, f_dim:]\n",
" batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n",
" outputs = outputs.detach().cpu().numpy()\n",
" batch_y = batch_y.detach().cpu().numpy()\n",
"\n",
" #inverse the data if scaled\n",
" if test_data.scale and self.args.inverse:\n",
" outputs = test_data.inverse_transform(outputs)\n",
" batch_y = test_data.inverse_transform(batch_y)\n",
"\n",
" pred = outputs\n",
" true = batch_y\n",
"\n",
" preds.append(pred)\n",
" trues.append(true)\n",
"\n",
" #visualize one piece of data every 20\n",
" if i % 20 == 0:\n",
" input = batch_x.detach().cpu().numpy()\n",
" #the whole sequence\n",
" gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)\n",
" pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)\n",
" visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))\n",
"\n",
" preds = np.array(preds)\n",
" trues = np.array(trues) # shape[batch_num, batch_size, pred_len, features]\n",
" print('test shape:', preds.shape, trues.shape)\n",
" preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])\n",
" trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])\n",
" print('test shape:', preds.shape, trues.shape)\n",
"\n",
" # result save\n",
" folder_path = './results/' + setting + '/'\n",
" if not os.path.exists(folder_path):\n",
" os.makedirs(folder_path)\n",
"\n",
" mae, mse, rmse, mape, mspe = metric(preds, trues)\n",
" print('mse:{}, mae:{}'.format(mse, mae))\n",
" f = open(\"result_long_term_forecast.txt\", 'a')\n",
" f.write(setting + \" \\n\")\n",
" f.write('mse:{}, mae:{}'.format(mse, mae))\n",
" f.write('\\n')\n",
" f.write('\\n')\n",
" f.close()\n",
" \n",
" np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))\n",
" np.save(folder_path + 'pred.npy', preds)\n",
" np.save(folder_path + 'true.npy', trues)\n",
"\n",
" return\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 7. Dataloader and DataProvider\n",
"\n",
"In the process of training, we simply take the dataloader for granted, by the function `self._get_data(flag='train')`. So how does this line work? Have a look at the definition(in __class Exp_Long_Term_Forecast__):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def _get_data(self, flag):\n",
" data_set, data_loader = data_provider(self.args, flag)\n",
" return data_set, data_loader"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"One step forward, see `data_provider(self.args, flag)`(in `data_factory.py`):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Below are some dataloaders defined in data_loader.py. If you want to add your own data, \n",
"# go and check data_loader.py to rewrite a dataloader to fit your data.\n",
"data_dict = {\n",
" 'ETTh1': Dataset_ETT_hour,\n",
" 'ETTh2': Dataset_ETT_hour,\n",
" 'ETTm1': Dataset_ETT_minute,\n",
" 'ETTm2': Dataset_ETT_minute,\n",
" 'custom': Dataset_Custom,\n",
" 'm4': Dataset_M4,\n",
" 'PSM': PSMSegLoader,\n",
" 'MSL': MSLSegLoader,\n",
" 'SMAP': SMAPSegLoader,\n",
" 'SMD': SMDSegLoader,\n",
" 'SWAT': SWATSegLoader,\n",
" 'UEA': UEAloader\n",
"}\n",
"\n",
"\n",
"def data_provider(args, flag):\n",
" Data = data_dict[args.data] #data_provider\n",
"\n",
" # time features encoding, options:[timeF, fixed, learned]\n",
" timeenc = 0 if args.embed != 'timeF' else 1\n",
"\n",
" #test data provider\n",
" if flag == 'test':\n",
" shuffle_flag = False\n",
" drop_last = True\n",
" if args.task_name == 'anomaly_detection' or args.task_name == 'classification':\n",
" batch_size = args.batch_size\n",
"\n",
" #Some tasks during the testing phase may require evaluating samples one at a time. \n",
" # This could be due to variations in sample sizes in the test data or because the \n",
" # evaluation process demands finer-grained results or different processing. \n",
" else:\n",
" batch_size = 1 # bsz=1 for evaluation\n",
"\n",
" #freq for time features encoding, \n",
" # options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly,\n",
" # m:monthly], you can also use more detailed freq like 15min or 3h')\n",
" freq = args.freq\n",
" else:\n",
" shuffle_flag = True\n",
" drop_last = True\n",
" batch_size = args.batch_size # bsz for train and valid\n",
" freq = args.freq\n",
"\n",
" if args.task_name == 'anomaly_detection':\n",
" drop_last = False\n",
" data_set = Data(\n",
" root_path=args.root_path, #root path of the data file\n",
" win_size=args.seq_len, #input sequence length\n",
" flag=flag,\n",
" )\n",
" print(flag, len(data_set))\n",
" data_loader = DataLoader(\n",
" data_set,\n",
" batch_size=batch_size,\n",
" shuffle=shuffle_flag,\n",
" num_workers=args.num_workers,#data loader num workers\n",
" drop_last=drop_last)\n",
" return data_set, data_loader\n",
"\n",
" elif args.task_name == 'classification':\n",
" drop_last = False\n",
" data_set = Data(\n",
" root_path=args.root_path,\n",
" flag=flag,\n",
" )\n",
"\n",
" data_loader = DataLoader(\n",
" data_set,\n",
" batch_size=batch_size,\n",
" shuffle=shuffle_flag,\n",
" num_workers=args.num_workers,\n",
" drop_last=drop_last,\n",
" collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) \n",
" #define some limits to collate pieces of data into batches\n",
" )\n",
" return data_set, data_loader\n",
" else:\n",
" if args.data == 'm4':\n",
" drop_last = False\n",
" data_set = Data(\n",
" root_path=args.root_path, #eg. ./data/ETT/\n",
" data_path=args.data_path, #eg. ETTh1.csv\n",
" flag=flag,\n",
" size=[args.seq_len, args.label_len, args.pred_len],\n",
" features=args.features, #forecasting task, options:[M, S, MS]; \n",
" # M:multivariate predict multivariate, S:univariate predict univariate,\n",
" # MS:multivariate predict univariate\n",
" \n",
" target=args.target, #target feature in S or MS task\n",
" timeenc=timeenc,\n",
" freq=freq,\n",
" seasonal_patterns=args.seasonal_patterns\n",
" )\n",
" print(flag, len(data_set))\n",
" data_loader = DataLoader(\n",
" data_set,\n",
" batch_size=batch_size,\n",
" shuffle=shuffle_flag,\n",
" num_workers=args.num_workers,\n",
" drop_last=drop_last)\n",
" return data_set, data_loader\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From above, it's easy to find that data_provider is responsible for collating the dataset into batches according to different tasks and running modes. It passes the parameters to the dataloader (`Data`) to instruct it how to organize a data file into pieces of usable data. Then it also generates the final data_loader by passing the built-up dataset and some other params to the standard class DataLoader. After that, a dataset that fits the need of the model and an enumerable dataloader are generated. \n",
"\n",
"So how to organize the data file into pieces of data that fits the model? Let's see `data_loader.py`! There are many dataloaders in it, and of course you can write your own dataloader, but here we'll only focus on __class Dataset_ETT_hour(Dataset)__ as an example."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class Dataset_ETT_hour(Dataset):\n",
" def __init__(self, root_path, flag='train', size=None,\n",
" features='S', data_path='ETTh1.csv',\n",
" target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):\n",
" ... \n",
" def __read_data__(self):\n",
" ... \n",
" def __getitem__(self, index):\n",
" ...\n",
" \n",
" def __len__(self):\n",
" ...\n",
" \n",
" def inverse_transform(self, data):\n",
" ..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`__init__()` is the constructor used to initialize various parameters and attributes of the dataset. It takes a series of arguments, including the path to the data file, the dataset's flag (e.g., train, validate, test), dataset size, feature type, target variable, whether to scale the data, time encoding, time frequency, and more. These parameters are used to configure how the dataset is loaded and processed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def __init__(self, root_path, flag='train', size=None,\n",
" features='S', data_path='ETTh1.csv',\n",
" target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):\n",
" # size [seq_len, label_len, pred_len]\n",
" # info\n",
" if size == None:\n",
" self.seq_len = 24 * 4 * 4\n",
" self.label_len = 24 * 4\n",
" self.pred_len = 24 * 4\n",
" else:\n",
" self.seq_len = size[0]\n",
" self.label_len = size[1]\n",
" self.pred_len = size[2]\n",
" # init\n",
" assert flag in ['train', 'test', 'val']\n",
" type_map = {'train': 0, 'val': 1, 'test': 2}\n",
" self.set_type = type_map[flag]\n",
" self.features = features\n",
" self.target = target\n",
" self.scale = scale\n",
" self.timeenc = timeenc\n",
" self.freq = freq\n",
" self.root_path = root_path\n",
" self.data_path = data_path\n",
" \n",
" # After initialization, call __read_data__() to manage the data file.\n",
" self.__read_data__()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The actual process of managing data file into usable data pieces happens in `__read_data__()`, see below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def __read_data__(self):\n",
" self.scaler = StandardScaler()\n",
"\n",
" #get raw data from path\n",
" df_raw = pd.read_csv(os.path.join(self.root_path,\n",
" self.data_path))\n",
"\n",
" # split data set into train, vali, test. border1 is the left border and border2 is the right.\n",
" # Once flag(train, vali, test) is determined, __read_data__ will return certain part of the dataset.\n",
" border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]\n",
" border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]\n",
" border1 = border1s[self.set_type]\n",
" border2 = border2s[self.set_type]\n",
"\n",
" #decide which columns to select\n",
" if self.features == 'M' or self.features == 'MS':\n",
" cols_data = df_raw.columns[1:] # column name list (remove 'date')\n",
" df_data = df_raw[cols_data] #remove the first column, which is time stamp info\n",
" elif self.features == 'S':\n",
" df_data = df_raw[[self.target]] # target column\n",
"\n",
" #scale data by the scaler that fits training data\n",
" if self.scale:\n",
" train_data = df_data[border1s[0]:border2s[0]]\n",
" #train_data.values: turn pandas DataFrame into 2D numpy\n",
" self.scaler.fit(train_data.values) \n",
" data = self.scaler.transform(df_data.values)\n",
" else:\n",
" data = df_data.values \n",
" \n",
"    #time stamp: df_stamp is a pandas DataFrame and\n",
" # has one column called 'date' like 2016-07-01 00:00:00\n",
" df_stamp = df_raw[['date']][border1:border2]\n",
" \n",
" # Since the date format is uncertain across different data file, we need to \n",
" # standardize it so we call func 'pd.to_datetime'\n",
" df_stamp['date'] = pd.to_datetime(df_stamp.date) \n",
"\n",
" if self.timeenc == 0: #time feature encoding is fixed or learned\n",
" df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1)\n",
" df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1)\n",
" df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1)\n",
" df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)\n",
" #now df_frame has multiple columns recording the month, day etc. time stamp\n",
" # next we delete the 'date' column and turn 'DataFrame' to a list\n",
" data_stamp = df_stamp.drop(['date'], 1).values\n",
"\n",
" elif self.timeenc == 1: #time feature encoding is timeF\n",
" '''\n",
" when entering this branch, we choose arg.embed as timeF meaning we want to \n",
" encode the temporal info. 'freq' should be the smallest time step, and has \n",
" options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')\n",
" So you should check the timestep of your data and set 'freq' arg. \n",
" After the time_features encoding, each date info format will be encoded into \n",
" a list, with each element denoting the relative position of this time point\n",
" (e.g. Day of Week, Day of Month, Hour of Day) and each normalized within scope[-0.5, 0.5]\n",
" '''\n",
" data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)\n",
" data_stamp = data_stamp.transpose(1, 0)\n",
" \n",
" \n",
" # data_x and data_y are same copy of a certain part of data\n",
" self.data_x = data[border1:border2]\n",
" self.data_y = data[border1:border2]\n",
" self.data_stamp = data_stamp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`__read_data__()` splits the dataset into 3 parts, selects the needed columns and manages time stamp info. It gives out the well-managed data array for later use. Next, we have to finish the overload of __class Dataset__, see `__getitem__(self, index)` and `__len__(self)`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def __getitem__(self, index):\n",
" #given an index, calculate the positions after this index to truncate the dataset\n",
" s_begin = index\n",
" s_end = s_begin + self.seq_len\n",
" r_begin = s_end - self.label_len\n",
" r_end = r_begin + self.label_len + self.pred_len\n",
"\n",
" #input and output sequence\n",
" seq_x = self.data_x[s_begin:s_end]\n",
" seq_y = self.data_y[r_begin:r_end]\n",
"\n",
" #time mark\n",
" seq_x_mark = self.data_stamp[s_begin:s_end]\n",
" seq_y_mark = self.data_stamp[r_begin:r_end]\n",
"\n",
" return seq_x, seq_y, seq_x_mark, seq_y_mark\n",
"\n",
"def __len__(self):\n",
" return len(self.data_x) - self.seq_len - self.pred_len + 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also add an inverse_transform for scaler if needed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def inverse_transform(self, data):\n",
" return self.scaler.inverse_transform(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"By now, we have finished constructing the dataset and dataloader. If you want to construct your own data and run it on the net, you can find proper data and try to accomplish the functions listed above. Here are some widely used datasets in time series analysis.\n",
"\n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 8. Running the Experiment and Visualizing Result\n",
"\n",
"After preparing the data and the model, we need to write a shell script for the experiment. In the script, we need to run `run.py` with several arguments, which is part of the configuration. Here, let's see `TimesNet` on task long-term forecast with dataset ETTh1 for example."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"model_name=TimesNet\n",
"\n",
"\n",
"python -u run.py \\\n",
" --task_name long_term_forecast \\\n",
" --is_training 1 \\\n",
" --root_path ./dataset/ETT-small/ \\\n",
" --data_path ETTh1.csv \\\n",
" --model_id ETTh1_96_96 \\\n",
" --model $model_name \\\n",
" --data ETTh1 \\\n",
" --features M \\\n",
" --seq_len 96 \\\n",
" --label_len 48 \\\n",
" --pred_len 96 \\\n",
" --e_layers 2 \\\n",
" --d_layers 1 \\\n",
" --factor 3 \\\n",
" --enc_in 7 \\\n",
" --dec_in 7 \\\n",
" --c_out 7 \\\n",
" --d_model 16 \\\n",
" --d_ff 32 \\\n",
" --des 'Exp' \\\n",
" --itr 1 \\\n",
" --top_k 5\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After finishing the shell script, you can run it in shell using bash. For example, you can run the following command, for `TimesNet` ETTh1 long-term forecast:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"bash ./scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here, the bash command may fail to run due to missing packages in the environment. If that's the case, simply follow the error information to install the missing package step by step until you achieve success. The sign of a successful experiment running is that information about the experiment is printed out, such as:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"Namespace(task_name='long_term_forecast', is_training=1, model_id='ETTh1_96_96', model='TimesNet', data='ETTh1', root_path='./dataset/ETT-small/', data_path='ETTh1.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', seq_len=96, label_len=48, pred_len=96, seasonal_patterns='Monthly', inverse=False, mask_rate=0.25, anomaly_ratio=0.25, top_k=5, num_kernels=6, enc_in=7, dec_in=7, c_out=7, d_model=16, n_heads=8, e_layers=2, d_layers=1, d_ff=32, moving_avg=25, factor=3, distil=True, dropout=0.1, embed='timeF', activation='gelu', output_attention=False, num_workers=10, itr=1, train_epochs=10, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', loss='MSE', lradj='type1', use_amp=False, use_gpu=False, gpu=0, use_multi_gpu=False, devices='0,1,2,3', p_hidden_dims=[128, 128], p_hidden_layers=2)\n",
"Use GPU: cuda:0\n",
">>>>>>>start training : long_term_forecast_ETTh1_96_96_TimesNet_ETTh1_ftM_sl96_ll48_pl96_dm16_nh8_el2_dl1_df32_fc3_ebtimeF_dtTrue_Exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>\n",
"train 8449\n",
"val 2785\n",
"test 2785"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, the model starts training. Once one epoch finishes training, information like below will be printed out:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
" iters: 100, epoch: 1 | loss: 0.4701951\n",
" speed: 0.2108s/iter; left time: 535.7317s\n",
" iters: 200, epoch: 1 | loss: 0.4496171\n",
" speed: 0.0615s/iter; left time: 150.0223s\n",
"Epoch: 1 cost time: 30.09317970275879\n",
"Epoch: 1, Steps: 264 | Train Loss: 0.4964185 Vali Loss: 0.8412074 Test Loss: 0.4290483\n",
"Validation loss decreased (inf --> 0.841207). Saving model ...\n",
"Updating learning rate to 0.0001"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When all epochs are done, the model steps into testing. The following information about testing will be printed out, giving the MAE and MSE of test."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
">>>>>>>testing : long_term_forecast_ETTh1_96_96_TimesNet_ETTh1_ftM_sl96_ll48_pl96_dm16_nh8_el2_dl1_df32_fc3_ebtimeF_dtTrue_Exp_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n",
"test 2785\n",
"test shape: (2785, 1, 96, 7) (2785, 1, 96, 7)\n",
"test shape: (2785, 96, 7) (2785, 96, 7)\n",
"mse:0.3890332877635956, mae:0.41201362013816833"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After the test finishes, some visualization results are already stored in the test_results folder in PDF format. For example:\n",
"\n",
""
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: utils/ADFtest.py
================================================
import pandas as pd
import numpy as np
import os
from statsmodels.tsa.stattools import adfuller
from arch.unitroot import ADF
def calculate_ADF(root_path, data_path):
    """Run ``adfuller`` (with ``maxlag=1``) on every non-``'date'`` column of a CSV.

    Args:
        root_path: Directory that contains the data file.
        data_path: File name of the CSV, joined onto ``root_path``.

    Returns:
        ``np.array`` built from the list of per-column ``adfuller`` results,
        in the column order of the file.

    Raises:
        ValueError: If the CSV has no ``'date'`` column (raised by
            ``list.remove``).
    """
    csv_file = os.path.join(root_path, data_path)
    frame = pd.read_csv(csv_file)

    # Every column except 'date' is tested.
    value_cols = list(frame.columns)
    value_cols.remove('date')
    frame = frame[value_cols]

    results = []
    for col_name in value_cols:
        outcome = adfuller(frame[col_name], maxlag=1)
        # Each per-column result is echoed to stdout as it is produced.
        print(outcome)
        results.append(outcome)
    return np.array(results)
def calculate_target_ADF(root_path, data_path, target='OT'):
    """Run ``adfuller`` (with ``maxlag=1``) on the named target columns of a CSV.

    Args:
        root_path: Directory that contains the data file.
        data_path: File name of the CSV, joined onto ``root_path``.
        target: Comma-separated column names to test; split on ``','``.

    Returns:
        ``np.array`` built from the list of per-column ``adfuller`` results,
        in the order the names appear in ``target``.
    """
    frame = pd.read_csv(os.path.join(root_path, data_path))
    target_cols = target.split(',')
    # Restrict the frame to the requested columns before testing them.
    frame = frame[target_cols]
    return np.array([adfuller(frame[name], maxlag=1) for name in target_cols])
def archADF(root_path, data_path):
    """Average the ``arch`` ADF test statistic over all value columns of a CSV.

    The first column of the file is skipped; every remaining column is passed
    to ``arch.unitroot.ADF`` and the resulting ``stat`` values are averaged.

    :param root_path: directory that holds the CSV file
    :param data_path: name of the CSV file inside ``root_path``
    :return: mean ADF statistic across the tested columns
    """
    table = pd.read_csv(os.path.join(root_path, data_path))
    value_columns = table.columns[1:]
    total = sum(ADF(table[name].values).stat for name in value_columns)
    return total / len(value_columns)
if __name__ == '__main__':
    # One entry per benchmark: (message printed, root_path, data_path).
    # The trailing comment keeps the value this script produced ("result")
    # next to the reference value it is compared with ("report").
    benchmarks = [
        ("Exchange ADF metric", "./dataset/exchange_rate/", "exchange_rate.csv"),  # result: -1.902402344564288 | report: -1.889
        ("Illness ADF metric", "./dataset/illness/", "national_illness.csv"),  # result: -5.33416661870624 | report: -5.406
        ("ETTm2 ADF metric", "./dataset/ETT-small/", "ETTm2.csv"),  # result: -5.663628743471695 | report: -6.225
        ("Electricity ADF metric", "./dataset/electricity/", "electricity.csv"),  # result: -8.44485821939281 | report: -8.483
        ("Traffic ADF metric", "./dataset/traffic/", "traffic.csv"),  # result: -15.020978067839014 | report: -15.046
        ("Weather ADF metric", "./dataset/weather/", "weather.csv"),  # result: -26.681433085204866 | report: -26.661
    ]
    for message, root_path, data_path in benchmarks:
        ADFmetric = archADF(root_path=root_path, data_path=data_path)
        print(message, ADFmetric)
================================================
FILE: utils/__init__.py
================================================
================================================
FILE: utils/augmentation.py
================================================
import numpy as np
from tqdm import tqdm
def jitter(x, sigma=0.03):
    """Add zero-mean Gaussian noise with standard deviation ``sigma`` to ``x``.

    Reference: https://arxiv.org/pdf/1706.00527.pdf

    :param x: array to perturb; the noise is drawn with the same shape
    :param sigma: standard deviation of the noise
    :return: ``x`` plus the sampled noise
    """
    noise = np.random.normal(loc=0., scale=sigma, size=x.shape)
    return x + noise
def scaling(x, sigma=0.1):
    """Multiply every (series, channel) pair by one random Gaussian factor.

    Reference: https://arxiv.org/pdf/1706.00527.pdf

    :param x: array whose axes 0 and 2 index series and channels; the factor
        is broadcast along axis 1
    :param sigma: standard deviation of the factor (its mean is 1)
    :return: the scaled array
    """
    n_series, n_channels = x.shape[0], x.shape[2]
    factor = np.random.normal(loc=1., scale=sigma, size=(n_series, n_channels))
    return np.multiply(x, np.expand_dims(factor, axis=1))
def rotation(x):
    """Randomly flip the sign of channels and shuffle the channel order.

    :param x: array-like with three axes; the last one is treated as channels
    :return: array in which the channels are reordered by one random
        permutation (shared by all series) and each (series, channel) pair is
        multiplied by a random -1 or +1
    """
    data = np.array(x)
    n_series, n_channels = data.shape[0], data.shape[2]
    # Draw the signs before shuffling so the random stream is consumed in the
    # same order on every call.
    signs = np.random.choice([-1, 1], size=(n_series, n_channels))
    channel_order = np.arange(n_channels)
    np.random.shuffle(channel_order)
    shuffled = data[:, :, channel_order]
    return signs[:, np.newaxis, :] * shuffled
def permutation(x, max_segments=5, seg_mode="equal"):
    """Shuffle contiguous time segments of every series in the batch.

    Each series (indexed along axis 0) is cut along axis 1 into a random
    number of segments drawn from ``[1, max_segments)``; the segments are then
    concatenated in random order. A draw of 1 leaves the series unchanged.

    :param x: array whose axis 0 indexes series and axis 1 indexes time steps
    :param max_segments: exclusive upper bound for the number of segments
    :param seg_mode: ``"random"`` draws the cut points at random; any other
        value splits into near-equal parts with ``np.array_split``
    :return: array shaped like ``x`` holding the permuted series
    """
    orig_steps = np.arange(x.shape[1])
    num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))
    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        if num_segs[i] <= 1:
            ret[i] = pat
            continue
        if seg_mode == "random":
            split_points = np.random.choice(x.shape[1]-2, num_segs[i]-1, replace=False)
            split_points.sort()
            splits = np.split(orig_steps, split_points)
        else:
            splits = np.array_split(orig_steps, num_segs[i])
        # Permute the segment *indices*, not the list of segments: segments
        # usually differ in length, and np.random.permutation would have to
        # coerce that ragged list into an array, which NumPy >= 1.24 rejects
        # with a ValueError.
        order = np.random.permutation(len(splits))
        warp = np.concatenate([splits[k] for k in order]).ravel()
        ret[i] = pat[warp]
    return ret
def magnitude_warp(x, sigma=0.2, knot=4):
    """Scale each series by a smooth random curve.

    For every (series, channel) pair, ``knot + 2`` Gaussian values (mean 1,
    std ``sigma``) are placed at evenly spaced time positions and joined by a
    cubic spline; the series is multiplied by that curve.

    :param x: array with axes (series, time, channel)
    :param sigma: standard deviation of the random knot values
    :param knot: number of interior knots (two boundary knots are added)
    :return: array shaped like ``x`` with the warped magnitudes
    """
    from scipy.interpolate import CubicSpline
    n_steps, n_channels = x.shape[1], x.shape[2]
    time_axis = np.arange(n_steps)
    knot_values = np.random.normal(loc=1.0, scale=sigma, size=(x.shape[0], knot+2, n_channels))
    # Every channel uses the same evenly spaced knot positions.
    knot_positions = np.linspace(0, n_steps-1., num=knot+2)
    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        curves = [CubicSpline(knot_positions, knot_values[i, :, dim])(time_axis) for dim in range(n_channels)]
        ret[i] = pat * np.array(curves).T
    return ret
def time_warp(x, sigma=0.2, knot=4):
    """Distort the time axis of each series with a smooth random curve.

    For every (series, channel) pair a cubic spline is fitted through evenly
    spaced knot positions multiplied by Gaussian factors (mean 1, std
    ``sigma``). The resulting time stamps are rescaled so the last one maps to
    the final step, clipped to the valid range, and the channel is resampled
    on the regular grid with linear interpolation.

    :param x: array with axes (series, time, channel)
    :param sigma: standard deviation of the random knot factors
    :param knot: number of interior knots (two boundary knots are added)
    :return: array shaped like ``x`` with the time-warped series
    """
    from scipy.interpolate import CubicSpline
    n_steps = x.shape[1]
    last_step = n_steps - 1
    time_axis = np.arange(n_steps)
    random_warps = np.random.normal(loc=1.0, scale=sigma, size=(x.shape[0], knot+2, x.shape[2]))
    knot_positions = np.linspace(0, n_steps-1., num=knot+2)
    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        for dim in range(x.shape[2]):
            warped_time = CubicSpline(knot_positions, knot_positions * random_warps[i, :, dim])(time_axis)
            scale = last_step / warped_time[-1]
            sample_points = np.clip(scale*warped_time, 0, last_step)
            ret[i, :, dim] = np.interp(time_axis, sample_points, pat[:, dim]).T
    return ret
def window_slice(x, reduce_ratio=0.9):
    """Crop a random window from each series and stretch it to full length.

    Reference: https://halshs.archives-ouvertes.fr/halshs-01357973/document

    :param x: array with axes (series, time, channel)
    :param reduce_ratio: fraction of the time axis kept by the crop
    :return: ``x`` itself when the crop would not be shorter than the series;
        otherwise a new array shaped like ``x`` holding the interpolated crops
    """
    n_steps = x.shape[1]
    target_len = np.ceil(reduce_ratio*n_steps).astype(int)
    if target_len >= n_steps:
        return x
    starts = np.random.randint(low=0, high=n_steps-target_len, size=(x.shape[0])).astype(int)
    ends = (target_len + starts).astype(int)
    # Query and support grids are identical for every series and channel.
    query = np.linspace(0, target_len, num=n_steps)
    support = np.arange(target_len)
    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        cropped = pat[starts[i]:ends[i]]
        for dim in range(x.shape[2]):
            ret[i, :, dim] = np.interp(query, support, cropped[:, dim]).T
    return ret
def window_warp(x, window_ratio=0.1, scales=[0.5, 2.]):
    """Speed up or slow down a random window of each series.

    A window covering ``window_ratio`` of the time axis is resampled by a
    factor picked at random from ``scales``; the series is then interpolated
    back to its original length.

    Reference: https://halshs.archives-ouvertes.fr/halshs-01357973/document

    :param x: array with axes (series, time, channel)
    :param window_ratio: fraction of the time axis covered by the window
    :param scales: candidate resampling factors (read only, never mutated)
    :return: array shaped like ``x`` with the warped series
    """
    n_series, n_steps = x.shape[0], x.shape[1]
    warp_scales = np.random.choice(scales, n_series)
    warp_size = np.ceil(window_ratio*n_steps).astype(int)
    window_steps = np.arange(warp_size)
    window_starts = np.random.randint(low=1, high=n_steps-warp_size-1, size=(n_series)).astype(int)
    window_ends = (window_starts + warp_size).astype(int)
    full_axis = np.arange(n_steps)
    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        begin, finish = window_starts[i], window_ends[i]
        resampled_steps = np.linspace(0, warp_size-1, num=int(warp_size*warp_scales[i]))
        for dim in range(x.shape[2]):
            channel = pat[:, dim]
            window_seg = np.interp(resampled_steps, window_steps, channel[begin:finish])
            warped = np.concatenate((channel[:begin], window_seg, channel[finish:]))
            ret[i, :, dim] = np.interp(full_axis, np.linspace(0, n_steps-1., num=warped.size), warped).T
    return ret
def spawner(x, labels, sigma=0.05, verbose=0):
    """SPAWNER augmentation: average each pattern with a DTW-aligned classmate.

    For every pattern a random *other* pattern of the same class is chosen.
    Both are split at one random time point, each half is aligned with
    symmetric DTW, and the aligned pairs are averaged. The average is
    interpolated back to the original length and Gaussian jitter is added.
    Patterns without a classmate are copied before the jitter.

    Reference: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6983028/

    :param x: array with axes (series, time, channel)
    :param labels: class ids, or one-hot rows that are reduced with ``argmax``
    :param sigma: standard deviation of the final jitter
    :param verbose: truthy values additionally plot the alignments
    :return: array shaped like ``x`` with the augmented patterns
    """
    import utils.dtw as dtw
    random_points = np.random.randint(low=1, high=x.shape[1]-1, size=x.shape[0])
    window = np.ceil(x.shape[1] / 10.).astype(int)
    orig_steps = np.arange(x.shape[1])
    l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        # Every sample index except i, so a pattern is never paired with itself.
        others = np.delete(np.arange(x.shape[0]), i)
        # Keep the sample indices themselves. np.where(...)[0] would return
        # positions inside `others`, which are shifted for every index after i
        # and can therefore select a pattern of another class.
        choices = others[l[others] == l[i]]
        if choices.size > 0:
            random_sample = x[np.random.choice(choices)]
            # SPAWNER splits the path into two randomly
            path1 = dtw.dtw(pat[:random_points[i]], random_sample[:random_points[i]], dtw.RETURN_PATH, slope_constraint="symmetric", window=window)
            path2 = dtw.dtw(pat[random_points[i]:], random_sample[random_points[i]:], dtw.RETURN_PATH, slope_constraint="symmetric", window=window)
            # The second path is relative to the split point; shift it back.
            combined = np.concatenate((np.vstack(path1), np.vstack(path2+random_points[i])), axis=1)
            if verbose:
                # This function has no slope_constraint parameter; use the
                # same "symmetric" constraint as the two alignments above.
                dtw_value, cost, DTW_map, path = dtw.dtw(pat, random_sample, return_flag = dtw.RETURN_ALL, slope_constraint="symmetric", window=window)
                dtw.draw_graph1d(cost, DTW_map, path, pat, random_sample)
                dtw.draw_graph1d(cost, DTW_map, combined, pat, random_sample)
            mean = np.mean([pat[combined[0]], random_sample[combined[1]]], axis=0)
            for dim in range(x.shape[2]):
                ret[i,:,dim] = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=mean.shape[0]), mean[:,dim]).T
        else:
            # No other pattern of this class: keep the pattern unchanged.
            ret[i,:] = pat
    return jitter(ret, sigma=sigma)
def wdba(x, labels, batch_size=6, slope_constraint="symmetric", use_window=True, verbose=0):
    """Weighted DTW barycentric averaging (wDBA) within each class.

    For every pattern, up to ``batch_size`` patterns of the same class are
    sampled, the medoid of that sample under DTW is located, and the other
    members are warped onto the medoid and averaged with weights that decay
    with their DTW distance to it.

    Reference: https://ieeexplore.ieee.org/document/8215569

    :param x: array-like with axes (series, time, channel)
    :param labels: class ids, or one-hot rows that are reduced with ``argmax``
    :param batch_size: maximum number of same-class patterns averaged together
    :param slope_constraint: DTW slope constraint, "symmetric" or "asymmetric"
    :param use_window: restrict DTW to a band of a tenth of the series length
    :param verbose: not read by the current implementation
    :return: array shaped like ``x`` with one averaged pattern per input pattern
    """
    # https://ieeexplore.ieee.org/document/8215569
    # use verbose = -1 to turn off warnings
    # slope_constraint is for DTW. "symmetric" or "asymmetric"
    x = np.array(x)
    import utils.dtw as dtw
    if use_window:
        window = np.ceil(x.shape[1] / 10.).astype(int)
    else:
        window = None
    orig_steps = np.arange(x.shape[1])
    l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
    ret = np.zeros_like(x)
    # for i in tqdm(range(ret.shape[0])):
    for i in range(ret.shape[0]):
        # get the same class as i (the selection includes pattern i itself)
        choices = np.where(l == l[i])[0]
        if choices.size > 0:
            # pick random intra-class pattern
            k = min(choices.size, batch_size)
            random_prototypes = x[np.random.choice(choices, k, replace=False)]
            # calculate dtw between all
            dtw_matrix = np.zeros((k, k))
            for p, prototype in enumerate(random_prototypes):
                for s, sample in enumerate(random_prototypes):
                    if p == s:
                        dtw_matrix[p, s] = 0.
                    else:
                        dtw_matrix[p, s] = dtw.dtw(prototype, sample, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window)
            # get medoid: the prototype with the smallest summed distance to the others
            medoid_id = np.argsort(np.sum(dtw_matrix, axis=1))[0]
            nearest_order = np.argsort(dtw_matrix[medoid_id])
            medoid_pattern = random_prototypes[medoid_id]
            # start weighted DBA
            average_pattern = np.zeros_like(medoid_pattern)
            weighted_sums = np.zeros((medoid_pattern.shape[0]))
            for nid in nearest_order:
                # The medoid itself gets weight 1; so does every prototype when
                # the nearest neighbour is at distance 0, which would otherwise
                # divide by zero in the weight formula below.
                if nid == medoid_id or dtw_matrix[medoid_id, nearest_order[1]] == 0.:
                    average_pattern += medoid_pattern
                    weighted_sums += np.ones_like(weighted_sums)
                else:
                    path = dtw.dtw(medoid_pattern, random_prototypes[nid], dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window)
                    dtw_value = dtw_matrix[medoid_id, nid]
                    warped = random_prototypes[nid, path[1]]
                    # Weight halves each time the distance grows by one
                    # nearest-neighbour distance.
                    weight = np.exp(np.log(0.5)*dtw_value/dtw_matrix[medoid_id, nearest_order[1]])
                    # NOTE(review): path[0] can contain repeated indices, and
                    # fancy-indexed += applies only one update per index; confirm
                    # this is intended rather than np.add.at accumulation.
                    average_pattern[path[0]] += weight * warped
                    weighted_sums[path[0]] += weight
            ret[i,:] = average_pattern / weighted_sums[:,np.newaxis]
        else:
            # if verbose > -1:
            # print("There is only one pattern of class {}, skipping pattern average".format(l[i]))
            ret[i,:] = x[i]
    return ret
# Proposed
def random_guided_warp(x, labels, slope_constraint="symmetric", use_window=True, dtw_type="normal", verbose=0):
    """Time-warp each pattern towards a random pattern of the same class.

    A random *other* pattern of the same class serves as prototype. The
    pattern is aligned to it with DTW (or shapeDTW), re-indexed along the
    warping path and interpolated back to the original length. Patterns
    without a classmate are copied unchanged.

    :param x: array with axes (series, time, channel)
    :param labels: class ids, or one-hot rows that are reduced with ``argmax``
    :param slope_constraint: DTW slope constraint, "symmetric" or "asymmetric"
    :param use_window: restrict DTW to a band of a tenth of the series length
    :param dtw_type: "shape" selects shapeDTW; any other value uses plain DTW
    :param verbose: not read by the current implementation
    :return: array shaped like ``x`` with the warped patterns
    """
    import utils.dtw as dtw
    if use_window:
        window = np.ceil(x.shape[1] / 10.).astype(int)
    else:
        window = None
    orig_steps = np.arange(x.shape[1])
    l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        # Every sample index except i, so a pattern is never its own prototype.
        others = np.delete(np.arange(x.shape[0]), i)
        # Keep the sample indices themselves. np.where(...)[0] would return
        # positions inside `others`, which are shifted for every index after i
        # and can therefore select a pattern of another class.
        choices = others[l[others] == l[i]]
        if choices.size > 0:
            # pick random intra-class pattern
            random_prototype = x[np.random.choice(choices)]
            if dtw_type == "shape":
                path = dtw.shape_dtw(random_prototype, pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window)
            else:
                path = dtw.dtw(random_prototype, pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window)
            # Time warp: follow the sample side of the path, then resample.
            warped = pat[path[1]]
            for dim in range(x.shape[2]):
                ret[i,:,dim] = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=warped.shape[0]), warped[:,dim]).T
        else:
            # No other pattern of this class: keep the pattern unchanged.
            ret[i,:] = pat
    return ret
def random_guided_warp_shape(x, labels, slope_constraint="symmetric", use_window=True):
    """Run ``random_guided_warp`` with shapeDTW as the alignment method."""
    return random_guided_warp(
        x,
        labels,
        slope_constraint,
        use_window,
        dtw_type="shape",
    )
def discriminative_guided_warp(x, labels, batch_size=6, slope_constraint="symmetric", use_window=True, dtw_type="normal", use_variable_slice=True, verbose=0):
    """Time-warp each pattern towards the most discriminative classmate.

    For every pattern a batch of same-class ("positive") and other-class
    ("negative") patterns is sampled. The positive prototype whose mean DTW
    distance to the negatives exceeds its mean distance to the other positives
    by the largest margin guides the warp. Optionally each result is then
    window-sliced, slicing less the more it was warped.

    :param x: array with axes (series, time, channel)
    :param labels: class ids, or one-hot rows that are reduced with ``argmax``
    :param batch_size: total number of prototypes, split between both groups
    :param slope_constraint: DTW slope constraint, "symmetric" or "asymmetric"
    :param use_window: restrict DTW to a band of a tenth of the series length
    :param dtw_type: "shape" selects shapeDTW; any other value uses plain DTW
    :param use_variable_slice: apply the warp-dependent window slicing
    :param verbose: not read by the current implementation
    :return: array shaped like ``x`` with the warped patterns
    """
    import utils.dtw as dtw
    # Both DTW variants are called with identical arguments; choose once.
    dtw_fn = dtw.shape_dtw if dtw_type == "shape" else dtw.dtw
    if use_window:
        window = np.ceil(x.shape[1] / 10.).astype(int)
    else:
        window = None
    orig_steps = np.arange(x.shape[1])
    l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels
    positive_batch = np.ceil(batch_size / 2).astype(int)
    negative_batch = np.floor(batch_size / 2).astype(int)
    ret = np.zeros_like(x)
    warp_amount = np.zeros(x.shape[0])
    for i, pat in enumerate(x):
        # Every sample index except i, so a pattern is never its own prototype.
        others = np.delete(np.arange(x.shape[0]), i)
        # Keep the sample indices themselves. np.where(...)[0] would return
        # positions inside `others`, which are shifted for every index after i
        # and would mix up the positive and negative sets.
        positive = others[l[others] == l[i]]
        negative = others[l[others] != l[i]]
        if positive.size > 0 and negative.size > 0:
            pos_k = min(positive.size, positive_batch)
            neg_k = min(negative.size, negative_batch)
            positive_prototypes = x[np.random.choice(positive, pos_k, replace=False)]
            negative_prototypes = x[np.random.choice(negative, neg_k, replace=False)]
            # Mean distance of each positive prototype to the other positives
            # (pos_aves) and to the negatives (neg_aves); both are indexed by
            # positive prototype, hence both have length pos_k.
            pos_aves = np.zeros((pos_k))
            neg_aves = np.zeros((pos_k))
            for p, pos_prot in enumerate(positive_prototypes):
                for ps, pos_samp in enumerate(positive_prototypes):
                    if p != ps:
                        pos_aves[p] += (1./(pos_k-1.))*dtw_fn(pos_prot, pos_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window)
                for neg_samp in negative_prototypes:
                    neg_aves[p] += (1./neg_k)*dtw_fn(pos_prot, neg_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window)
            selected_id = np.argmax(neg_aves - pos_aves)
            path = dtw_fn(positive_prototypes[selected_id], pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window)
            # Time warp: follow the sample side of the path, then resample.
            warped = pat[path[1]]
            resample_axis = np.linspace(0, x.shape[1]-1., num=warped.shape[0])
            warp_path_interp = np.interp(orig_steps, resample_axis, path[1])
            # Total deviation of the warping path from the identity mapping.
            warp_amount[i] = np.sum(np.abs(orig_steps-warp_path_interp))
            for dim in range(x.shape[2]):
                ret[i,:,dim] = np.interp(orig_steps, resample_axis, warped[:,dim]).T
        else:
            # One of the two groups is empty: keep the pattern unchanged.
            ret[i,:] = pat
            warp_amount[i] = 0.
    if use_variable_slice:
        max_warp = np.max(warp_amount)
        if max_warp == 0:
            # Nothing was warped: slice everything with the fixed ratio.
            ret = window_slice(ret, reduce_ratio=0.9)
        else:
            for i, pat in enumerate(ret):
                # Variable slicing: the ratio grows from 0.9 to 1.0 with the
                # warp amount, so strongly warped patterns are sliced less.
                ret[i] = window_slice(pat[np.newaxis,:,:], reduce_ratio=0.9+0.1*warp_amount[i]/max_warp)[0]
    return ret
def discriminative_guided_warp_shape(x, labels, batch_size=6, slope_constraint="symmetric", use_window=True):
    """Run ``discriminative_guided_warp`` with shapeDTW as the alignment method."""
    return discriminative_guided_warp(
        x,
        labels,
        batch_size,
        slope_constraint,
        use_window,
        dtw_type="shape",
    )
def run_augmentation(x, y, args):
    """Append ``args.augmentation_ratio`` augmented copies of a dataset to it.

    The NumPy global seed is reset from ``args.seed`` first. Each round
    augments the original ``x`` with ``augment`` and appends the result (and a
    copy of ``y``) along axis 0.

    :param x: input samples
    :param y: labels belonging to ``x``
    :param args: namespace providing ``data``, ``seed``, ``augmentation_ratio``,
        ``extra_tag`` and the per-method flags read by ``augment``
    :return: ``(x_aug, y_aug, augmentation_tags)``
    """
    print("Augmenting %s"%args.data)
    np.random.seed(args.seed)
    if not args.augmentation_ratio > 0:
        # Nothing to augment: hand the inputs back with the extra tag only.
        return x, y, args.extra_tag
    x_aug, y_aug = x, y
    augmentation_tags = "%d"%args.augmentation_ratio
    for round_idx in range(args.augmentation_ratio):
        x_round, augmentation_tags = augment(x, y, args)
        x_aug = np.append(x_aug, x_round, axis=0)
        y_aug = np.append(y_aug, y, axis=0)
        print("Round %d: %s done"%(round_idx, augmentation_tags))
    if args.extra_tag:
        augmentation_tags += "_"+args.extra_tag
    return x_aug, y_aug, augmentation_tags
def run_augmentation_single(x, y, args):
    """Augment one batch, or one whole series, in place of the original data.

    Unlike ``run_augmentation`` nothing is appended: the result has the same
    shape as ``x``. A two-dimensional ``x`` is treated as a single series
    covering the full sequence and is wrapped into a batch of one for the
    augmentation, then unwrapped again.

    :param x: array of shape (sequence_length, num_channels) or
        (batch_size, sequence_length, num_channels)
    :param y: labels, passed through to ``augment`` and returned unchanged
    :param args: namespace providing ``seed``, ``augmentation_ratio``,
        ``extra_tag`` and the per-method flags read by ``augment``
    :return: ``(x_aug, y, augmentation_tags)``
    :raises ValueError: if ``x`` has more than three dimensions
    """
    np.random.seed(args.seed)
    y_aug = y
    single_series = len(x.shape) < 3
    if single_series:
        # Augmenting on the entire series: using the input data as "One Big Batch"
        # Before - (sequence_length, num_channels)
        # After - (1, sequence_length, num_channels)
        # Note: the 'sequence_length' here is actually the length of the entire series
        x_input = x[np.newaxis,:]
    elif len(x.shape) == 3:
        # Augmenting on the batch series: keep current dimension (batch_size, sequence_length, num_channels)
        x_input = x
    else:
        raise ValueError("Input must be (batch_size, sequence_length, num_channels) dimensional")
    # Start from the batched view so the final squeeze is valid even when no
    # augmentation round runs (augmentation_ratio <= 0). Squeezing the raw 2-D
    # input instead would raise ValueError.
    x_aug = x_input
    if args.augmentation_ratio > 0:
        augmentation_tags = "%d"%args.augmentation_ratio
        for n in range(args.augmentation_ratio):
            # Each round re-augments the original input; only the last is kept.
            x_aug, augmentation_tags = augment(x_input, y, args)
        if args.extra_tag:
            augmentation_tags += "_"+args.extra_tag
    else:
        augmentation_tags = args.extra_tag
    if single_series:
        # Reverse to two-dimensional in whole series augmentation scenario
        x_aug = x_aug.squeeze(0)
    return x_aug, y_aug, augmentation_tags
def augment(x, y, args):
    """Apply every augmentation enabled on ``args`` to ``x``, in fixed order.

    Each enabled method is applied to the output of the previous one and
    contributes its tag to the returned tag string.

    :param x: samples to augment
    :param y: labels, used only by the class-aware methods
    :param args: namespace with one boolean-like flag per method
    :return: ``(augmented_x, augmentation_tags)``
    """
    import utils.augmentation as aug
    # (flag on args, tag appended, transform). The order is the application order.
    # Lambdas defer the attribute lookup on `aug` until a flag is actually set.
    pipeline = (
        ("jitter", "_jitter", lambda data: aug.jitter(data)),
        ("scaling", "_scaling", lambda data: aug.scaling(data)),
        ("rotation", "_rotation", lambda data: aug.rotation(data)),
        ("permutation", "_permutation", lambda data: aug.permutation(data)),
        ("randompermutation", "_randomperm", lambda data: aug.permutation(data, seg_mode="random")),
        ("magwarp", "_magwarp", lambda data: aug.magnitude_warp(data)),
        ("timewarp", "_timewarp", lambda data: aug.time_warp(data)),
        ("windowslice", "_windowslice", lambda data: aug.window_slice(data)),
        ("windowwarp", "_windowwarp", lambda data: aug.window_warp(data)),
        ("spawner", "_spawner", lambda data: aug.spawner(data, y)),
        ("dtwwarp", "_rgw", lambda data: aug.random_guided_warp(data, y)),
        ("shapedtwwarp", "_rgws", lambda data: aug.random_guided_warp_shape(data, y)),
        ("wdba", "_wdba", lambda data: aug.wdba(data, y)),
        ("discdtw", "_dgw", lambda data: aug.discriminative_guided_warp(data, y)),
        ("discsdtw", "_dgws", lambda data: aug.discriminative_guided_warp_shape(data, y)),
    )
    augmentation_tags = ""
    for flag, tag, transform in pipeline:
        if getattr(args, flag):
            x = transform(x)
            augmentation_tags += tag
    return x, augmentation_tags
================================================
FILE: utils/dtw.py
================================================
__author__ = 'Brian Iwana'
import numpy as np
import math
import sys
# Values accepted by the ``return_flag`` argument of dtw() and shape_dtw().
RETURN_VALUE = 0  # return only the final accumulated DTW cost
RETURN_PATH = 1  # return only the warping path
RETURN_ALL = -1  # return the cost, the local cost matrix, the accumulated matrix and the path
# Core DTW
def _traceback(DTW, slope_constraint):
i, j = np.array(DTW.shape) - 1
p, q = [i-1], [j-1]
if slope_constraint == "asymmetric":
while (i > 1):
tb = np.argmin((DTW[i-1, j], DTW[i-1, j-1], DTW[i-1, j-2]))
if (tb == 0):
i = i - 1
elif (tb == 1):
i = i - 1
j = j - 1
elif (tb == 2):
i = i - 1
j = j - 2
p.insert(0, i-1)
q.insert(0, j-1)
elif slope_constraint == "symmetric":
while (i > 1 or j > 1):
tb = np.argmin((DTW[i-1, j-1], DTW[i-1, j], DTW[i, j-1]))
if (tb == 0):
i = i - 1
j = j - 1
elif (tb == 1):
i = i - 1
elif (tb == 2):
j = j - 1
p.insert(0, i-1)
q.insert(0, j-1)
else:
sys.exit("Unknown slope constraint %s"%slope_constraint)
return (np.array(p), np.array(q))
def dtw(prototype, sample, return_flag = RETURN_VALUE, slope_constraint="asymmetric", window=None):
    """Compute the DTW alignment of two sequences.

    The local cost is the Euclidean norm between time steps, evaluated only
    inside a band of ``window`` steps around the diagonal.

    :param prototype: np array, first axis is time, second axis is features
    :param sample: np array, first axis is time, second axis is features
    :param return_flag: RETURN_VALUE, RETURN_PATH or RETURN_ALL
    :param slope_constraint: "asymmetric" or "symmetric"
    :param window: half-width of the band; ``None`` uses the sample length
    :return: the final accumulated cost, the warping path, or the tuple
        ``(cost value, local cost matrix, accumulated matrix, path)``,
        depending on ``return_flag``
    """
    p = prototype.shape[0]
    assert p != 0, "Prototype empty!"
    s = sample.shape[0]
    assert s != 0, "Sample empty!"
    if window is None:
        window = s
    cost = np.full((p, s), np.inf)
    for i in range(p):
        # Columns outside the band keep an infinite cost.
        band = slice(max(0, i-window), min(s, i+window)+1)
        cost[i, band] = np.linalg.norm(sample[band] - prototype[i], axis=1)
    DTW = _cummulative_matrix(cost, slope_constraint, window)
    if return_flag == RETURN_ALL:
        return DTW[-1,-1], cost, DTW[1:,1:], _traceback(DTW, slope_constraint)
    if return_flag == RETURN_PATH:
        return _traceback(DTW, slope_constraint)
    return DTW[-1,-1]
def _cummulative_matrix(cost, slope_constraint, window):
p = cost.shape[0]
s = cost.shape[1]
# Note: DTW is one larger than cost and the original patterns
DTW = np.full((p+1, s+1), np.inf)
DTW[0, 0] = 0.0
if slope_constraint == "asymmetric":
for i in range(1, p+1):
if i <= window+1:
DTW[i,1] = cost[i-1,0] + min(DTW[i-1,0], DTW[i-1,1])
for j in range(max(2, i-window), min(s, i+window)+1):
DTW[i,j] = cost[i-1,j-1] + min(DTW[i-1,j-2], DTW[i-1,j-1], DTW[i-1,j])
elif slope_constraint == "symmetric":
for i in range(1, p+1):
for j in range(max(1, i-window), min(s, i+window)+1):
DTW[i,j] = cost[i-1,j-1] + min(DTW[i-1,j-1], DTW[i,j-1], DTW[i-1,j])
else:
sys.exit("Unknown slope constraint %s"%slope_constraint)
return DTW
def shape_dtw(prototype, sample, return_flag = RETURN_VALUE, slope_constraint="asymmetric", window=None, descr_ratio=0.05):
    """Compute the shapeDTW alignment of two sequences.

    Instead of comparing single time steps, every step is described by the
    raw subsequence around it (edge-padded at both ends); the local cost is
    the norm of the difference between two such descriptors.

    Reference: https://www.sciencedirect.com/science/article/pii/S0031320317303710

    :param prototype: np array, first axis is time, second axis is features
    :param sample: np array, first axis is time, second axis is features
    :param return_flag: RETURN_VALUE, RETURN_PATH or RETURN_ALL
    :param slope_constraint: "asymmetric" or "symmetric"
    :param window: half-width of the band; ``None`` uses the sample length
    :param descr_ratio: descriptor length as a fraction of the sequence
        length, clipped to the range [5, 100]
    :return: the final accumulated cost, the warping path, or the tuple
        ``(cost value, local cost matrix, accumulated matrix, path)``,
        depending on ``return_flag``
    """
    p = prototype.shape[0]
    assert p != 0, "Prototype empty!"
    s = sample.shape[0]
    assert s != 0, "Sample empty!"
    if window is None:
        window = s
    p_feature_len = np.clip(np.round(p * descr_ratio), 5, 100).astype(int)
    s_feature_len = np.clip(np.round(s * descr_ratio), 5, 100).astype(int)
    # padding
    p_pad_front = (np.ceil(p_feature_len / 2.)).astype(int)
    p_pad_back = (np.floor(p_feature_len / 2.)).astype(int)
    s_pad_front = (np.ceil(s_feature_len / 2.)).astype(int)
    s_pad_back = (np.floor(s_feature_len / 2.)).astype(int)
    prototype_pad = np.pad(prototype, ((p_pad_front, p_pad_back), (0, 0)), mode="edge")
    sample_pad = np.pad(sample, ((s_pad_front, s_pad_back), (0, 0)), mode="edge")
    cost = np.full((p, s), np.inf)
    for i in range(p):
        # The band includes column i+window, matching dtw() and the cells read
        # by _cummulative_matrix. Stopping one column earlier leaves that cell
        # at infinity and makes the band lopsided.
        for j in range(max(0, i-window), min(s, i+window+1)):
            cost[i, j] = np.linalg.norm(sample_pad[j:j+s_feature_len] - prototype_pad[i:i+p_feature_len])
    DTW = _cummulative_matrix(cost, slope_constraint=slope_constraint, window=window)
    if return_flag == RETURN_ALL:
        return DTW[-1,-1], cost, DTW[1:,1:], _traceback(DTW, slope_constraint)
    elif return_flag == RETURN_PATH:
        return _traceback(DTW, slope_constraint)
    else:
        return DTW[-1,-1]
# Draw helpers
def draw_graph2d(cost, DTW, path, prototype, sample):
    """Plot a DTW alignment of two 2-D trajectories.

    Shows the local cost matrix and the accumulated matrix with the warping
    path on top, plus the prototype, the sample and the point-to-point
    connections; columns 0 and 1 of each sequence are used as x and y.

    :param cost: local cost matrix of shape (p, s)
    :param DTW: accumulated cost matrix
    :param path: pair of index arrays (prototype indices, sample indices)
    :param prototype: array whose columns 0 and 1 are plotted
    :param sample: array whose columns 0 and 1 are plotted
    """
    import matplotlib.pyplot as plt
    plt.figure(figsize=(12, 8))
    # cost: the matrix is drawn transposed, so the x axis spans the prototype
    # (shape[0]) and the y axis spans the sample (shape[1]).
    plt.subplot(2, 3, 1)
    plt.imshow(cost.T, cmap=plt.cm.gray, interpolation='none', origin='lower')
    plt.plot(path[0], path[1], 'y')
    plt.xlim((-0.5, cost.shape[0]-0.5))
    plt.ylim((-0.5, cost.shape[1]-0.5))
    # dtw: path indices are shifted by one because DTW has a leading row/column
    plt.subplot(2, 3, 2)
    plt.imshow(DTW.T, cmap=plt.cm.gray, interpolation='none', origin='lower')
    plt.plot(path[0]+1, path[1]+1, 'y')
    plt.xlim((-0.5, DTW.shape[0]-0.5))
    plt.ylim((-0.5, DTW.shape[1]-0.5))
    # prototype
    plt.subplot(2, 3, 4)
    plt.plot(prototype[:,0], prototype[:,1], 'b-o')
    # connection
    plt.subplot(2, 3, 5)
    for p_idx, s_idx in zip(path[0], path[1]):
        plt.plot([prototype[p_idx,0], sample[s_idx,0]], [prototype[p_idx,1], sample[s_idx,1]], 'y-')
    plt.plot(sample[:,0], sample[:,1], 'g-o')
    plt.plot(prototype[:,0], prototype[:,1], 'b-o')
    # sample
    plt.subplot(2, 3, 6)
    plt.plot(sample[:,0], sample[:,1], 'g-o')
    plt.tight_layout()
    plt.show()
def draw_graph1d(cost, DTW, path, prototype, sample):
    """Plot a DTW alignment of two 1-D series.

    Shows the local cost matrix and the accumulated matrix with the warping
    path on top, plus the prototype, the sample and the point-to-point
    connections; only column 0 of each sequence is plotted, against its time
    index.

    :param cost: local cost matrix of shape (p, s)
    :param DTW: accumulated cost matrix
    :param path: pair of index arrays (prototype indices, sample indices)
    :param prototype: array whose column 0 is plotted
    :param sample: array whose column 0 is plotted
    """
    import matplotlib.pyplot as plt
    plt.figure(figsize=(12, 8))
    p_steps = np.arange(prototype.shape[0])
    s_steps = np.arange(sample.shape[0])
    # cost: the matrix is drawn transposed, so the x axis spans the prototype
    # (shape[0]) and the y axis spans the sample (shape[1]).
    plt.subplot(2, 3, 1)
    plt.imshow(cost.T, cmap=plt.cm.gray, interpolation='none', origin='lower')
    plt.plot(path[0], path[1], 'y')
    plt.xlim((-0.5, cost.shape[0]-0.5))
    plt.ylim((-0.5, cost.shape[1]-0.5))
    # dtw: path indices are shifted by one because DTW has a leading row/column
    plt.subplot(2, 3, 2)
    plt.imshow(DTW.T, cmap=plt.cm.gray, interpolation='none', origin='lower')
    plt.plot(path[0]+1, path[1]+1, 'y')
    plt.xlim((-0.5, DTW.shape[0]-0.5))
    plt.ylim((-0.5, DTW.shape[1]-0.5))
    # prototype
    plt.subplot(2, 3, 4)
    plt.plot(p_steps, prototype[:,0], 'b-o')
    # connection
    plt.subplot(2, 3, 5)
    for p_idx, s_idx in zip(path[0], path[1]):
        plt.plot([p_idx, s_idx], [prototype[p_idx,0], sample[s_idx,0]], 'y-')
    # Each series is drawn against its own time axis; swapping the axes fails
    # as soon as the two series differ in length.
    plt.plot(s_steps, sample[:,0], 'g-o')
    plt.plot(p_steps, prototype[:,0], 'b-o')
    # sample
    plt.subplot(2, 3, 6)
    plt.plot(s_steps, sample[:,0], 'g-o')
    plt.tight_layout()
    plt.show()
================================================
FILE: utils/dtw_metric.py
================================================
from numpy import array, zeros, full, argmin, inf, ndim
from scipy.spatial.distance import cdist
from math import isinf
def dtw(x, y, dist, warp=1, w=inf, s=1.0):
    """
    Compute the Dynamic Time Warping (DTW) of two sequences.
    :param array x: N1*M array
    :param array y: N2*M array
    :param func dist: distance used as cost measure, called as dist(x[i], y[j])
    :param int warp: how many shifts are computed.
    :param int w: window size limiting the maximal distance between indices of matched entries |i,j|.
    :param float s: weight applied on off-diagonal moves of the path. As s gets larger, the warping path is increasingly biased towards the diagonal
    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
    """
    assert len(x)
    assert len(y)
    assert isinf(w) or (w >= abs(len(x) - len(y)))
    assert s > 0
    r, c = len(x), len(y)
    unbounded = isinf(w)
    if unbounded:
        D0 = zeros((r + 1, c + 1))
        D0[0, 1:] = inf
        D0[1:, 0] = inf
    else:
        # Only cells inside the window start at 0; the rest stay infinite.
        D0 = full((r + 1, c + 1), inf)
        for row in range(1, r + 1):
            D0[row, max(1, row - w):min(c + 1, row + w + 1)] = 0
        D0[0, 0] = 0
    # D1 is a view into D0 without the border: writes to D1 update D0 as well.
    D1 = D0[1:, 1:]
    for i in range(r):
        for j in range(c):
            if unbounded or (max(0, i - w) <= j <= min(c, i + w)):
                D1[i, j] = dist(x[i], y[j])
    C = D1.copy()
    columns = range(c)
    for i in range(r):
        if not unbounded:
            columns = range(max(0, i - w), min(c, i + w + 1))
        for j in columns:
            candidates = [D0[i, j]]
            for k in range(1, warp + 1):
                candidates.append(D0[min(i + k, r), j] * s)
                candidates.append(D0[i, min(j + k, c)] * s)
            D1[i, j] += min(candidates)
    if len(x) == 1:
        path = zeros(len(y)), range(len(y))
    elif len(y) == 1:
        path = range(len(x)), zeros(len(x))
    else:
        path = _traceback(D0)
    return D1[-1, -1], C, D1, path
def accelerated_dtw(x, y, dist, warp=1):
    """
    Compute the Dynamic Time Warping (DTW) of two sequences in a faster way.
    Instead of calling a distance function for each pair of elements, the
    whole cost matrix is computed at once with scipy's cdist
    (https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html)
    :param array x: N1*M array (a 1-D array is reshaped to N1*1)
    :param array y: N2*M array (a 1-D array is reshaped to N2*1)
    :param string or func dist: metric forwarded to cdist; see the cdist documentation for the accepted metric names.
    :param int warp: how many shifts are computed.
    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
    """
    assert len(x)
    assert len(y)
    if ndim(x) == 1:
        x = x.reshape(-1, 1)
    if ndim(y) == 1:
        y = y.reshape(-1, 1)
    r, c = len(x), len(y)
    D0 = zeros((r + 1, c + 1))
    D0[0, 1:] = inf
    D0[1:, 0] = inf
    # D1 is a view into D0 without the border: writes to D1 update D0 as well.
    D1 = D0[1:, 1:]
    D0[1:, 1:] = cdist(x, y, dist)
    C = D1.copy()
    for i in range(r):
        for j in range(c):
            candidates = [D0[i, j]]
            for k in range(1, warp + 1):
                candidates.append(D0[min(i + k, r), j])
                candidates.append(D0[i, min(j + k, c)])
            D1[i, j] += min(candidates)
    if len(x) == 1:
        path = zeros(len(y)), range(len(y))
    elif len(y) == 1:
        path = range(len(x)), zeros(len(x))
    else:
        path = _traceback(D0)
    return D1[-1, -1], C, D1, path
def _traceback(D):
i, j = array(D.shape) - 2
p, q = [i], [j]
while (i > 0) or (j > 0):
tb = argmin((D[i, j], D[i, j + 1], D[i + 1, j]))
if tb == 0:
i -= 1
j -= 1
elif tb == 1:
i -= 1
else: # (tb == 2):
j -= 1
p.insert(0, i)
q.insert(0, j)
return array(p), array(q)
# Demo: align two example sequences with dtw() and plot the cost matrix
# together with the warp path.
if __name__ == '__main__':
    # Defaults: no window restriction, no extra weight on off-diagonal moves.
    w = inf
    s = 1.0
    # The constant conditions below select which example runs; only the first
    # branch is active.
    if 1: # 1-D numeric
        from sklearn.metrics.pairwise import manhattan_distances
        x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
        y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
        dist_fun = manhattan_distances
        w = 1
        # s = 1.2
    elif 0: # 2-D numeric
        from sklearn.metrics.pairwise import euclidean_distances
        x = [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [4, 3], [2, 3], [1, 1], [2, 2], [0, 1]]
        y = [[1, 0], [1, 1], [1, 1], [2, 1], [4, 3], [4, 3], [2, 3], [3, 1], [1, 2], [1, 0]]
        dist_fun = euclidean_distances
    else: # 1-D list of strings
        from nltk.metrics.distance import edit_distance
        # x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder']
        # y = ['class', 'too']
        x = ['i', 'soon', 'found', 'myself', 'muttering', 'to', 'the', 'walls']
        y = ['see', 'drown', 'himself']
        # x = 'we talked about the situation'.split()
        # y = 'we talked about the situation'.split()
        dist_fun = edit_distance
    dist, cost, acc, path = dtw(x, y, dist_fun, w=w, s=s)
    # Visualize: cost matrix as a heat map with the warp path drawn on top
    from matplotlib import pyplot as plt
    plt.imshow(cost.T, origin='lower', cmap=plt.cm.Reds, interpolation='nearest')
    plt.plot(path[0], path[1], '-o') # relation
    plt.xticks(range(len(x)), x)
    plt.yticks(range(len(y)), y)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.axis('tight')
    if isinf(w):
        plt.title('Minimum distance: {}, slope weight: {}'.format(dist, s))
    else:
        plt.title('Minimum distance: {}, window widht: {}, slope weight: {}'.format(dist, w, s))
    plt.show()
================================================
FILE: utils/losses.py
================================================
# This source code is provided for the purposes of scientific reproducibility
# under the following limited license from Element AI Inc. The code is an
# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
# expansion analysis for interpretable time series forecasting,
# https://arxiv.org/abs/1905.10437). The copyright to the source code is
# licensed under the Creative Commons - Attribution-NonCommercial 4.0
# International license (CC BY-NC 4.0):
# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
# for the benefit of third parties or internally in production) requires an
# explicit license. The subject-matter of the N-BEATS model and associated
# materials are the property of Element AI Inc. and may be subject to patent
# protection. No license to patents is granted hereunder (whether express or
# implied). Copyright © 2020 Element AI Inc. All rights reserved.
"""
Loss functions for PyTorch.
"""
import torch as t
import torch.nn as nn
import numpy as np
import pdb
def divide_no_nan(a, b):
    """
    a/b where the resulting NaN or Inf (of either sign) are replaced by 0.

    :param a: numerator tensor
    :param b: denominator tensor
    :return: tensor holding ``a / b`` with every non-finite entry set to 0
    """
    result = a / b
    # isinf covers both +inf and -inf; comparing with np.inf alone would let a
    # negative numerator divided by zero through as -inf.
    result[t.isnan(result) | t.isinf(result)] = .0
    return result
class mape_loss(nn.Module):
    """Mean absolute percentage error as a PyTorch module."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
        ``insample`` and ``freq`` are accepted so that all losses share one
        call signature; they are not used here.
        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        # mask / target is 0 wherever the division is not finite.
        inverse_target = divide_no_nan(mask, target)
        relative_error = (forecast - target) * inverse_target
        return t.mean(t.abs(relative_error))
class smape_loss(nn.Module):
    """Symmetric mean absolute percentage error as a PyTorch module."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)
        ``insample`` and ``freq`` are accepted so that all losses share one
        call signature; they are not used here.
        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        abs_error = t.abs(forecast - target)
        # The denominator is built from .data of both tensors.
        magnitude = t.abs(forecast.data) + t.abs(target.data)
        ratio = divide_no_nan(abs_error, magnitude)
        return 200 * t.mean(ratio * mask)
class mase_loss(nn.Module):
    """Mean absolute scaled error as a PyTorch module."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf
        :param insample: Insample values. Shape: batch, time_i
        :param freq: Frequency value
        :param forecast: Forecast values. Shape: batch, time_o
        :param target: Target values. Shape: batch, time_o
        :param mask: 0/1 mask. Shape: batch, time_o
        :return: Loss value
        """
        # Per-series scale: mean absolute difference between values that lie
        # `freq` steps apart in the in-sample window.
        lagged_diff = insample[:, freq:] - insample[:, :-freq]
        masep = t.mean(t.abs(lagged_diff), dim=1)
        masked_masep_inv = divide_no_nan(mask, masep[:, None])
        abs_error = t.abs(target - forecast)
        return t.mean(abs_error * masked_masep_inv)
================================================
FILE: utils/m4_summary.py
================================================
# This source code is provided for the purposes of scientific reproducibility
# under the following limited license from Element AI Inc. The code is an
# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
# expansion analysis for interpretable time series forecasting,
# https://arxiv.org/abs/1905.10437). The copyright to the source code is
# licensed under the Creative Commons - Attribution-NonCommercial 4.0
# International license (CC BY-NC 4.0):
# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
# for the benefit of third parties or internally in production) requires an
# explicit license. The subject-matter of the N-BEATS model and associated
# materials are the property of Element AI Inc. and may be subject to patent
# protection. No license to patents is granted hereunder (whether express or
# implied). Copyright 2020 Element AI Inc. All rights reserved.
"""
M4 Summary
"""
from collections import OrderedDict
import numpy as np
import pandas as pd
from data_provider.m4 import M4Dataset
from data_provider.m4 import M4Meta
import os
def group_values(values, groups, group_name):
    """
    Select the series that belong to one group and strip NaN entries from each.

    :param values: Array of series (one row/element per series) that supports
        boolean-mask indexing.
    :param groups: Array of group labels aligned with ``values``.
    :param group_name: Label of the group to extract.
    :return: A regular array (as ``np.array`` builds it) when all cleaned series
        have the same length, otherwise a 1-D object array holding one array per
        series.
    """
    rows = [v[~np.isnan(v)] for v in values[groups == group_name]]
    # Equal-length rows (or no rows at all) stack exactly as before.
    if len({len(row) for row in rows}) <= 1:
        return np.array(rows)
    # Ragged rows: NumPy >= 1.24 refuses to build an array from them implicitly,
    # so fill an object array element by element.
    ragged = np.empty(len(rows), dtype=object)
    for position, row in enumerate(rows):
        ragged[position] = row
    return ragged
def mase(forecast, insample, outsample, frequency):
    """
    Mean absolute scaled error of one forecast.

    :param forecast: Forecast values.
    :param insample: In-sample (history) values used to compute the scale.
    :param outsample: Actual out-of-sample values.
    :param frequency: Lag used for the in-sample differences.
    :return: Mean absolute forecast error divided by the mean absolute
        ``frequency``-lag difference of ``insample``.
    """
    forecast_error = np.mean(np.abs(forecast - outsample))
    lagged_diff = insample[:-frequency] - insample[frequency:]
    scale = np.mean(np.abs(lagged_diff))
    return forecast_error / scale
def smape_2(forecast, target):
    """
    Element-wise symmetric absolute percentage error, scaled to [0, 200].

    :param forecast: Forecast values.
    :param target: Target values.
    :return: ``200 * |forecast - target| / (|target| + |forecast|)`` per element,
        with 0 where both values are zero.
    """
    scale = np.abs(target) + np.abs(forecast)
    # A zero scale means forecast and target are both zero, so the numerator is
    # zero as well; dividing by 1.0 there just avoids 0/0.
    is_zero = scale == 0.0
    scale[is_zero] = 1.0
    absolute_error = np.abs(forecast - target)
    return 200 * absolute_error / scale
def mape(forecast, target):
    """
    Element-wise absolute percentage error.

    :param forecast: Forecast values.
    :param target: Target values.
    :return: ``100 * |forecast - target| / |target|`` per element; where the
        target is zero the divisor is replaced by 1.0.
    """
    scale = np.abs(target)
    # Replace zero targets by 1.0 so the division is defined; those entries then
    # evaluate to 100 * |forecast - target|.
    is_zero = scale == 0.0
    scale[is_zero] = 1.0
    absolute_error = np.abs(forecast - target)
    return 100 * absolute_error / scale
class M4Summary:
    """Score saved M4 forecasts (sMAPE, MAPE, MASE, OWA) against the Naive2 baseline."""

    def __init__(self, file_path, root_path):
        """
        :param file_path: Prefix of the per-group forecast files; each file is
            read from ``file_path + <group name> + "_forecast.csv"``.
        :param root_path: Location handed to ``M4Dataset.load`` and the directory
            that contains ``submission-Naive2.csv``.
        """
        self.file_path = file_path
        self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
        self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
        self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')

    @staticmethod
    def _stack_rows(rows):
        """Stack equal-length rows normally; keep ragged rows in a 1-D object array."""
        if len({len(row) for row in rows}) <= 1:
            return np.array(rows)
        # NumPy >= 1.24 raises on implicit ragged arrays, so fill one explicitly.
        stacked = np.empty(len(rows), dtype=object)
        for position, row in enumerate(rows):
            stacked[position] = row
        return stacked

    def evaluate(self):
        """
        Evaluate the saved forecasts using the M4 test dataset.

        A forecast file must exist for every name in ``M4Meta.seasonal_patterns``.

        :return: Four dicts of scores rounded to 3 decimals, in this order:
            sMAPE, OWA, MAPE and MASE, each keyed by the summary groups produced
            by ``summarize_groups``.
        :raises FileNotFoundError: If the forecast file of a group is missing.
        """
        grouped_owa = OrderedDict()

        naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
        # Rows have different numbers of non-NaN entries when horizons differ
        # between groups, so they cannot always form a rectangular array.
        naive2_forecasts = self._stack_rows([v[~np.isnan(v)] for v in naive2_forecasts])

        model_mases = {}
        naive2_smapes = {}
        naive2_mases = {}
        grouped_smapes = {}
        grouped_mapes = {}
        for group_name in M4Meta.seasonal_patterns:
            file_name = self.file_path + group_name + "_forecast.csv"
            if not os.path.exists(file_name):
                # Fail here with a clear message instead of later with an
                # unrelated error once the group's scores turn out to be absent.
                raise FileNotFoundError(f"Forecast file not found: {file_name}")
            model_forecast = pd.read_csv(file_name).values

            in_group = self.test_set.groups == group_name
            naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
            target = group_values(self.test_set.values, self.test_set.groups, group_name)
            # all timeseries within group have same frequency
            frequency = self.training_set.frequencies[in_group][0]
            insample = group_values(self.training_set.values, self.test_set.groups, group_name)

            series_range = range(len(model_forecast))
            model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
                                                    insample=insample[i],
                                                    outsample=target[i],
                                                    frequency=frequency) for i in series_range])
            naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
                                                     insample=insample[i],
                                                     outsample=target[i],
                                                     frequency=frequency) for i in series_range])

            naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
            grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target))
            grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target))

        grouped_smapes = self.summarize_groups(grouped_smapes)
        grouped_mapes = self.summarize_groups(grouped_mapes)
        grouped_model_mases = self.summarize_groups(model_mases)
        grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
        grouped_naive2_mases = self.summarize_groups(naive2_mases)
        # OWA: average of the model's MASE and sMAPE, each relative to Naive2.
        for k in grouped_model_mases.keys():
            grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
                              grouped_smapes[k] / grouped_naive2_smapes[k]) / 2

        def round_all(d):
            return {key: np.round(value, 3) for key, value in d.items()}

        return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all(
            grouped_model_mases)

    def summarize_groups(self, scores):
        """
        Re-group scores respecting M4 rules.

        Yearly, Quarterly and Monthly are reported as-is; Weekly, Daily and
        Hourly are merged into a series-count-weighted 'Others'; 'Average' is the
        series-count-weighted mean over all groups.

        :param scores: Scores per group; must contain all six group names.
        :return: Grouped scores (ordered: Yearly, Quarterly, Monthly, Others, Average).
        """
        scores_summary = OrderedDict()

        def group_count(group_name):
            return len(np.where(self.test_set.groups == group_name)[0])

        weighted_score = {}
        for g in ['Yearly', 'Quarterly', 'Monthly']:
            weighted_score[g] = scores[g] * group_count(g)
            scores_summary[g] = scores[g]

        others_score = 0
        others_count = 0
        for g in ['Weekly', 'Daily', 'Hourly']:
            others_score += scores[g] * group_count(g)
            others_count += group_count(g)
        weighted_score['Others'] = others_score
        scores_summary['Others'] = others_score / others_count

        average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
        scores_summary['Average'] = average

        return scores_summary
================================================
FILE: utils/masking.py
================================================
import torch
class TriangularCausalMask():
    """Boolean mask of shape [B, 1, L, L] that is True strictly above the diagonal."""

    def __init__(self, B, L, device="cpu"):
        shape = [B, 1, L, L]
        with torch.no_grad():
            all_true = torch.ones(shape, dtype=torch.bool)
            # Keep only entries above the main diagonal, then move to `device`.
            upper = all_true.triu(diagonal=1)
            self._mask = upper.to(device)

    @property
    def mask(self):
        """The mask tensor built in the constructor."""
        return self._mask
class ProbMask():
    """Upper-triangular boolean mask restricted to the rows selected by `index`."""

    def __init__(self, B, H, L, index, scores, device="cpu"):
        n_cols = scores.shape[-1]
        # (L, n_cols) mask, True strictly above the diagonal.
        base = torch.ones(L, n_cols, dtype=torch.bool).to(device).triu(1)
        expanded = base[None, None, :].expand(B, H, L, n_cols)
        # Pick, for every batch/head pair, the rows listed in `index`.
        batch_ids = torch.arange(B)[:, None, None]
        head_ids = torch.arange(H)[None, :, None]
        picked = expanded[batch_ids, head_ids, index, :].to(device)
        self._mask = picked.view(scores.shape).to(device)

    @property
    def mask(self):
        """The mask tensor, shaped like `scores`."""
        return self._mask
================================================
FILE: utils/metrics.py
================================================
import numpy as np
def RSE(pred, true):
    """Root of the summed squared error divided by the root of the summed squared deviation of `true` from its mean."""
    error_norm = np.sqrt(np.sum((true - pred) ** 2))
    deviation_norm = np.sqrt(np.sum((true - true.mean()) ** 2))
    return error_norm / deviation_norm
def CORR(pred, true):
    """
    Pearson correlation between `pred` and `true`, computed along axis 0 and
    averaged over the last remaining axis.

    :param pred: Predicted values (NumPy array).
    :param true: Ground-truth values, same shape as `pred`.
    :return: Mean of the per-column correlation coefficients. Constant columns
        yield a zero denominator and therefore nan/inf, as NumPy division does.
    """
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    covariance = (true_centered * pred_centered).sum(0)
    # Product of the two sums of squares -- not the sum of the products of
    # squares, which does not normalise the covariance into [-1, 1].
    norm = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    return (covariance / norm).mean(-1)
def MAE(pred, true):
    """Mean absolute error between `pred` and `true`."""
    absolute_error = np.abs(true - pred)
    return np.mean(absolute_error)
def MSE(pred, true):
    """Mean squared error between `pred` and `true`."""
    squared_error = (true - pred) ** 2
    return np.mean(squared_error)
def RMSE(pred, true):
    """Root mean squared error: the square root of `MSE(pred, true)`."""
    mean_squared = MSE(pred, true)
    return np.sqrt(mean_squared)
def MAPE(pred, true):
    """Mean absolute percentage error as a fraction (not multiplied by 100).

    Divides by `true` directly, so zeros in `true` are not guarded against.
    """
    relative_error = (true - pred) / true
    return np.mean(np.abs(relative_error))
def MSPE(pred, true):
    """Mean squared percentage error as a fraction (not multiplied by 100).

    Divides by `true` directly, so zeros in `true` are not guarded against.
    """
    relative_error = (true - pred) / true
    return np.mean(np.square(relative_error))
def metric(pred, true):
    """Compute the standard forecasting metrics in one call.

    :return: Tuple ``(mae, mse, rmse, mape, mspe)``.
    """
    return (
        MAE(pred, true),
        MSE(pred, true),
        RMSE(pred, true),
        MAPE(pred, true),
        MSPE(pred, true),
    )
================================================
FILE: utils/print_args.py
================================================
def print_args(args):
    """Print the run configuration held in `args` as a sectioned, column-aligned table.

    Every label and value is left-aligned in a 20-character column. The
    forecasting, imputation and anomaly-detection sections are printed only for
    the matching `args.task_name`.
    """

    def _title(text):
        # Section heading wrapped in ANSI bold on/off codes.
        print("\033[1m" + text + "\033[0m")

    def _row(*cells):
        # Cells alternate label, value; each is padded to 20 characters.
        print(' ' + ''.join(f'{cell:<20}' for cell in cells))

    _title("Basic Config")
    _row("Task Name:", args.task_name, "Is Training:", args.is_training)
    _row("Model ID:", args.model_id, "Model:", args.model)
    print()

    _title("Data Loader")
    _row("Data:", args.data, "Root Path:", args.root_path)
    _row("Data Path:", args.data_path, "Features:", args.features)
    _row("Target:", args.target, "Freq:", args.freq)
    _row("Checkpoints:", args.checkpoints)
    print()

    if args.task_name in ['long_term_forecast', 'short_term_forecast']:
        _title("Forecasting Task")
        _row("Seq Len:", args.seq_len, "Label Len:", args.label_len)
        _row("Pred Len:", args.pred_len, "Seasonal Patterns:", args.seasonal_patterns)
        _row("Inverse:", args.inverse)
        print()

    if args.task_name == 'imputation':
        _title("Imputation Task")
        _row("Mask Rate:", args.mask_rate)
        print()

    if args.task_name == 'anomaly_detection':
        _title("Anomaly Detection Task")
        _row("Anomaly Ratio:", args.anomaly_ratio)
        print()

    _title("Model Parameters")
    _row("Top k:", args.top_k, "Num Kernels:", args.num_kernels)
    _row("Enc In:", args.enc_in, "Dec In:", args.dec_in)
    _row("C Out:", args.c_out, "d model:", args.d_model)
    _row("n heads:", args.n_heads, "e layers:", args.e_layers)
    _row("d layers:", args.d_layers, "d FF:", args.d_ff)
    _row("Moving Avg:", args.moving_avg, "Factor:", args.factor)
    _row("Distil:", args.distil, "Dropout:", args.dropout)
    _row("Embed:", args.embed, "Activation:", args.activation)
    print()

    _title("Run Parameters")
    _row("Num Workers:", args.num_workers, "Itr:", args.itr)
    _row("Train Epochs:", args.train_epochs, "Batch Size:", args.batch_size)
    _row("Patience:", args.patience, "Learning Rate:", args.learning_rate)
    _row("Des:", args.des, "Loss:", args.loss)
    _row("Lradj:", args.lradj, "Use Amp:", args.use_amp)
    print()

    _title("GPU")
    _row("Use GPU:", args.use_gpu, "GPU:", args.gpu)
    _row("Use Multi GPU:", args.use_multi_gpu, "Devices:", args.devices)
    print()

    _title("De-stationary Projector Params")
    # The list of hidden sizes is flattened to one comma-separated cell.
    hidden_dims_text = ', '.join(map(str, args.p_hidden_dims))
    _row("P Hidden Dims:", hidden_dims_text, "P Hidden Layers:", args.p_hidden_layers)
    print()
================================================
FILE: utils/timefeatures.py
================================================
# From: gluonts/src/gluonts/time_feature/_base.py
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from typing import List
import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
class TimeFeature:
    """Base class for callables that map a datetime index to one numeric feature."""

    def __init__(self):
        """No state to initialise."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        """Placeholder; the base implementation returns None and subclasses override it."""

    def __repr__(self):
        return f"{self.__class__.__name__}()"
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Seconds 0..59 are scaled to 0..1 and then centred.
        scaled = index.second / 59.0
        return scaled - 0.5
class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Minutes 0..59 are scaled to 0..1 and then centred.
        scaled = index.minute / 59.0
        return scaled - 0.5
class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Hours 0..23 are scaled to 0..1 and then centred.
        scaled = index.hour / 23.0
        return scaled - 0.5
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Weekdays 0..6 are scaled to 0..1 and then centred.
        scaled = index.dayofweek / 6.0
        return scaled - 0.5
class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days 1..31 are shifted to 0..30, scaled to 0..1 and then centred.
        zero_based = index.day - 1
        return zero_based / 30.0 - 0.5
class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days 1..366 are shifted to start at 0, divided by 365 and then centred.
        zero_based = index.dayofyear - 1
        return zero_based / 365.0 - 0.5
class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Months 1..12 are shifted to 0..11, scaled to 0..1 and then centred.
        zero_based = index.month - 1
        return zero_based / 11.0 - 0.5
class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Week number taken from the index's ISO calendar, shifted to start at 0.
        # NOTE(review): `isocalendar().week` is presumably a pandas Series rather
        # than an ndarray -- confirm against the pandas version in use.
        iso_week = index.isocalendar().week
        return (iso_week - 1) / 52.0 - 0.5
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the supported offset types.
    """
    # Offset type -> feature classes to instantiate for it. Every entry uses
    # features no finer than its own granularity.
    calendar_features = [DayOfWeek, DayOfMonth, DayOfYear]
    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: list(calendar_features),
        offsets.BusinessDay: list(calendar_features),
        offsets.Hour: [HourOfDay] + calendar_features,
        offsets.Minute: [MinuteOfHour, HourOfDay] + calendar_features,
        offsets.Second: [SecondOfMinute, MinuteOfHour, HourOfDay] + calendar_features,
    }

    parsed_offset = to_offset(freq_str)

    # First matching entry wins, in the insertion order of the table above.
    matched_classes = next(
        (
            classes
            for offset_type, classes in features_by_offsets.items()
            if isinstance(parsed_offset, offset_type)
        ),
        None,
    )
    if matched_classes is not None:
        return [feature_cls() for feature_cls in matched_classes]

    supported_freq_msg = f"""
Unsupported frequency {freq_str}
The following frequencies are supported:
Y - yearly
alias: A
M - monthly
W - weekly
D - daily
B - business days
H - hourly
T - minutely
alias: min
S - secondly
"""
    raise RuntimeError(supported_freq_msg)
def time_features(dates, freq='h'):
    """Stack the time features selected for `freq`, one row per feature, evaluated on `dates`."""
    extractors = time_features_from_frequency_str(freq)
    feature_rows = [extractor(dates) for extractor in extractors]
    return np.vstack(feature_rows)
================================================
FILE: utils/tools.py
================================================
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
import math
plt.switch_backend('agg')
def adjust_learning_rate(optimizer, epoch, args):
    """
    Set the learning rate of every parameter group for the given epoch.

    Supported ``args.lradj`` schedules:

    - ``'type1'``: ``learning_rate * 0.5 ** (epoch - 1)``.
    - ``'type2'``: fixed values at epochs 2, 4, 6, 8, 10, 15 and 20; other
      epochs leave the optimizer untouched.
    - ``'type3'``: ``learning_rate`` while ``epoch < 3``, afterwards
      ``learning_rate * 0.9 ** (epoch - 3)``.
    - ``'cosine'``: ``learning_rate / 2 * (1 + cos(pi * epoch / train_epochs))``.

    :param optimizer: Object exposing ``param_groups``, a list of dicts with an ``'lr'`` key.
    :param epoch: Current epoch number.
    :param args: Namespace providing ``lradj``, ``learning_rate`` and (for
        ``'cosine'``) ``train_epochs``.
    :raises ValueError: If ``args.lradj`` is not one of the supported schedules.
    """
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    elif args.lradj == 'type3':
        lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
    elif args.lradj == "cosine":
        lr_adjust = {epoch: args.learning_rate / 2 * (1 + math.cos(epoch / args.train_epochs * math.pi))}
    else:
        # Without this branch an unknown schedule left `lr_adjust` unbound and
        # failed below with an unrelated UnboundLocalError.
        raise ValueError(
            "Unsupported lradj {!r}; expected one of 'type1', 'type2', 'type3', 'cosine'".format(args.lradj))
    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))
class EarlyStopping:
    """Track validation loss, checkpoint on improvement and flag when to stop.

    `early_stop` becomes True once `patience` consecutive calls have failed to
    improve on the best score by at least `delta`.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.counter = 0            # calls since the last improvement
        self.best_score = None      # best (negated) validation loss so far
        self.early_stop = False
        self.val_loss_min = np.inf  # loss stored with the latest checkpoint

    def __call__(self, val_loss, model, path):
        # Scores are negated losses, so larger is better.
        score = -val_loss
        first_call = self.best_score is None

        if not first_call and score < self.best_score + self.delta:
            # No sufficient improvement: count it and maybe trigger the stop flag.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        self.best_score = score
        self.save_checkpoint(val_loss, model, path)
        if not first_call:
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write `model.state_dict()` to `<path>/checkpoint.pth` and record `val_loss`."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        checkpoint_file = path + '/' + 'checkpoint.pth'
        torch.save(model.state_dict(), checkpoint_file)
        self.val_loss_min = val_loss
class dotdict(dict):
    """dot.notation access to dictionary attributes"""

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails; missing keys give None.
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]
class StandardScaler():
    """Standardise data with a fixed mean and standard deviation supplied up front."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        """Return `(data - mean) / std`."""
        centered = data - self.mean
        return centered / self.std

    def inverse_transform(self, data):
        """Return `data * std + mean`, undoing `transform`."""
        rescaled = data * self.std
        return rescaled + self.mean
def visual(true, preds=None, name='./pic/test.pdf'):
    """
    Results visualization

    Plots the ground truth (and the prediction, when given) and saves the figure.

    :param true: Ground-truth series to plot.
    :param preds: Optional predicted series, drawn first.
    :param name: Output file path handed to ``plt.savefig``.
    """
    figure = plt.figure()
    try:
        if preds is not None:
            plt.plot(preds, label='Prediction', linewidth=2)
        plt.plot(true, label='GroundTruth', linewidth=2)
        plt.legend()
        plt.savefig(name, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed; without this each
        # call leaks one figure.
        plt.close(figure)
def adjustment(gt, pred):
    """
    Expand detections to whole ground-truth anomaly segments, in place.

    When a position is flagged in both `gt` and `pred`, every `pred` entry of the
    contiguous `gt` segment around it (bounded by `gt == 0`) that equals 0 is set
    to 1. `pred` is modified in place.

    :param gt: Ground-truth labels (0 = normal).
    :param pred: Predicted labels; mutated.
    :return: Tuple ``(gt, pred)``.
    """
    total = len(gt)
    inside_segment = False
    for pos in range(total):
        if gt[pos] == 1 and pred[pos] == 1 and not inside_segment:
            inside_segment = True
            # Walk back to the start of the ground-truth segment.
            back = pos
            while back >= 0 and not gt[back] == 0:
                if pred[back] == 0:
                    pred[back] = 1
                back -= 1
            # Walk forward to the end of the ground-truth segment.
            fwd = pos
            while fwd < total and not gt[fwd] == 0:
                if pred[fwd] == 0:
                    pred[fwd] = 1
                fwd += 1
        elif gt[pos] == 0:
            inside_segment = False
        if inside_segment:
            pred[pos] = 1
    return gt, pred
def cal_accuracy(y_pred, y_true):
    """Mean of the equality comparison `y_pred == y_true` (the fraction of matches for arrays)."""
    matches = y_pred == y_true
    return np.mean(matches)