Repository: neuraloperator/physics_informed Branch: master Commit: 3b6bc307c63c Files: 252 Total size: 505.8 KB Directory structure: gitextract_nppznbmv/ ├── .dockerignore ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── baselines/ │ ├── __init__.py │ ├── data.py │ ├── deepxde_deeponet.py │ ├── loss.py │ ├── model.py │ ├── pinns_ns_05s.py │ ├── pinns_ns_50s.py │ ├── sapinns-50s.py │ ├── sapinns.py │ ├── test.py │ ├── tqd_sapinns.py │ ├── tqd_utils.py │ ├── train_darcy.py │ ├── train_ns.py │ ├── unet3d.py │ └── utils.py ├── cavity_flow.py ├── configs/ │ ├── baseline/ │ │ ├── NS-50s-LAAF.yaml │ │ ├── NS-50s.yaml │ │ ├── Re500-05s-deeponet.yaml │ │ ├── Re500-pinns-05s-LAAF.yaml │ │ ├── Re500-pinns-05s-SA.yaml │ │ ├── Re500-pinns-05s.yaml │ │ └── Re500-pinns.yaml │ ├── finetune/ │ │ ├── Darcy-finetune.yaml │ │ ├── Re100-finetune-1s.yaml │ │ ├── Re200-finetune-1s.yaml │ │ ├── Re250-finetune-1s.yaml │ │ ├── Re300-finetune-1s.yaml │ │ ├── Re350-finetune-1s.yaml │ │ ├── Re400-finetune-1s.yaml │ │ ├── Re500-finetune-05s-2layer.yaml │ │ ├── Re500-finetune-05s-eqn.yaml │ │ ├── Re500-finetune-05s4C0.yaml │ │ ├── Re500-finetune-05s4C1.yaml │ │ ├── Re500-finetune-05s4C4.yaml │ │ ├── Re500-finetune-05s4k-2layer.yaml │ │ ├── Re500-finetune-05s4k1k.yaml │ │ ├── Re500-finetune-05s4k4-2layer.yaml │ │ ├── Re500-finetune-05s4k4k.yaml │ │ └── Re500-finetune-1s.yaml │ ├── instance/ │ │ ├── Re500-1_8-FNO.yaml │ │ ├── Re500-1_8-PINO-s.yaml │ │ └── Re500-1_8-PINO.yaml │ ├── ngc/ │ │ ├── Re500-1_8-dat0-PINO.yaml │ │ ├── Re500-1_8-dat200-PINO.yaml │ │ ├── Re500-1_8-dat40-PINO.yaml │ │ ├── Re500-1_8-dat400-PINO.yaml │ │ ├── Re500-1_8-dat80-PINO.yaml │ │ ├── Re500-1_8-dat800-PINO.yaml │ │ ├── Re500-1_8-res16-PINO.yaml │ │ └── Re500-1_8-res32-PINO.yaml │ ├── operator/ │ │ ├── Darcy-pretrain.yaml │ │ ├── Re500-05s-1000-FNO.yaml │ │ ├── Re500-05s-1000-PINO.yaml │ │ ├── Re500-05s-3000-FNO.yaml │ │ ├── Re500-05s-600-FNO.yaml │ │ ├── Re500-05s-600-PINO-xl.yaml │ │ ├── 
Re500-05s-600-PINO.yaml │ │ ├── Re500-05s-FNO.yaml │ │ ├── Re500-1_16-800-FNO-s.yaml │ │ ├── Re500-1_16-800-PINO-s.yaml │ │ ├── Re500-1_4-2000-FNO.yaml │ │ ├── Re500-1_8-0-PINO-s.yaml │ │ ├── Re500-1_8-1200-FNO.yaml │ │ ├── Re500-1_8-1200-PINO.yaml │ │ ├── Re500-1_8-200-FNO-s.yaml │ │ ├── Re500-1_8-2000-FNO-s.yaml │ │ ├── Re500-1_8-2000-FNO-xl.yaml │ │ ├── Re500-1_8-2000-PINO.yaml │ │ ├── Re500-1_8-2200-FNO-s.yaml │ │ ├── Re500-1_8-2200-PINO-s.yaml │ │ ├── Re500-1_8-800-FNO-s.yaml │ │ ├── Re500-1_8-800-FNO-s32.yaml │ │ ├── Re500-1_8-800-PINO-s.yaml │ │ ├── Re500-1_8-800-PINO-s16.yaml │ │ ├── Re500-1_8-800-PINO-s32.yaml │ │ ├── Re500-1_8-800-UNet.yaml │ │ ├── Re500-1_8-dat1.6k-PINO.yaml │ │ ├── Re500-1_8-dat400-FNO.yaml │ │ ├── Re500-1s-FNO.yaml │ │ ├── Re500-3000-FNO.yaml │ │ ├── Re500-3000-PINO.yaml │ │ ├── Re500-4000-FNO.yaml │ │ ├── Re500-FNO.yaml │ │ └── Re500-PINO.yaml │ ├── pretrain/ │ │ ├── Darcy-pretrain-deeponet.yaml │ │ ├── Darcy-pretrain.yaml │ │ ├── Re100-pretrain-1s.yaml │ │ ├── Re200-pretrain-1s.yaml │ │ ├── Re250-pretrain-1s.yaml │ │ ├── Re300-pretrain-1s.yaml │ │ ├── Re350-pretrain-1s.yaml │ │ ├── Re400-pretrain-1s.yaml │ │ ├── Re500-05s-deeponet.yaml │ │ ├── Re500-FNO-1s-100.yaml │ │ ├── Re500-FNO-1s-200.yaml │ │ ├── Re500-FNO-1s-400.yaml │ │ ├── Re500-PINO-1s-100-4v4.yaml │ │ ├── Re500-PINO-1s-200-4v4.yaml │ │ ├── Re500-PINO-1s-400-1v1.yaml │ │ ├── Re500-pretrain-05s-4C1.yaml │ │ ├── Re500-pretrain-05s-4C4.yaml │ │ ├── Re500-pretrain-05s-eqn.yaml │ │ ├── Re500-pretrain-1s.yaml │ │ └── burgers-pretrain.yaml │ ├── scratch/ │ │ ├── Re100-scratch-1s.yaml │ │ ├── Re200-scratch-1s.yaml │ │ ├── Re250-scratch-1s.yaml │ │ ├── Re300-scratch-1s.yaml │ │ ├── Re350-scratch-1s.yaml │ │ ├── Re400-scratch-1s.yaml │ │ ├── Re500-scratch-05s-new.yaml │ │ ├── Re500-scratch-05s.yaml │ │ ├── Re500-scratch-1s-progressive.yaml │ │ └── Re500-scratch-1s.yaml │ ├── test/ │ │ ├── Re500-05s-deeponet.yaml │ │ ├── Re500-05s-test.yaml │ │ ├── Re500-05s.yaml │ │ ├── 
Re500-1s-100.yaml │ │ ├── burgers.yaml │ │ ├── darcy-deeponet.yaml │ │ └── darcy.yaml │ └── transfer/ │ ├── Re100to100-1s.yaml │ ├── Re100to200-1s.yaml │ ├── Re100to250-1s.yaml │ ├── Re100to300-1s.yaml │ ├── Re100to350-1s.yaml │ ├── Re100to400-1s.yaml │ ├── Re100to500-1s.yaml │ ├── Re200to100-1s.yaml │ ├── Re200to200-1s.yaml │ ├── Re200to250-1s.yaml │ ├── Re200to300-1s.yaml │ ├── Re200to350-1s.yaml │ ├── Re200to400-1s.yaml │ ├── Re200to500-1s.yaml │ ├── Re250to100-1s.yaml │ ├── Re250to200-1s.yaml │ ├── Re250to250-1s.yaml │ ├── Re250to300-1s.yaml │ ├── Re250to350-1s.yaml │ ├── Re250to400-1s.yaml │ ├── Re250to500-1s.yaml │ ├── Re300to100-1s.yaml │ ├── Re300to200-1s.yaml │ ├── Re300to250-1s.yaml │ ├── Re300to300-1s.yaml │ ├── Re300to350-1s.yaml │ ├── Re300to400-1s.yaml │ ├── Re300to500-1s.yaml │ ├── Re350to100-1s.yaml │ ├── Re350to200-1s.yaml │ ├── Re350to250-1s.yaml │ ├── Re350to300-1s.yaml │ ├── Re350to350-1s.yaml │ ├── Re350to400-1s.yaml │ ├── Re350to500-1s.yaml │ ├── Re400to100-1s.yaml │ ├── Re400to200-1s.yaml │ ├── Re400to250-1s.yaml │ ├── Re400to300-1s.yaml │ ├── Re400to350-1s.yaml │ ├── Re400to400-1s.yaml │ ├── Re400to500-1s.yaml │ ├── Re500to100-1s.yaml │ ├── Re500to200-1s.yaml │ ├── Re500to250-1s.yaml │ ├── Re500to300-1s.yaml │ ├── Re500to350-1s.yaml │ ├── Re500to400-1s.yaml │ ├── Re500to500-05s-new.yaml │ ├── Re500to500-05s.yaml │ └── Re500to500-1s.yaml ├── deeponet.py ├── download_data.py ├── eval_operator.py ├── generate_data.py ├── inference.py ├── instance_opt.py ├── inverse-darcy-foward.py ├── inverse-darcy.py ├── models/ │ ├── FCN.py │ ├── __init__.py │ ├── basics.py │ ├── core.py │ ├── fourier1d.py │ ├── fourier2d.py │ ├── fourier3d.py │ ├── lowrank2d.py │ ├── tfno.py │ └── utils.py ├── pinns.py ├── prepare_data.py ├── profile-solver-legacy.py ├── profiler/ │ └── calmacs.py ├── run_pino2d.py ├── run_pino3d.py ├── run_solver.py ├── scripts/ │ ├── device1-finetune.sh │ ├── device2-finetune.sh │ ├── device3.sh │ ├── finetune-4k-2layer.sh │ ├── 
finetune-4k0.sh │ ├── finetune-4k1-2layer.sh │ ├── finetune-4k1.sh │ ├── finetune-4k4-2layer.sh │ ├── fnoRe500.sh │ ├── ngc_submit_pino.sh │ ├── ngc_test_submit_pino.sh │ ├── pretrain.sh │ ├── scratchRe500.sh │ ├── test-opt/ │ │ └── Re500-1_8.sh │ ├── train_dat0.sh │ ├── train_dat200.sh │ ├── train_dat40.sh │ ├── train_dat400.sh │ ├── train_dat80.sh │ ├── train_dat800.sh │ ├── train_res16.sh │ └── train_res32.sh ├── solver/ │ ├── __init__.py │ ├── kolmogorov_flow.py │ ├── legacy_solver.py │ ├── periodic.py │ ├── random_fields.py │ ├── rfsampler.py │ └── spectrum.py ├── train_PINO3d.py ├── train_burgers.py ├── train_darcy.py ├── train_no.py ├── train_operator.py ├── train_pino.py ├── train_unet.py └── train_utils/ ├── __init__.py ├── adam.py ├── data_utils.py ├── datasets.py ├── distributed.py ├── eval_2d.py ├── eval_3d.py ├── losses.py ├── negadam.py ├── train_2d.py ├── train_3d.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ .vscode *.py wandb config docs models */*.py exp checkpoints */__pycache__/** ================================================ FILE: .gitignore ================================================ data log .vscode wandb **/__pycache__/** .idea figs checkpoints .ipynb_checkpoints *.ipynb *.pt *.pth tensordiffeq exp ================================================ FILE: Dockerfile ================================================ FROM nvcr.io/nvidia/pytorch:22.09-py3 RUN useradd -ms /bin/bash pino USER pino ENV PATH=/home/pino/.local/bin:$PATH RUN pip install --user \ wandb tqdm pyyaml ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Physics-Informed Neural Operator for Learning Partial Differential Equations # 📢 DEPRECATION NOTICE 📢 ---------------------------- 🚨 **This repository is no longer maintained.** 🚨 The code in this repository is **deprecated** and may not work with newer dependencies or frameworks. 
For the most up-to-date implementation and continued development, please visit: ## ➡️ **[NeuralOperator](https://github.com/neuraloperator/neuraloperator)** ⬅️ 🔴 We strongly recommend using the latest version to ensure compatibility, performance, and support.🔴 ---------------------------- ![PINO Diagram](docs/pino-diagram4.png) [comment]: <> (![Results on Navier Stokes equation](docs/solver-pino.png)) Results on Navier Stokes equation # Paper Info This repo contains code for experiments from the paper [Physics-Informed Neural Operator for Learning Partial Differential Equations](https://arxiv.org/abs/2111.03794) (2021) by Zongyi Li, Hongkai Zheng, Nikola Kovachki, David Jin, Haoxuan Chen, Burigede Liu, Kamyar Azizzadenesheli, and Anima Anandkumar. Abstract: > Machine learning methods have recently shown promise in solving partial differential equations (PDEs). They can be classified into two broad categories: solution function approximation and operator learning. The Physics-Informed Neural Network (PINN) is an example of the former while the Fourier neural operator (FNO) is an example of the latter. Both these approaches have shortcomings. The optimization in PINN is challenging and prone to failure, especially on multi-scale dynamic systems. FNO does not suffer from this optimization issue since it carries out supervised learning on a given dataset, but obtaining such data may be too expensive or infeasible. In this work, we propose the physics-informed neural operator (PINO), where we combine the operator-learning and function-optimization frameworks, and this improves convergence rates and accuracy over both PINN and FNO models. In the operator-learning phase, PINO learns the solution operator over multiple instances of the parametric PDE family. In the test-time optimization phase, PINO optimizes the pre-trained operator ansatz for the querying instance of the PDE.
Experiments show PINO outperforms previous ML methods on many popular PDE families while retaining the extraordinary speed-up of FNO compared to solvers. In particular, PINO accurately solves long temporal transient flows and Kolmogorov flows, while PINN and other methods fail to converge. ## Requirements - Pytorch 1.8.0 or later - wandb - tqdm - scipy - h5py - numpy - DeepXDE:latest - Latest code from tensordiffeq github master branch (Not tensordiffeq 0.19) - tensorflow 2.4.0 ## Data description ### Burgers equation [burgers_pino.mat](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/burgers_pino.mat) ### Darcy flow - spatial domain: $x\in (0,1)^2$ - Data file: - [piececonst_r421_N1024_smooth1.mat](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/piececonst_r421_N1024_smooth1.mat) - [piececonst_r421_N1024_smooth2.mat](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/piececonst_r421_N1024_smooth2.mat) - Raw data shape: 1024x421x421 ### Long roll out of Navier Stokes equation - spatial domain: $x\in (0, 1)^2$ - temporal domain: $t\in \[0, 49\]$ - forcing: $0.1(\sin(2\pi(x_1+x_2)) + \cos(2\pi(x_1+x_2)))$ - viscosity = 0.001 Data file: `nv_V1e-3_N5000_T50.mat`, with shape 50 x 64 x 64 x 5000 - train set: -1-4799 - test set: 4799-4999 ### Navier Stokes with Reynolds number 500 - spatial domain: $x\in (0, 2\pi)^2$ - temporal domain: $t \in \[0, 0.5\]$ - forcing: $-4\cos(4x_2)$ - Reynolds number: 500 Train set: data of shape (N, T, X, Y) where N is the number of instances, T is temporal resolution, X, Y are spatial resolutions. 1. [NS_fft_Re500_T4000.npy](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/NS_fft_Re500_T4000.npy) : 4000x64x64x65 2. [NS_fine_Re500_T128_part0.npy](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/NS_fine_Re500_T128_part0.npy): 100x129x128x128 3. 
[NS_fine_Re500_T128_part1.npy](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/NS_fine_Re500_T128_part1.npy): 100x129x128x128 Test set: data of shape (N, T, X, Y) where N is the number of instances, T is temporal resolution, X, Y are spatial resolutions. 1. [NS_Re500_s256_T100_test.npy](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/NS_Re500_s256_T100_test.npy): 100x129x256x256 2. [NS_fine_Re500_T128_part2.npy](https://hkzdata.s3.us-west-2.amazonaws.com/PINO/data/NS_fine_Re500_T128_part2.npy): 100x129x128x128 Configuration file format: see `.yaml` files under folder `configs` for detail. ## Code for Burgers equation ### Train PINO To run PINO for Burgers equation, use, e.g., ```bash python3 train_burgers.py --config_path configs/pretrain/burgers-pretrain.yaml --mode train ``` To test PINO for burgers equation, use, e.g., ```bash python3 train_burgers.py --config_path configs/test/burgers.yaml --mode test ``` ## Code for Darcy Flow ### Operator learning To run PINO for Darcy Flow, use, e.g., ```bash python3 train_operator.py --config_path configs/pretrain/Darcy-pretrain.yaml ``` To evaluate operator for Darcy Flow, use, e.g., ```bash python3 eval_operator.py --config_path configs/test/darcy.yaml ``` ### Test-time optimization To do test-time optimization for Darcy Flow, use, e.g., ```bash python3 run_pino2d.py --config_path configs/finetune/Darcy-finetune.yaml --start [starting index] --stop [stopping index] ``` ### Baseline To run DeepONet, use, e.g., ```bash python3 deeponet.py --config_path configs/pretrain/Darcy-pretrain-deeponet.yaml --mode train ``` To test DeepONet, use, e.g., ```bash python3 deeponet.py --config_path configs/test/darcy.yaml --mode test ``` ## Code for Navier Stokes equation ### Run exp on new dataset Train PINO with 800 low-res data and 2200 PDE. ```bash python3 train_pino.py --config configs/operator/Re500-1_8-800-PINO-s.yaml ``` Train FNO with 800 low-res data and 2200 PDE. 
```bash python3 train_pino.py --config configs/operator/Re500-1_8-800-FNO-s.yaml ``` Run instance-wise finetuning ```bash python3 instance_opt.py --config configs/instance/Re500-1_8-PINO-s.yaml ``` ### Train PINO for short time period To run operator learning, use, e.g., ```bash python3 train_operator.py --config_path configs/pretrain/Re500-pretrain-05s-4C0.yaml ``` To evaluate trained operator, use ```bash python3 eval_operator.py --config_path configs/test/Re500-05s.yaml ``` To run test-time optimization, use ```bash python3 train_PINO3d.py --config_path configs/***.yaml ``` To train Navier Stokes equations sequentially without running `train_PINO3d.py` multiple times, use ```bash python3 run_pino3d.py --config_path configs/[configuration file name].yaml --start [index of the first data] --stop [which data to stop] ``` ### Baseline for short time period To train DeepONet, use ```bash python3 deeponet.py --config_path configs/[configuration file].yaml --mode train ``` To test DeepONet, use ```bash python3 deeponet.py --config_path configs/[configuration file].yaml --mode test ``` To train and test PINNs, use, e.g., ```bash python3 pinns.py --config_path configs/baseline/Re500-pinns-05s.yaml --start [starting index] --stop [stopping index] ``` To train and test LAAF-PINN, use, e.g., ```bash python3 pinns.py --config_path configs/baseline/Re500-pinns-05s-LAAF.yaml --start [starting index] --stop [stopping index] ``` To train and test SA-PINNs, first copy the latest code of tensordiffeq under the working directory.
Then run: ```bash DDEBACKEND=pytorch python3 pinns.py configs/baseline/Re500-pinns-05s-SA.yaml --start [starting index] --stop [stopping index] ``` ### Baseline for long roll out To train and test PINNs, use ```bash python3 pinns.py --config_path configs/baseline/NS-50s.yaml --start [starting index] --stop [stopping index] ``` To train and test LAAF-PINN, use, e.g., ```bash python3 pinns.py --config_path configs/baseline/NS-50s-LAAF.yaml --start [starting index] --stop [stopping index] ``` ### Pseudospectral solver for Navier Stokes equation To run solver, use ```bash python3 run_solver.py --config_path configs/Re500-0.5s.yaml ``` ================================================ FILE: baselines/__init__.py ================================================ ================================================ FILE: baselines/data.py ================================================ import numpy as np import torch from torch.utils.data import Dataset from .utils import get_xytgrid, get_3dboundary, get_3dboundary_points from train_utils.utils import vor2vel, torch2dgrid import scipy.io import h5py class DarcyFlow(Dataset): def __init__(self, datapath, nx, sub, offset=0, num=1): self.S = int(nx // sub) + 1 data = scipy.io.loadmat(datapath) a = data['coeff'] u = data['sol'] self.a = torch.tensor(a[offset: offset + num, ::sub, ::sub], dtype=torch.float) self.u = torch.tensor(u[offset: offset + num, ::sub, ::sub], dtype=torch.float) self.mesh = torch2dgrid(self.S, self.S) def __len__(self): return self.a.shape[0] def __getitem__(self, item): fa = self.a[item] return fa.reshape(-1), self.u[item].reshape(-1) class NSLong(object): def __init__(self, datapath, nx, nt, time_scale, offset=0, num=1, vel=False): ''' Load data from mat Args: datapath: path to data file nx: number of points in each spatial domain nt: number of points in temporal domain offset: index of the instance num: number of instances vel: compute velocity from vorticity if True ''' self.time_scale = time_scale 
self.S = nx self.T = nt with h5py.File(datapath, mode='r') as file: raw = file['u'] data = np.array(raw) vor = torch.tensor(data, dtype=torch.float).permute(3, 1, 2, 0) self.vor = vor[offset: offset + num, :, :, :] # num x 64 x 64 x 50 if vel: self.vel_u, self.vel_v = vor2vel(self.vor, L=1.0) def get_boundary_value(self, component=0): ''' Get the boundary value for component-th output Args: component: int, 0: velocity_u; 1: velocity_v; 2: vorticity; Returns: value: N by 1 array, boundary value of the component ''' if component == 0: value = self.vel_u elif component == 1: value = self.vel_v elif component == 2: value = self.vor else: raise ValueError(f'No component {component} ') boundary = get_3dboundary(value) return boundary def get_boundary_points(self, num_x, num_y, num_t): points = get_3dboundary_points(num_x, num_y, num_t, bot=(0,0,0), top=(1, 1, self.time_scale)) return points def get_test_xyt(self): ''' Returns: points: (x, y, t) array with shape (S * S * T, 3) values: (u, v, w) array with shape (S * S * T, 3) ''' points = get_xytgrid(S=self.S, T=self.T, bot=[0, 0, 0], top=[1, 1, self.time_scale]) u_val = np.ravel(self.vel_u) v_val = np.ravel(self.vel_v) w_val = np.ravel(self.vor) values = np.stack([u_val, v_val, w_val], axis=0).T return points, values class NSdata(object): def __init__(self, datapath1, nx, nt, offset=0, num=1, datapath2=None, sub=1, sub_t=1, vel=False, t_interval=1.0): ''' Load data from npy and reshape to (N, X, Y, T) Args: datapath1: path to data nx: number of points in each spatial domain nt: number of points in temporal domain offset: index of the instance num: number of instances datapath2: path to second part of data, default None sub: downsample interval of spatial domain sub_t: downsample interval of temporal domain N: t_interval: ''' self.S = nx // sub self.T = int(nt * t_interval) // sub_t + 1 self.time_scale = t_interval data1 = np.load(datapath1) data1 = torch.tensor(data1, dtype=torch.float)[..., ::sub_t, ::sub, ::sub] if 
datapath2 is not None: data2 = np.load(datapath2) data2 = torch.tensor(data2, dtype=torch.float)[..., ::sub_t, ::sub, ::sub] if t_interval == 0.5: data1 = self.extract(data1) if datapath2 is not None: data2 = self.extract(data2) # transpose data into (N, S, S, T) part1 = data1.permute(0, 2, 3, 1) if datapath2 is not None: part2 = data2.permute(0, 2, 3, 1) self.data = torch.cat((part1, part2), dim=0) else: self.data = part1 self.vor = self.data[offset: offset + num, :, :, :].cpu() if vel: self.vel_u, self.vel_v = vor2vel(self.vor) # Compute velocity from vorticity def get_init_cond(self): values = np.stack([self.vel_u[0, :, :, 0], self.vel_v[0, :, :, 0], self.vor[0, :, :, 0]], axis=2) return values def get_boundary_value(self, component=0): ''' Get the boundary value for component-th output Args: component: int, 0: velocity_u; 1: velocity_v; 2: vorticity; Returns: value: N by 1 array, boundary value of the component ''' if component == 0: value = self.vel_u elif component == 1: value = self.vel_v elif component == 2: value = self.vor else: raise ValueError(f'No component {component} ') boundary = get_3dboundary(value) return boundary def get_boundary_points(self, num_x, num_y, num_t): ''' Args: num_x: num_y: Returns: points: N by 3 array ''' points = get_3dboundary_points(num_x, num_y, num_t, bot=(0, 0, 0), top=(2 * np.pi, 2 * np.pi, self.time_scale)) # x_arr = np.linspace(0, 2 * np.pi, num=num_x, endpoint=False) # y_arr = np.linspace(0, 2 * np.pi, num=num_y, endpoint=False) # xx, yy = np.meshgrid(x_arr, y_arr, indexing='ij') # xarr = np.ravel(xx) # yarr = np.ravel(yy) # tarr = np.zeros_like(xarr) # point0 = np.stack([xarr, yarr, tarr], axis=0).T # (128x128x1, 3), boundary on t=0 # # # tarr = np.ones_like(xarr) * self.time_scale # # point1 = np.stack([xarr, yarr, tarr], axis=0).T # (128x128x1, 3), boundary on t=0.5 # # t_arr = np.linspace(0, self.time_scale, num=num_t) # yy, tt = np.meshgrid(y_arr, t_arr, indexing='ij') # yarr = np.ravel(yy) # tarr = np.ravel(tt) # 
xarr = np.zeros_like(yarr) # point2 = np.stack([xarr, yarr, tarr], axis=0).T # (1x128x65, 3), boundary on x=0 # # xarr = np.ones_like(yarr) * 2 * np.pi # point3 = np.stack([xarr, yarr, tarr], axis=0).T # (1x128x65, 3), boundary on x=2pi # # xx, tt = np.meshgrid(x_arr, t_arr, indexing='ij') # xarr = np.ravel(xx) # tarr = np.ravel(tt) # yarr = np.zeros_like(xarr) # point4 = np.stack([xarr, yarr, tarr], axis=0).T # (128x1x65, 3), boundary on y=0 # # yarr = np.ones_like(xarr) * 2 * np.pi # point5 = np.stack([xarr, yarr, tarr], axis=0).T # (128x1x65, 3), boundary on y=2pi # # points = np.concatenate([point0, # point2, point3, # point4, point5], # axis=0) return points def get_test_xyt(self): ''' Returns: points: (x, y, t) array with shape (S * S * T, 3) values: (u, v, w) array with shape (S * S * T, 3) ''' points = get_xytgrid(S=self.S, T=self.T, bot=[0, 0, 0], top=[2 * np.pi, 2 * np.pi, self.time_scale]) u_val = np.ravel(self.vel_u) v_val = np.ravel(self.vel_v) w_val = np.ravel(self.vor) values = np.stack([u_val, v_val, w_val], axis=0).T return points, values @staticmethod def extract(data): ''' Extract data with time range 0-0.5, 0.25-0.75, 0.5-1.0, 0.75-1.25,... 
Args: data: tensor with size N x 129 x 128 x 128 Returns: output: (4*N-1) x 65 x 128 x 128 ''' T = data.shape[1] // 2 interval = data.shape[1] // 4 N = data.shape[0] new_data = torch.zeros(4 * N - 1, T + 1, data.shape[2], data.shape[3]) for i in range(N): for j in range(4): if i == N - 1 and j == 3: # reach boundary break if j != 3: new_data[i * 4 + j] = data[i, interval * j:interval * j + T + 1] else: new_data[i * 4 + j, 0: interval] = data[i, interval * j:interval * j + interval] new_data[i * 4 + j, interval: T + 1] = data[i + 1, 0:interval + 1] return new_data class DeepOnetNS(Dataset): ''' Dataset class customized for DeepONet's input format ''' def __init__(self, datapath, nx, nt, offset=0, num=1, sub=1, sub_t=1, t_interval=1.0): self.S = nx // sub self.T = int(nt * t_interval) // sub_t + 1 self.time_scale = t_interval self.N = num data = np.load(datapath) data = torch.tensor(data, dtype=torch.float)[..., ::sub_t, ::sub, ::sub] if t_interval == 0.5: data = NSdata.extract(data) # transpose data into (N, S, S, T) data = data.permute(0, 2, 3, 1) self.vor = data[offset: offset + num, :, :, :] points = get_xytgrid(S=self.S, T=self.T, bot=[0, 0, 0], top=[2 * np.pi, 2 * np.pi, self.time_scale]) self.xyt = torch.tensor(points, dtype=torch.float) # (SxSxT, 3) def __len__(self): return self.N * self.S * self.S * self.T def __getitem__(self, idx): num_per_instance = self.S ** 2 * self.T instance_id = idx // num_per_instance pos_id = idx % num_per_instance point = self.xyt[pos_id] u0 = self.vor[instance_id, :, :, 0].reshape(-1) y = self.vor[instance_id].reshape(-1)[pos_id] return u0, point, y class DeepONetCPNS(Dataset): ''' Dataset class customized for DeepONet cartesian product's input format ''' def __init__(self, datapath, nx, nt, offset=0, num=1, sub=1, sub_t=1, t_interval=1.0): self.S = nx // sub self.T = int(nt * t_interval) // sub_t + 1 self.time_scale = t_interval self.N = num data = np.load(datapath) data = torch.tensor(data, dtype=torch.float)[..., ::sub_t, 
::sub, ::sub] if t_interval == 0.5: data = NSdata.extract(data) # transpose data into (N, S, S, T) data = data.permute(0, 2, 3, 1) self.vor = data[offset: offset + num, :, :, :] points = get_xytgrid(S=self.S, T=self.T, bot=[0, 0, 0], top=[2 * np.pi, 2 * np.pi, self.time_scale]) self.xyt = torch.tensor(points, dtype=torch.float) # (SxSxT, 3) def __len__(self): return self.N def __getitem__(self, idx): ''' Args: idx: Returns: u0: (batchsize, u0_dim) y: (batchsize, SxSxT) ''' u0 = self.vor[idx, :, :, 0].reshape(-1) y = self.vor[idx, :, :, :].reshape(-1) return u0, y ================================================ FILE: baselines/deepxde_deeponet.py ================================================ import random import deepxde as dde from baselines.data import NSdata ''' Training deepONet using deepxde implementation. Note that deepxde requires passing the whole dataset to Triple, which is very memory consuming. ''' def train(config): seed = random.randint(1, 10000) print(f'Random seed :{seed}') # construct dataloader data_config = config['data'] train_set = NSdata(datapath1=data_config['datapath'], offset=0, num=10, nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], vel=False, t_interval=data_config['time_interval']) val_set = NSdata(datapath1=data_config['data_val'], offset=310, num=10, nx=data_config['val_nx'], nt=data_config['val_nt'], sub=data_config['val_sub'], sub_t=data_config['val_subt'], vel=False, t_interval=data_config['time_interval']) # assert train_set.S == val_set.S dim_a = train_set.S ** 2 dim_x = 3 X_train, y_train = train_set.get_operator_data() X_val, y_val = val_set.get_operator_data() data = dde.data.Triple(X_train=X_train, y_train=y_train, X_test=X_val, y_test=y_val) activation = config['model']['activation'] initializer = 'Glorot normal' # He normal or Glorot normal net = dde.maps.DeepONet([dim_a] + config['model']['layers'], [dim_x] + config['model']['layers'], activation, initializer, use_bias=True, 
stacked=False) model = dde.Model(data, net) model.compile('adam', lr=config['train']['base_lr']) checker = dde.callbacks.ModelCheckpoint( 'checkpoints/deeponet.ckpt', save_better_only=True, period=10, ) model.train(epochs=config['train']['epochs'], callbacks=[checker]) ================================================ FILE: baselines/loss.py ================================================ import torch import torch.autograd as autograd from train_utils.utils import set_grad from .utils import get_sample, net_NS, sub_mse def boundary_loss(model, npt=100): device = next(model.parameters()).device bc1_x_sample, bc1_y_sample, bc1_t_sample, bc2_x_sample, bc2_y_sample, bc2_t_sample \ = get_sample(npt) bc1_x_sample, bc1_y_sample, bc1_t_sample, bc2_x_sample, bc2_y_sample, bc2_t_sample \ = bc1_x_sample.to(device), bc1_y_sample.to(device), bc1_t_sample.to(device), \ bc2_x_sample.to(device), bc2_y_sample.to(device), bc2_t_sample.to(device) set_grad([bc1_x_sample, bc1_y_sample, bc1_t_sample, bc2_x_sample, bc2_y_sample, bc2_t_sample]) u1, v1, _ = net_NS(bc1_x_sample, bc1_y_sample, bc1_t_sample, model) u2, v2, _ = net_NS(bc2_x_sample, bc2_y_sample, bc2_t_sample, model) bc_loss = sub_mse(u1) + sub_mse(v1) + sub_mse(u2) + sub_mse(v2) return 0.5 * bc_loss # 0.5 is the normalization factor def resf_NS(u, v, p, x, y, t, re=40): ''' Args: u: x-component, tensor v: y-component, tensor x: x-dimension, tensor y: y-dimension, tensor t: time dimension, tensor Returns: Residual f error ''' u_x, u_y, u_t = autograd.grad(outputs=[u.sum()], inputs=[x, y, t], create_graph=True) v_x, v_y, v_t = autograd.grad(outputs=[v.sum()], inputs=[x, y, t], create_graph=True) u_xx, = autograd.grad(outputs=[u_x.sum()], inputs=[x], create_graph=True) u_yy, = autograd.grad(outputs=[u_y.sum()], inputs=[y], create_graph=True) v_xx, = autograd.grad(outputs=[v_x.sum()], inputs=[x], create_graph=True) v_yy, = autograd.grad(outputs=[v_y.sum()], inputs=[y], create_graph=True) p_x, = autograd.grad(outputs=[p.sum()], 
inputs=[x], create_graph=True) p_y, = autograd.grad(outputs=[p.sum()], inputs=[y], create_graph=True) res_x = u_t + u * u_x + v * u_y + p_x - 1 / re * (u_xx + u_yy) - torch.sin(4 * y) res_y = v_t + u * v_x + v * v_y + p_y - 1 / re * (v_xx + v_yy) evp3 = u_x + v_y return res_x, res_y, evp3 ================================================ FILE: baselines/model.py ================================================ import torch import torch.nn as nn from models.FCN import DenseNet from typing import List from .utils import weighted_mse class DeepONet(nn.Module): def __init__(self, branch_layer, trunk_layer): super(DeepONet, self).__init__() self.branch = DenseNet(branch_layer, nn.ReLU) self.trunk = DenseNet(trunk_layer, nn.ReLU) def forward(self, u0, grid): a = self.branch(u0) b = self.trunk(grid) batchsize = a.shape[0] dim = a.shape[1] return torch.bmm(a.view(batchsize, 1, dim), b.view(batchsize, dim, 1)) class DeepONetCP(nn.Module): def __init__(self, branch_layer, trunk_layer): super(DeepONetCP, self).__init__() self.branch = DenseNet(branch_layer, nn.ReLU) self.trunk = DenseNet(trunk_layer, nn.ReLU) def forward(self, u0, grid): a = self.branch(u0) # batchsize x width b = self.trunk(grid) # N x width return torch.einsum('bi,ni->bn', a, b) class SAWeight(nn.Module): def __init__(self, out_dim, num_init: List, num_bd: List, num_collo: List): super(SAWeight, self).__init__() self.init_param = nn.ParameterList( [nn.Parameter(100 * torch.rand(num, out_dim)) for num in num_init] ) self.bd_param = nn.ParameterList( [nn.Parameter(torch.rand(num, out_dim)) for num in num_bd] ) self.collo_param = nn.ParameterList( [nn.Parameter(torch.rand(num, out_dim)) for num in num_collo] ) def forward(self, init_cond: List, bd_cond: List, residual: List): total_loss = 0.0 for param, init_loss in zip(self.init_param, init_cond): total_loss += weighted_mse(init_loss, 0, param) for param, bd in zip(self.bd_param, bd_cond): total_loss += weighted_mse(bd, 0, param) for param, res in 
zip(self.collo_param, residual): total_loss += weighted_mse(res, 0, param) return total_loss ================================================ FILE: baselines/pinns_ns_05s.py ================================================ ''' training for Navier Stokes with Reynolds number 500, 0.5 second time period ''' import csv import random from timeit import default_timer import deepxde as dde from deepxde.optimizers.config import set_LBFGS_options import numpy as np from baselines.data import NSdata import tensorflow as tf Re = 500 def forcing(x): return - 4 * tf.math.cos(4 * x[:, 1:2]) def pde(x, u): ''' Args: x: (x, y, t) u: (u, v, w), where (u,v) is the velocity, w is the vorticity Returns: list of pde loss ''' u_vel, v_vel, w = u[:, 0:1], u[:, 1:2], u[:, 2:3] u_vel_x = dde.grad.jacobian(u, x, i=0, j=0) u_vel_xx = dde.grad.hessian(u, x, component=0, i=0, j=0) u_vel_yy = dde.grad.hessian(u, x, component=0, i=1, j=1) v_vel_y = dde.grad.jacobian(u, x, i=1, j=1) v_vel_xx = dde.grad.hessian(u, x, component=1, i=0, j=0) v_vel_yy = dde.grad.hessian(u, x, component=1, i=1, j=1) w_vor_x = dde.grad.jacobian(u, x, i=2, j=0) w_vor_y = dde.grad.jacobian(u, x, i=2, j=1) w_vor_t = dde.grad.jacobian(u, x, i=2, j=2) w_vor_xx = dde.grad.hessian(u, x, component=2, i=0, j=0) w_vor_yy = dde.grad.hessian(u, x, component=2, i=1, j=1) eqn1 = w_vor_t + u_vel * w_vor_x + v_vel * w_vor_y - \ 1 / Re * (w_vor_xx + w_vor_yy) - forcing(x) eqn2 = u_vel_x + v_vel_y eqn3 = u_vel_xx + u_vel_yy + w_vor_y eqn4 = v_vel_xx + v_vel_yy - w_vor_x return [eqn1, eqn2, eqn3, eqn4] def eval(model, dataset, step, time_cost, offset, config): ''' evaluate test error for the model over dataset ''' test_points, test_vals = dataset.get_test_xyt() pred = model.predict(test_points) vel_u_truth = test_vals[:, 0] vel_v_truth = test_vals[:, 1] vor_truth = test_vals[:, 2] vel_u_pred = pred[:, 0] vel_v_pred = pred[:, 1] vor_pred = pred[:, 2] u_err = dde.metrics.l2_relative_error(vel_u_truth, vel_u_pred) v_err = 
dde.metrics.l2_relative_error(vel_v_truth, vel_v_pred) vor_err = dde.metrics.l2_relative_error(vor_truth, vor_pred) print(f'Instance index : {offset}') print(f'L2 relative error in u: {u_err}') print(f'L2 relative error in v: {v_err}') print(f'L2 relative error in vorticity: {vor_err}') with open(config['log']['logfile'], 'a') as f: writer = csv.writer(f) writer.writerow([offset, u_err, v_err, vor_err, step, time_cost]) def train(offset, config, args): seed = random.randint(1, 10000) print(f'Random seed :{seed}') np.random.seed(seed) # construct dataloader data_config = config['data'] if 'datapath2' in data_config: dataset = NSdata(datapath1=data_config['datapath'], datapath2=data_config['datapath2'], offset=offset, num=1, nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], vel=True, t_interval=data_config['time_interval']) else: dataset = NSdata(datapath1=data_config['datapath'], offset=offset, num=1, nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], vel=True, t_interval=data_config['time_interval']) spatial_domain = dde.geometry.Rectangle(xmin=[0, 0], xmax=[2 * np.pi, 2 * np.pi]) temporal_domain = dde.geometry.TimeDomain(0, data_config['time_interval']) st_domain = dde.geometry.GeometryXTime(spatial_domain, temporal_domain) num_boundary_points = dataset.S points = dataset.get_boundary_points(num_x=num_boundary_points, num_y=num_boundary_points, num_t=dataset.T) u_value = dataset.get_boundary_value(component=0) v_value = dataset.get_boundary_value(component=1) w_value = dataset.get_boundary_value(component=2) # u, v are velocity, w is vorticity boundary_u = dde.PointSetBC(points=points, values=u_value, component=0) boundary_v = dde.PointSetBC(points=points, values=v_value, component=1) boundary_w = dde.PointSetBC(points=points, values=w_value, component=2) data = dde.data.TimePDE( st_domain, pde, [ boundary_u, boundary_v, boundary_w ], 
num_domain=config['train']['num_domain'], num_boundary=config['train']['num_boundary'], num_test=config['train']['num_test'], ) net = dde.maps.FNN(config['model']['layers'], config['model']['activation'], 'Glorot normal') # net = dde.maps.STMsFFN([3] + 4 * [50] + [3], 'tanh', 'Glorot normal', [50], [50]) model = dde.Model(data, net) model.compile('adam', lr=config['train']['base_lr'], loss_weights=[1, 1, 1, 1, 100, 100, 100]) if 'log_step' in config['train']: step_size = config['train']['log_step'] else: step_size = 100 epochs = config['train']['epochs'] // step_size for i in range(epochs): time_start = default_timer() model.train(epochs=step_size, display_every=step_size) time_end = default_timer() eval(model, dataset, i * step_size, time_cost=time_end - time_start, offset=offset, config=config) print('Done!') # set_LBFGS_options(maxiter=10000) # model.compile('L-BFGS', loss_weights=[1, 1, 1, 1, 100, 100, 100]) # model.train() # test_points, test_vals = dataset.get_test_xyt() # # pred = model.predict(test_points) # vel_u_truth = test_vals[:, 0] # vel_v_truth = test_vals[:, 1] # vor_truth = test_vals[:, 2] # # vel_u_pred = pred[:, 0] # vel_v_pred = pred[:, 1] # vor_pred = pred[:, 2] # # u_err = dde.metrics.l2_relative_error(vel_u_truth, vel_u_pred) # v_err = dde.metrics.l2_relative_error(vel_v_truth, vel_v_pred) # vor_err = dde.metrics.l2_relative_error(vor_truth, vor_pred) # print(f'Instance index : {offset}') # print(f'L2 relative error in u: {u_err}') # print(f'L2 relative error in v: {v_err}') # print(f'L2 relative error in vorticity: {vor_err}') # with open(args.logfile, 'a') as f: # writer = csv.writer(f) # writer.writerow([offset, u_err, v_err, vor_err]) ================================================ FILE: baselines/pinns_ns_50s.py ================================================ ''' training for Navier Stokes with viscosity 0.001 spatial domain: (0, 1) ** 2 temporal domain: [0, 49] ''' import csv import random from timeit import default_timer import 
deepxde as dde from deepxde.optimizers.config import set_LBFGS_options import numpy as np from baselines.data import NSLong import tensorflow as tf def forcing(x): theta = x[:, 0:1] + x[:, 1:2] return 0.1 * (tf.math.sin(2 * np.pi * theta) + tf.math.cos(2 * np.pi * theta)) def pde(x, u): ''' Args: x: (x, y, t) u: (u, v, w), where (u,v) is the velocity, w is the vorticity Returns: list of pde loss ''' u_vel, v_vel, w = u[:, 0:1], u[:, 1:2], u[:, 2:3] u_vel_x = dde.grad.jacobian(u, x, i=0, j=0) u_vel_xx = dde.grad.hessian(u, x, component=0, i=0, j=0) u_vel_yy = dde.grad.hessian(u, x, component=0, i=1, j=1) v_vel_y = dde.grad.jacobian(u, x, i=1, j=1) v_vel_xx = dde.grad.hessian(u, x, component=1, i=0, j=0) v_vel_yy = dde.grad.hessian(u, x, component=1, i=1, j=1) w_vor_x = dde.grad.jacobian(u, x, i=2, j=0) w_vor_y = dde.grad.jacobian(u, x, i=2, j=1) w_vor_t = dde.grad.jacobian(u, x, i=2, j=2) w_vor_xx = dde.grad.hessian(u, x, component=2, i=0, j=0) w_vor_yy = dde.grad.hessian(u, x, component=2, i=1, j=1) eqn1 = w_vor_t + u_vel * w_vor_x + v_vel * w_vor_y - \ 0.001 * (w_vor_xx + w_vor_yy) - forcing(x) eqn2 = u_vel_x + v_vel_y eqn3 = u_vel_xx + u_vel_yy + w_vor_y eqn4 = v_vel_xx + v_vel_yy - w_vor_x return [eqn1, eqn2, eqn3, eqn4] def eval(model, dataset, step, time_cost, offset, config): ''' evaluate test error for the model over dataset ''' test_points, test_vals = dataset.get_test_xyt() pred = model.predict(test_points) vel_u_truth = test_vals[:, 0] vel_v_truth = test_vals[:, 1] vor_truth = test_vals[:, 2] vel_u_pred = pred[:, 0] vel_v_pred = pred[:, 1] vor_pred = pred[:, 2] u_err = dde.metrics.l2_relative_error(vel_u_truth, vel_u_pred) v_err = dde.metrics.l2_relative_error(vel_v_truth, vel_v_pred) vor_err = dde.metrics.l2_relative_error(vor_truth, vor_pred) total_num = test_vals.shape[0] u50 = test_vals[dataset.T - 1: total_num: dataset.T, 0] v50 = test_vals[dataset.T - 1: total_num: dataset.T, 1] vor50 = test_vals[dataset.T - 1: total_num: dataset.T, 2] u50_pred = 
pred[dataset.T - 1: total_num: dataset.T, 0] v50_pred = pred[dataset.T - 1: total_num: dataset.T, 1] vor50_pred = pred[dataset.T - 1: total_num: dataset.T, 2] u50_err = dde.metrics.l2_relative_error(u50, u50_pred) v50_err = dde.metrics.l2_relative_error(v50, v50_pred) vor50_err = dde.metrics.l2_relative_error(vor50, vor50_pred) print(f'Instance index : {offset}') print(f'L2 relative error in u: {u_err}') print(f'L2 relative error in v: {v_err}') print(f'L2 relative error in vorticity: {vor_err}') print(f'Time {dataset.T - 1} L2 relative error of u : {u50_err}') print(f'Time {dataset.T - 1} L2 relative error of v : {v50_err}') print(f'Time {dataset.T - 1} L2 relative error of vor : {vor50_err}') with open(config['log']['logfile'], 'a') as f: writer = csv.writer(f) writer.writerow([offset, u_err, v_err, vor_err, step, time_cost, u50_err, v50_err, vor50_err]) def train_longtime(offset, config, args): seed = random.randint(1, 10000) print(f'Random seed :{seed}') np.random.seed(seed) # construct dataloader data_config = config['data'] spatial_domain = dde.geometry.Rectangle(xmin=[0, 0], xmax=[1, 1]) temporal_domain = dde.geometry.TimeDomain(0, data_config['time_scale']) st_domain = dde.geometry.GeometryXTime(spatial_domain, temporal_domain) dataset = NSLong(datapath=data_config['datapath'], nx=data_config['nx'], nt=data_config['nt'], time_scale=data_config['time_scale'], offset=offset, num=data_config['n_sample'], vel=True) points = dataset.get_boundary_points(dataset.S, dataset.S, dataset.T) u_value = dataset.get_boundary_value(component=0) v_value = dataset.get_boundary_value(component=1) w_value = dataset.get_boundary_value(component=2) # u, v are velocity, w is vorticity boundary_u = dde.PointSetBC(points=points, values=u_value, component=0) boundary_v = dde.PointSetBC(points=points, values=v_value, component=1) boundary_w = dde.PointSetBC(points=points, values=w_value, component=2) data = dde.data.TimePDE( st_domain, pde, [ boundary_u, boundary_v, boundary_w ], 
num_domain=config['train']['num_domain'], num_boundary=config['train']['num_boundary'], num_test=config['train']['num_test'], ) net = dde.maps.FNN(config['model']['layers'], config['model']['activation'], 'Glorot normal') # net = dde.maps.STMsFFN([3] + 4 * [50] + [3], 'tanh', 'Glorot normal', [50], [50]) model = dde.Model(data, net) model.compile('adam', lr=config['train']['base_lr'], loss_weights=[1, 1, 1, 1, 100, 100, 100]) if 'log_step' in config['train']: step_size = config['train']['log_step'] else: step_size = 100 epochs = config['train']['epochs'] // step_size for i in range(epochs): time_start = default_timer() model.train(epochs=step_size, display_every=step_size) time_end = default_timer() eval(model, dataset, i * step_size, time_cost=time_end - time_start, offset=offset, config=config) print('Done!') ================================================ FILE: baselines/sapinns-50s.py ================================================ import csv import random from timeit import default_timer from tqdm import tqdm import deepxde as dde import numpy as np from baselines.data import NSdata import torch from torch.optim import Adam from tensordiffeq.boundaries import DomainND, periodicBC from .tqd_utils import PointsIC from .model import SAWeight from models.FCN import DenseNet from train_utils.negadam import NAdam def forcing(x): theta = x[:, 0:1] + x[:, 1:2] return 0.1 * (torch.sin(2 * np.pi * theta) + torch.cos(2 * np.pi * theta)) def pde(x, u): ''' Args: x: (x, y, t) u: (u, v, w), where (u,v) is the velocity, w is the vorticity Returns: list of pde loss ''' u_vel, v_vel, w = u[:, 0:1], u[:, 1:2], u[:, 2:3] u_vel_x = dde.grad.jacobian(u, x, i=0, j=0) u_vel_xx = dde.grad.hessian(u, x, component=0, i=0, j=0) u_vel_yy = dde.grad.hessian(u, x, component=0, i=1, j=1) v_vel_y = dde.grad.jacobian(u, x, i=1, j=1) v_vel_xx = dde.grad.hessian(u, x, component=1, i=0, j=0) v_vel_yy = dde.grad.hessian(u, x, component=1, i=1, j=1) w_vor_x = dde.grad.jacobian(u, x, i=2, j=0) 
w_vor_y = dde.grad.jacobian(u, x, i=2, j=1) w_vor_t = dde.grad.jacobian(u, x, i=2, j=2) w_vor_xx = dde.grad.hessian(u, x, component=2, i=0, j=0) w_vor_yy = dde.grad.hessian(u, x, component=2, i=1, j=1) eqn1 = w_vor_t + u_vel * w_vor_x + v_vel * w_vor_y - \ 0.001 * (w_vor_xx + w_vor_yy) - forcing(x) eqn2 = u_vel_x + v_vel_y eqn3 = u_vel_xx + u_vel_yy + w_vor_y eqn4 = v_vel_xx + v_vel_yy - w_vor_x return [eqn1, eqn2, eqn3, eqn4] def eval(model, dataset, step, time_cost, offset, config): ''' evaluate test error for the model over dataset ''' test_points, test_vals = dataset.get_test_xyt() test_points = torch.tensor(test_points, dtype=torch.float32) with torch.no_grad(): pred = model(test_points).cpu().numpy() vel_u_truth = test_vals[:, 0] vel_v_truth = test_vals[:, 1] vor_truth = test_vals[:, 2] vel_u_pred = pred[:, 0] vel_v_pred = pred[:, 1] vor_pred = pred[:, 2] u_err = dde.metrics.l2_relative_error(vel_u_truth, vel_u_pred) v_err = dde.metrics.l2_relative_error(vel_v_truth, vel_v_pred) vor_err = dde.metrics.l2_relative_error(vor_truth, vor_pred) print(f'Instance index : {offset}') print(f'L2 relative error in u: {u_err}') print(f'L2 relative error in v: {v_err}') print(f'L2 relative error in vorticity: {vor_err}') with open(config['log']['logfile'], 'a') as f: writer = csv.writer(f) writer.writerow([offset, u_err, v_err, vor_err, step, time_cost]) def train_sapinn(offset, config, args): seed = random.randint(1, 10000) print(f'Random seed :{seed}') np.random.seed(seed) # construct dataloader data_config = config['data'] if 'datapath2' in data_config: dataset = NSdata(datapath1=data_config['datapath'], datapath2=data_config['datapath2'], offset=offset, num=1, nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], vel=True, t_interval=data_config['time_interval']) else: dataset = NSdata(datapath1=data_config['datapath'], offset=offset, num=1, nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], 
sub_t=data_config['sub_t'], vel=True, t_interval=data_config['time_interval']) domain = DomainND(['x', 'y', 't'], time_var='t') domain.add('x', [0.0, 2 * np.pi], dataset.S) domain.add('y', [0.0, 2 * np.pi], dataset.S) domain.add('t', [0.0, data_config['time_interval']], dataset.T) num_collo = config['train']['num_domain'] domain.generate_collocation_points(num_collo) init_vals = dataset.get_init_cond() num_inits = config['train']['num_init'] if num_inits > dataset.S ** 2: num_inits = dataset.S ** 2 init_cond = PointsIC(domain, init_vals, var=['x', 'y'], n_values=num_inits) bd_cond = periodicBC(domain, ['x', 'y'], n_values=config['train']['num_boundary']) # prepare initial condition inputs init_input = torch.tensor(init_cond.input, dtype=torch.float32) init_val = torch.tensor(init_cond.val, dtype=torch.float32) # prepare boundary condition inputs upper_input0 = torch.tensor(bd_cond.upper[0], dtype=torch.float32).squeeze().t() # shape N x 3 upper_input1 = torch.tensor(bd_cond.upper[1], dtype=torch.float32).squeeze().t() lower_input0 = torch.tensor(bd_cond.lower[0], dtype=torch.float32).squeeze().t() lower_input1 = torch.tensor(bd_cond.lower[1], dtype=torch.float32).squeeze().t() # prepare collocation points collo_input = torch.tensor(domain.X_f, dtype=torch.float32, requires_grad=True) weight_net = SAWeight(out_dim=3, num_init=[num_inits], num_bd=[upper_input0.shape[0]] * 2, num_collo=[num_collo] * 4) net = DenseNet(config['model']['layers'], config['model']['activation']) weight_optim = NAdam(weight_net.parameters(), lr=config['train']['base_lr']) net_optim = Adam(net.parameters(), lr=config['train']['base_lr']) pbar = tqdm(range(config['train']['epochs']), dynamic_ncols=True) start_time = default_timer() for e in pbar: net.zero_grad() weight_net.zero_grad() if collo_input.grad is not None: collo_input.grad.zero_() init_pred = net(init_input) - init_val bd_0 = net(upper_input0) - net(lower_input0) bd_1 = net(upper_input1) - net(lower_input1) predu = net(collo_input) 
pde_residual = pde(collo_input, predu) loss = weight_net(init_cond=[init_pred], bd_cond=[bd_0, bd_1], residual=pde_residual) loss.backward() weight_optim.step() net_optim.step() pbar.set_description( ( f'Epoch: {e}, loss: {loss.item()}' ) ) if e % config['train']['log_step'] == 0: end_time = default_timer() eval(net, dataset, e, time_cost=end_time - start_time, offset=offset, config=config) start_time = default_timer() print('Done!') ================================================ FILE: baselines/sapinns.py ================================================ import csv import random from timeit import default_timer from tqdm import tqdm import deepxde as dde import numpy as np from baselines.data import NSdata import torch from torch.optim import Adam from tensordiffeq.boundaries import DomainND, periodicBC from .tqd_utils import PointsIC from .model import SAWeight from models.FCN import DenseNet from train_utils.negadam import NAdam Re = 500 def forcing(x): return - 4 * torch.cos(4 * x[:, 1:2]) def pde(x, u): ''' Args: x: (x, y, t) u: (u, v, w), where (u,v) is the velocity, w is the vorticity Returns: list of pde loss ''' u_vel, v_vel, w = u[:, 0:1], u[:, 1:2], u[:, 2:3] u_vel_x = dde.grad.jacobian(u, x, i=0, j=0) u_vel_xx = dde.grad.hessian(u, x, component=0, i=0, j=0) u_vel_yy = dde.grad.hessian(u, x, component=0, i=1, j=1) v_vel_y = dde.grad.jacobian(u, x, i=1, j=1) v_vel_xx = dde.grad.hessian(u, x, component=1, i=0, j=0) v_vel_yy = dde.grad.hessian(u, x, component=1, i=1, j=1) w_vor_x = dde.grad.jacobian(u, x, i=2, j=0) w_vor_y = dde.grad.jacobian(u, x, i=2, j=1) w_vor_t = dde.grad.jacobian(u, x, i=2, j=2) w_vor_xx = dde.grad.hessian(u, x, component=2, i=0, j=0) w_vor_yy = dde.grad.hessian(u, x, component=2, i=1, j=1) eqn1 = w_vor_t + u_vel * w_vor_x + v_vel * w_vor_y - \ 1 / Re * (w_vor_xx + w_vor_yy) - forcing(x) eqn2 = u_vel_x + v_vel_y eqn3 = u_vel_xx + u_vel_yy + w_vor_y eqn4 = v_vel_xx + v_vel_yy - w_vor_x return [eqn1, eqn2, eqn3, eqn4] def eval(model, 
dataset, step, time_cost, offset, config): ''' evaluate test error for the model over dataset ''' test_points, test_vals = dataset.get_test_xyt() test_points = torch.tensor(test_points, dtype=torch.float32) with torch.no_grad(): pred = model(test_points).cpu().numpy() vel_u_truth = test_vals[:, 0] vel_v_truth = test_vals[:, 1] vor_truth = test_vals[:, 2] vel_u_pred = pred[:, 0] vel_v_pred = pred[:, 1] vor_pred = pred[:, 2] u_err = dde.metrics.l2_relative_error(vel_u_truth, vel_u_pred) v_err = dde.metrics.l2_relative_error(vel_v_truth, vel_v_pred) vor_err = dde.metrics.l2_relative_error(vor_truth, vor_pred) print(f'Instance index : {offset}') print(f'L2 relative error in u: {u_err}') print(f'L2 relative error in v: {v_err}') print(f'L2 relative error in vorticity: {vor_err}') with open(config['log']['logfile'], 'a') as f: writer = csv.writer(f) writer.writerow([offset, u_err, v_err, vor_err, step, time_cost]) def train_sapinn(offset, config, args): seed = random.randint(1, 10000) print(f'Random seed :{seed}') np.random.seed(seed) # construct dataloader data_config = config['data'] if 'datapath2' in data_config: dataset = NSdata(datapath1=data_config['datapath'], datapath2=data_config['datapath2'], offset=offset, num=1, nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], vel=True, t_interval=data_config['time_interval']) else: dataset = NSdata(datapath1=data_config['datapath'], offset=offset, num=1, nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], vel=True, t_interval=data_config['time_interval']) domain = DomainND(['x', 'y', 't'], time_var='t') domain.add('x', [0.0, 2 * np.pi], dataset.S) domain.add('y', [0.0, 2 * np.pi], dataset.S) domain.add('t', [0.0, data_config['time_interval']], dataset.T) num_collo = config['train']['num_domain'] domain.generate_collocation_points(num_collo) init_vals = dataset.get_init_cond() num_inits = config['train']['num_init'] if num_inits > 
dataset.S ** 2: num_inits = dataset.S ** 2 init_cond = PointsIC(domain, init_vals, var=['x', 'y'], n_values=num_inits) bd_cond = periodicBC(domain, ['x', 'y'], n_values=config['train']['num_boundary']) # prepare initial condition inputs init_input = torch.tensor(init_cond.input, dtype=torch.float32) init_val = torch.tensor(init_cond.val, dtype=torch.float32) # prepare boundary condition inputs upper_input0 = torch.tensor(bd_cond.upper[0], dtype=torch.float32).squeeze().t() # shape N x 3 upper_input1 = torch.tensor(bd_cond.upper[1], dtype=torch.float32).squeeze().t() lower_input0 = torch.tensor(bd_cond.lower[0], dtype=torch.float32).squeeze().t() lower_input1 = torch.tensor(bd_cond.lower[1], dtype=torch.float32).squeeze().t() # prepare collocation points collo_input = torch.tensor(domain.X_f, dtype=torch.float32, requires_grad=True) weight_net = SAWeight(out_dim=3, num_init=[num_inits], num_bd=[upper_input0.shape[0]] * 2, num_collo=[num_collo] * 4) net = DenseNet(config['model']['layers'], config['model']['activation']) weight_optim = NAdam(weight_net.parameters(), lr=config['train']['base_lr']) net_optim = Adam(net.parameters(), lr=config['train']['base_lr']) pbar = tqdm(range(config['train']['epochs']), dynamic_ncols=True) start_time = default_timer() for e in pbar: net.zero_grad() weight_net.zero_grad() if collo_input.grad is not None: collo_input.grad.zero_() init_pred = net(init_input) - init_val bd_0 = net(upper_input0) - net(lower_input0) bd_1 = net(upper_input1) - net(lower_input1) predu = net(collo_input) pde_residual = pde(collo_input, predu) loss = weight_net(init_cond=[init_pred], bd_cond=[bd_0, bd_1], residual=pde_residual) loss.backward() weight_optim.step() net_optim.step() dde.gradients.clear() pbar.set_description( ( f'Epoch: {e}, loss: {loss.item()}' ) ) if e % config['train']['log_step'] == 0: end_time = default_timer() eval(net, dataset, e, time_cost=end_time - start_time, offset=offset, config=config) start_time = default_timer() print('Done!') 
================================================ FILE: baselines/test.py ================================================ from tqdm import tqdm import numpy as np import torch from torch.utils.data import DataLoader from baselines.model import DeepONetCP from baselines.data import DeepONetCPNS, DarcyFlow from train_utils.losses import LpLoss def test(model, test_loader, grid, device): pbar = tqdm(test_loader, dynamic_ncols=True, smoothing=0.1) myloss = LpLoss(size_average=True) model.eval() test_error = [] with torch.no_grad(): for x, y in pbar: x = x.to(device) y = y.to(device) grid = grid.to(device) pred = model(x, grid) loss = myloss(pred, y) test_error.append(loss.item()) pbar.set_description( ( f'test error: {loss.item():.5f}' ) ) mean = np.mean(test_error) std = np.std(test_error, ddof=1) / np.sqrt(len(test_error)) print(f'Averaged test error :{mean}, standard error: {std}') def test_deeponet_ns(config): ''' Evaluate deeponet model on Navier Stokes equation Args: config: configurations Returns: ''' device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_config = config['data'] batch_size = config['test']['batchsize'] dataset = DeepONetCPNS(datapath=data_config['datapath'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], offset=data_config['offset'], num=data_config['n_sample'], t_interval=data_config['time_interval']) test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True) u0_dim = dataset.S ** 2 model = DeepONetCP(branch_layer=[u0_dim] + config['model']['branch_layers'], trunk_layer=[3] + config['model']['trunk_layers']).to(device) if 'ckpt' in config['test']: ckpt = torch.load(config['test']['ckpt']) model.load_state_dict(ckpt['model']) grid = test_loader.dataset.xyt test(model, test_loader, grid, device=device) def test_deeponet_darcy(config): ''' Evaluate deeponet mode on Darcy Flow ''' device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_config = 
config['data'] batch_size = config['test']['batchsize'] dataset = DarcyFlow(data_config['datapath'], nx=data_config['nx'], sub=data_config['sub'], offset=data_config['offset'], num=data_config['n_sample']) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True) u0_dim = dataset.S ** 2 model = DeepONetCP(branch_layer=[u0_dim] + config['model']['branch_layers'], trunk_layer=[2] + config['model']['trunk_layers']).to(device) if 'ckpt' in config['test']: ckpt = torch.load(config['test']['ckpt']) model.load_state_dict(ckpt['model']) print('Load model weights from %s' % config['test']['ckpt']) grid = dataset.mesh.reshape(-1, 2) test(model, dataloader, grid, device) ================================================ FILE: baselines/tqd_sapinns.py ================================================ import random import numpy as np import csv from timeit import default_timer import tensorflow as tf import deepxde as dde import tensordiffeq as tdq from tensordiffeq.models import CollocationSolverND from tensordiffeq.boundaries import DomainND, periodicBC from .tqd_utils import PointsIC from baselines.data import NSdata Re = 500 def forcing(x): return - 4 * tf.math.cos(4 * x) def bd_model(u_model, x, y, t): u = u_model(tf.concat([x, y, t], 1)) u_vel, v_vel, w = u[:, 0:1], u[:, 1:2], u[:, 2:3] return u_vel, v_vel, w def f_model(u_model, x, y, t): inp = tf.concat([x, y, t], 1) u = u_model(inp) u_vel, v_vel, w = u[:, 0:1], u[:, 1:2], u[:, 2:3] u_vel_x = tf.gradients(u_vel, x)[0] u_vel_xx = tf.gradients(u_vel_x, x)[0] u_vel_y = tf.gradients(u_vel, y)[0] u_vel_yy = tf.gradients(u_vel_y, y)[0] v_vel_y = tf.gradients(v_vel, y)[0] v_vel_x = tf.gradients(v_vel, x)[0] v_vel_xx = tf.gradients(v_vel_x, x)[0] v_vel_yy = tf.gradients(v_vel_y, y)[0] w_vor_x = tf.gradients(w, x)[0] w_vor_y = tf.gradients(w, y)[0] w_vor_t = tf.gradients(w, t)[0] w_vor_xx = tf.gradients(w_vor_x, x)[0] w_vor_yy = tf.gradients(w_vor_y, y)[0] c1 = tdq.utils.constant(1 / Re) eqn1 = w_vor_t + u_vel * 
def eval(model, dataset, step, time_cost, offset, config):
    # NOTE: shadows the builtin `eval`; name kept for backward compatibility.
    '''
    Compute L2 relative test errors of `model` over `dataset`, print them,
    and append one CSV row [offset, u_err, v_err, vor_err, step, time_cost]
    to the configured log file.
    '''
    test_points, test_vals = dataset.get_test_xyt()
    pred = model.predict(test_points)
    # Column order of both truth and prediction: (u, v, vorticity).
    truth_u, truth_v, truth_w = (test_vals[:, i] for i in range(3))
    pred_u, pred_v, pred_w = (pred[:, i] for i in range(3))
    u_err = dde.metrics.l2_relative_error(truth_u, pred_u)
    v_err = dde.metrics.l2_relative_error(truth_v, pred_v)
    vor_err = dde.metrics.l2_relative_error(truth_w, pred_w)
    print(f'Instance index : {offset}')
    print(f'L2 relative error in u: {u_err}')
    print(f'L2 relative error in v: {v_err}')
    print(f'L2 relative error in vorticity: {vor_err}')
    with open(config['log']['logfile'], 'a') as f:
        csv.writer(f).writerow([offset, u_err, v_err, vor_err, step, time_cost])


def train_sa(offset, config, args):
    '''
    Train a self-adaptive PINN (tensordiffeq CollocationSolverND) on one
    Navier-Stokes instance selected by `offset`, logging test error every
    `log_step` iterations.
    '''
    seed = random.randint(1, 10000)
    print(f'Random seed :{seed}')
    np.random.seed(seed)
    # construct dataloader
    data_config = config['data']
    ns_kwargs = dict(datapath1=data_config['datapath'],
                     offset=offset, num=1,
                     nx=data_config['nx'], nt=data_config['nt'],
                     sub=data_config['sub'],
                     sub_t=data_config['sub_t'],
                     vel=True,
                     t_interval=data_config['time_interval'])
    if 'datapath2' in data_config:
        ns_kwargs['datapath2'] = data_config['datapath2']
    dataset = NSdata(**ns_kwargs)

    # Space-time domain: [0, 2pi]^2 x [0, T].
    domain = DomainND(['x', 'y', 't'], time_var='t')
    domain.add('x', [0.0, 2 * np.pi], dataset.S)
    domain.add('y', [0.0, 2 * np.pi], dataset.S)
    domain.add('t', [0.0, data_config['time_interval']], dataset.T)
    n_domain = config['train']['num_domain']
    n_boundary = config['train']['num_boundary']
    domain.generate_collocation_points(n_domain)

    solver = CollocationSolverND()
    init_vals = dataset.get_init_cond()
    # At most S^2 distinct initial-condition points exist on the grid.
    num_inits = min(config['train']['num_init'], dataset.S ** 2)
    init_cond = PointsIC(domain, init_vals, var=['x', 'y'], n_values=num_inits)
    bd_cond = periodicBC(domain, ['x', 'y'], [bd_model], n_values=n_boundary)
    BCs = [init_cond, bd_cond]

    # Self-adaptive weights: one trainable weight per collocation point for
    # each of the four PDE residuals; the initial condition is adaptive, the
    # periodic boundary is not.
    dict_adaptive = {'residual': [True] * 4,
                     'BCs': [True, False]}
    init_weights = {
        'residual': [tf.random.uniform([n_domain, 1]) for _ in range(4)],
        'BCs': [100 * tf.random.uniform([num_inits, 1]),
                100 * tf.ones([n_boundary, 1])],
    }
    solver.compile(config['model']['layers'], f_model, domain, BCs,
                   isAdaptive=True,
                   dict_adaptive=dict_adaptive,
                   init_weights=init_weights)

    # Evaluate every `log_step` iterations (default 100).
    step_size = config['train'].get('log_step', 100)
    for i in range(config['train']['epochs'] // step_size):
        tick = default_timer()
        solver.fit(tf_iter=step_size)
        tock = default_timer()
        eval(solver, dataset, i * step_size,
             time_cost=tock - tick,
             offset=offset, config=config)
    print('Done!')


# ================================ FILE: baselines/tqd_utils.py ================================
import numpy as np
from tensordiffeq.boundaries import BC
from tensordiffeq.utils import flatten_and_stack, multimesh, MSE, convertTensor


class PointsIC(BC):
    '''
    Initial condition built from an array of values given on the spatial mesh
    of a DomainND (rather than from an analytic function).
    '''

    def __init__(self, domain, values, var, n_values=None):
        '''
        args:
            - domain: DomainND describing the space-time domain
            - values: array of initial values on the spatial mesh
            - var: list of spatial variable identifiers, e.g. ['x', 'y']
            - n_values: optional number of points to randomly subsample
        '''
        super(PointsIC, self).__init__()
        self.isInit = True
        self.n_values = n_values
        self.domain = domain
        self.values = values
        self.vars = var
        # Spatial dimensions (everything except the time variable) ...
        self.dicts_ = [item for item in self.domain.domaindict
                       if item['identifier'] != self.domain.time_var]
        # ... and the time dimension itself.
        self.dict_ = next(item for item in self.domain.domaindict
                          if item['identifier'] == self.domain.time_var)
        # compile() builds the input mesh and (when subsampling) the index
        # array self.nums that create_target() relies on — order matters.
        self.compile()
        self.create_target(self.values)

    def create_input(self):
        # Spatial mesh at t = 0: flatten the spatial grid and append a zero
        # time column.
        dims = self.get_not_dims(self.domain.time_var)
        mesh = flatten_and_stack(multimesh(dims))
        t_repeat = np.repeat(0.0, len(mesh))
        mesh = np.concatenate((mesh, np.reshape(t_repeat, (-1, 1))), axis=1)
        if self.n_values is not None:
            # Random subsample (with replacement); the same indices are used
            # in create_target so inputs and targets stay aligned.
            self.nums = np.random.randint(0, high=len(mesh), size=self.n_values)
            mesh = mesh[self.nums]
        return mesh

    def create_target(self, values):
        # Targets are the (u, v, vorticity) triples at the sampled points.
        vals = np.reshape(values, (-1, 3))
        self.val = vals if self.n_values is None else vals[self.nums]

    def loss(self):
        return MSE(self.preds, self.val)
# ================================ FILE: baselines/train_darcy.py ================================
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import MultiStepLR

from baselines.model import DeepONetCP
from train_utils.losses import LpLoss
from train_utils.utils import save_checkpoint
from baselines.data import DarcyFlow


def train_deeponet_darcy(config):
    '''
    train deepONet for darcy flow
    '''
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    data_config = config['data']
    batch_size = config['train']['batchsize']
    dataset = DarcyFlow(data_config['datapath'],
                        nx=data_config['nx'], sub=data_config['sub'],
                        offset=data_config['offset'], num=data_config['n_sample'])
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    u0_dim = dataset.S ** 2
    model = DeepONetCP(branch_layer=[u0_dim] + config['model']['branch_layers'],
                       trunk_layer=[2] + config['model']['trunk_layers']).to(device)
    optimizer = Adam(model.parameters(), lr=config['train']['base_lr'])
    scheduler = MultiStepLR(optimizer,
                            milestones=config['train']['milestones'],
                            gamma=config['train']['scheduler_gamma'])
    pbar = range(config['train']['epochs'])
    pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.1)
    myloss = LpLoss(size_average=True)
    model.train()
    grid = dataset.mesh
    grid = grid.reshape(-1, 2).to(device)  # grid value, (SxS, 2)
    for e in pbar:
        train_loss = 0.0
        for x, y in dataloader:
            x = x.to(device)  # initial condition, (batchsize, u0_dim)
            y = y.to(device)  # ground truth, (batchsize, SxS)
            pred = model(x, grid)
            loss = myloss(pred, y)
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch average is per-sample.
            train_loss += loss.item() * y.shape[0]
        train_loss /= len(dataset)
        scheduler.step()
        pbar.set_description(
            (
                f'Epoch: {e}; Averaged train loss: {train_loss:.5f}; '
            )
        )
        if e % 1000 == 0:
            print(f'Epoch: {e}, averaged train loss: {train_loss:.5f}')
            save_checkpoint(config['train']['save_dir'],
                            config['train']['save_name'].replace('.pt', f'_{e}.pt'),
                            model, optimizer)
    save_checkpoint(config['train']['save_dir'],
                    config['train']['save_name'],
                    model, optimizer)


# ================================ FILE: baselines/train_ns.py ================================
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import MultiStepLR

from baselines.model import DeepONet, DeepONetCP
from baselines.data import DeepOnetNS, DeepONetCPNS
from train_utils.losses import LpLoss
from train_utils.utils import save_checkpoint
from train_utils.data_utils import sample_data


def train_deeponet_cp(config):
    '''
    Train Cartesian product DeepONet
    Args:
        config:
    Returns:
    '''
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    data_config = config['data']
    batch_size = config['train']['batchsize']
    dataset = DeepONetCPNS(datapath=data_config['datapath'],
                           nx=data_config['nx'], nt=data_config['nt'],
                           sub=data_config['sub'], sub_t=data_config['sub_t'],
                           offset=data_config['offset'], num=data_config['n_sample'],
                           t_interval=data_config['time_interval'])
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    u0_dim = dataset.S ** 2
    model = DeepONetCP(branch_layer=[u0_dim] + config['model']['branch_layers'],
                       trunk_layer=[3] + config['model']['trunk_layers']).to(device)
    optimizer = Adam(model.parameters(), lr=config['train']['base_lr'])
    scheduler = MultiStepLR(optimizer,
                            milestones=config['train']['milestones'],
                            gamma=config['train']['scheduler_gamma'])
    pbar = range(config['train']['epochs'])
    pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.1)
    myloss = LpLoss(size_average=True)
    model.train()
    # The query grid is identical for every batch and epoch; fetch and move
    # it to the device once (the original re-transferred it per batch).
    # This mirrors train_deeponet_darcy above.
    grid = dataset.xyt.to(device)  # grid value, (SxSxT, 3)
    for e in pbar:
        train_loss = 0.0
        for x, y in train_loader:
            x = x.to(device)  # initial condition, (batchsize, u0_dim)
            y = y.to(device)  # ground truth, (batchsize, SxSxT)
            pred = model(x, grid)
            loss = myloss(pred, y)
            model.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * y.shape[0]
        train_loss /= len(dataset)
        scheduler.step()
        pbar.set_description(
            (
                f'Epoch: {e}; Averaged train loss: {train_loss:.5f}; '
            )
        )
        if e % 500 == 0:
            print(f'Epoch: {e}, averaged train loss: {train_loss:.5f}')
            save_checkpoint(config['train']['save_dir'],
                            config['train']['save_name'].replace('.pt', f'_{e}.pt'),
                            model, optimizer)
    # Save the final model as well (consistent with train_deeponet_darcy);
    # previously the last state was only saved when e hit the %500 cadence.
    save_checkpoint(config['train']['save_dir'],
                    config['train']['save_name'],
                    model, optimizer)
def train_deeponet(config):
    '''
    train plain DeepOnet
    Args:
        config:
    Returns:
    '''
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    data_config = config['data']
    dataset = DeepOnetNS(datapath=data_config['datapath'],
                         nx=data_config['nx'], nt=data_config['nt'],
                         sub=data_config['sub'], sub_t=data_config['sub_t'],
                         offset=data_config['offset'], num=data_config['n_sample'],
                         t_interval=data_config['time_interval'])
    train_loader = DataLoader(dataset,
                              batch_size=config['train']['batchsize'],
                              shuffle=False)
    u0_dim = dataset.S ** 2
    model = DeepONet(branch_layer=[u0_dim] + config['model']['branch_layers'],
                     trunk_layer=[3] + config['model']['trunk_layers']).to(device)
    optimizer = Adam(model.parameters(), lr=config['train']['base_lr'])
    scheduler = MultiStepLR(optimizer,
                            milestones=config['train']['milestones'],
                            gamma=config['train']['scheduler_gamma'])
    epoch_bar = tqdm(range(config['train']['epochs']),
                     dynamic_ncols=True, smoothing=0.1)
    myloss = LpLoss(size_average=True)
    model.train()
    # sample_data turns the loader into an infinite iterator: one batch
    # (optimization step) per "epoch" tick.
    loader = sample_data(train_loader)
    for e in epoch_bar:
        u0, x, y = next(loader)
        u0, x, y = u0.to(device), x.to(device), y.to(device)
        pred = model(u0, x)
        loss = myloss(pred, y)
        model.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        epoch_bar.set_description(
            f'Epoch: {e}; Train loss: {loss.item():.5f}; '
        )
    save_checkpoint(config['train']['save_dir'],
                    config['train']['save_name'],
                    model, optimizer)
    print('Done!')


# ================================ FILE: baselines/unet3d.py ================================
from functools import partial

import torch
from torch import nn as nn
from torch.nn import functional as F

# UNet3d from https://github.com/wolny/pytorch-3dunet


class BaseModel(nn.Module):
    """Base module that tracks its device and declares the data/loss/eval API."""

    def __init__(self):
        super().__init__()
        # Empty parameter whose .device reveals where the module lives.
        self.device_indicator_param = nn.Parameter(torch.empty(0))

    @property
    def device(self):
        """Returns the device that the model is on."""
        return self.device_indicator_param.device

    def data_dict_to_input(self, data_dict, **kwargs):
        """Convert data dictionary to appropriate input for the model."""
        raise NotImplementedError

    def loss_dict(self, data_dict, **kwargs):
        """Compute the loss dictionary for the model."""
        raise NotImplementedError

    @torch.no_grad()
    def eval_dict(self, data_dict, **kwargs):
        """Compute the evaluation dictionary for the model."""
        raise NotImplementedError
""" raise NotImplementedError def create_conv( in_channels, out_channels, kernel_size, order, num_groups, padding, is3d ): """ Create a list of modules with together constitute a single conv layer with non-linearity and optional batchnorm/groupnorm. Args: in_channels (int): number of input channels out_channels (int): number of output channels kernel_size(int or tuple): size of the convolving kernel order (string): order of things, e.g. 'cr' -> conv + ReLU 'gcr' -> groupnorm + conv + ReLU 'cl' -> conv + LeakyReLU 'ce' -> conv + ELU 'bcr' -> batchnorm + conv + ReLU num_groups (int): number of groups for the GroupNorm padding (int or tuple): add zero-padding added to all three sides of the input is3d (bool): is3d (bool): if True use Conv3d, otherwise use Conv2d Return: list of tuple (name, module) """ assert "c" in order, "Conv layer MUST be present" assert ( order[0] not in "rle" ), "Non-linearity cannot be the first operation in the layer" modules = [] for i, char in enumerate(order): if char == "r": modules.append(("ReLU", nn.ReLU(inplace=True))) elif char == "l": modules.append(("LeakyReLU", nn.LeakyReLU(inplace=True))) elif char == "e": modules.append(("ELU", nn.ELU(inplace=True))) elif char == "c": # add learnable bias only in the absence of batchnorm/groupnorm bias = not ("g" in order or "b" in order) if is3d: conv = nn.Conv3d( in_channels, out_channels, kernel_size, padding=padding, bias=bias ) else: conv = nn.Conv2d( in_channels, out_channels, kernel_size, padding=padding, bias=bias ) modules.append(("conv", conv)) elif char == "g": is_before_conv = i < order.index("c") if is_before_conv: num_channels = in_channels else: num_channels = out_channels # use only one group if the given number of groups is greater than the number of channels if num_channels < num_groups: num_groups = 1 assert ( num_channels % num_groups == 0 ), f"Expected number of channels in input to be divisible by num_groups. 
num_channels={num_channels}, num_groups={num_groups}" modules.append( ( "groupnorm", nn.GroupNorm(num_groups=num_groups, num_channels=num_channels), ) ) elif char == "b": is_before_conv = i < order.index("c") if is3d: bn = nn.BatchNorm3d else: bn = nn.BatchNorm2d if is_before_conv: modules.append(("batchnorm", bn(in_channels))) else: modules.append(("batchnorm", bn(out_channels))) else: raise ValueError( f"Unsupported layer type '{char}'. MUST be one of ['b', 'g', 'r', 'l', 'e', 'c']" ) return modules class SingleConv(nn.Sequential): """ Basic convolutional module consisting of a Conv3d, non-linearity and optional batchnorm/groupnorm. The order of operations can be specified via the `order` parameter Args: in_channels (int): number of input channels out_channels (int): number of output channels kernel_size (int or tuple): size of the convolving kernel order (string): determines the order of layers, e.g. 'cr' -> conv + ReLU 'crg' -> conv + ReLU + groupnorm 'cl' -> conv + LeakyReLU 'ce' -> conv + ELU num_groups (int): number of groups for the GroupNorm padding (int or tuple): add zero-padding is3d (bool): if True use Conv3d, otherwise use Conv2d """ def __init__( self, in_channels, out_channels, kernel_size=3, order="gcr", num_groups=8, padding=1, is3d=True, ): super(SingleConv, self).__init__() for name, module in create_conv( in_channels, out_channels, kernel_size, order, num_groups, padding, is3d ): self.add_module(name, module) class DoubleConv(nn.Sequential): """ A module consisting of two consecutive convolution layers (e.g. BatchNorm3d+ReLU+Conv3d). We use (Conv3d+ReLU+GroupNorm3d) by default. This can be changed however by providing the 'order' argument, e.g. in order to change to Conv3d+BatchNorm3d+ELU use order='cbe'. Use padded convolutions to make sure that the output (H_out, W_out) is the same as (H_in, W_in), so that you don't have to crop in the decoder path. 
Args: in_channels (int): number of input channels out_channels (int): number of output channels encoder (bool): if True we're in the encoder path, otherwise we're in the decoder kernel_size (int or tuple): size of the convolving kernel order (string): determines the order of layers, e.g. 'cr' -> conv + ReLU 'crg' -> conv + ReLU + groupnorm 'cl' -> conv + LeakyReLU 'ce' -> conv + ELU num_groups (int): number of groups for the GroupNorm padding (int or tuple): add zero-padding added to all three sides of the input is3d (bool): if True use Conv3d instead of Conv2d layers """ def __init__( self, in_channels, out_channels, encoder, kernel_size=3, order="gcr", num_groups=8, padding=1, is3d=True, ): super(DoubleConv, self).__init__() if encoder: # we're in the encoder path conv1_in_channels = in_channels conv1_out_channels = out_channels // 2 if conv1_out_channels < in_channels: conv1_out_channels = in_channels conv2_in_channels, conv2_out_channels = conv1_out_channels, out_channels else: # we're in the decoder path, decrease the number of channels in the 1st convolution conv1_in_channels, conv1_out_channels = in_channels, out_channels conv2_in_channels, conv2_out_channels = out_channels, out_channels # conv1 self.add_module( "SingleConv1", SingleConv( conv1_in_channels, conv1_out_channels, kernel_size, order, num_groups, padding=padding, is3d=is3d, ), ) # conv2 self.add_module( "SingleConv2", SingleConv( conv2_in_channels, conv2_out_channels, kernel_size, order, num_groups, padding=padding, is3d=is3d, ), ) class Encoder(nn.Module): """ A single module from the encoder path consisting of the optional max pooling layer (one may specify the MaxPool kernel_size to be different from the standard (2,2,2), e.g. if the volumetric data is anisotropic (make sure to use complementary scale_factor in the decoder path) followed by a basic module (DoubleConv or ResNetBlock). 
class Decoder(nn.Module):
    """
    Single decoder stage: upsampling (interpolation or learned transposed
    conv) joined with the matching encoder features, then a basic module.

    Args:
        in_channels (int): number of input channels
        out_channels (int): number of output channels
        conv_kernel_size (int or tuple): size of the convolving kernel
        scale_factor (tuple): multiplier for H/W/D (nn.Upsample) or stride
            (ConvTranspose3d); must reverse the corresponding encoder pooling
        basic_module (nn.Module): either ResNetBlock or DoubleConv
        conv_layer_order (string): layer order inside the basic module
        num_groups (int): number of groups for the GroupNorm
        mode (str): interpolation mode for InterpolateUpsampling
        padding (int or tuple): zero-padding added to all sides of the input
        upsample (bool): should the input be upsampled
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        conv_kernel_size=3,
        scale_factor=(2, 2, 2),
        basic_module=DoubleConv,
        conv_layer_order="gcr",
        num_groups=8,
        mode="nearest",
        padding=1,
        upsample=True,
        is3d=True,
    ):
        super(Decoder, self).__init__()
        if not upsample:
            # no upsampling; concat joining
            self.upsampling = NoUpsampling()
            self.joining = partial(self._joining, concat=True)
        elif basic_module == DoubleConv:
            # DoubleConv: interpolation upsampling + concatenation joining
            self.upsampling = InterpolateUpsampling(mode=mode)
            self.joining = partial(self._joining, concat=True)
        else:
            # ResNetBlock: transposed-conv upsampling + summation joining
            self.upsampling = TransposeConvUpsampling(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=conv_kernel_size,
                scale_factor=scale_factor,
            )
            self.joining = partial(self._joining, concat=False)
            # adapt the number of in_channels for the ResNetBlock
            in_channels = out_channels

        self.basic_module = basic_module(
            in_channels,
            out_channels,
            encoder=False,
            kernel_size=conv_kernel_size,
            order=conv_layer_order,
            num_groups=num_groups,
            padding=padding,
            is3d=is3d,
        )

    def forward(self, encoder_features, x):
        x = self.upsampling(encoder_features=encoder_features, x=x)
        x = self.joining(encoder_features, x)
        return self.basic_module(x)

    @staticmethod
    def _joining(encoder_features, x, concat):
        if concat:
            return torch.cat((encoder_features, x), dim=1)
        return encoder_features + x


def create_encoders(
    in_channels,
    f_maps,
    basic_module,
    conv_kernel_size,
    conv_padding,
    layer_order,
    num_groups,
    pool_kernel_size,
    is3d,
):
    """Build the encoder path: one Encoder per entry of `f_maps` (depth == len(f_maps))."""
    encoders = []
    for i, out_feature_num in enumerate(f_maps):
        if i == 0:
            # the first encoder never pools
            stage = Encoder(
                in_channels,
                out_feature_num,
                apply_pooling=False,
                basic_module=basic_module,
                conv_layer_order=layer_order,
                conv_kernel_size=conv_kernel_size,
                num_groups=num_groups,
                padding=conv_padding,
                is3d=is3d,
            )
        else:
            stage = Encoder(
                f_maps[i - 1],
                out_feature_num,
                basic_module=basic_module,
                conv_layer_order=layer_order,
                conv_kernel_size=conv_kernel_size,
                num_groups=num_groups,
                pool_kernel_size=pool_kernel_size,
                padding=conv_padding,
                is3d=is3d,
            )
        encoders.append(stage)
    return nn.ModuleList(encoders)


def create_decoders(
    f_maps, basic_module, conv_kernel_size, conv_padding, layer_order, num_groups, is3d
):
    """Build the decoder path: len(f_maps) - 1 Decoders over reversed feature maps."""
    decoders = []
    reversed_f_maps = list(reversed(f_maps))
    for i in range(len(reversed_f_maps) - 1):
        if basic_module == DoubleConv:
            # concat joining doubles the decoder input channels
            in_feature_num = reversed_f_maps[i] + reversed_f_maps[i + 1]
        else:
            in_feature_num = reversed_f_maps[i]
        out_feature_num = reversed_f_maps[i + 1]
        decoders.append(
            Decoder(
                in_feature_num,
                out_feature_num,
                basic_module=basic_module,
                conv_layer_order=layer_order,
                conv_kernel_size=conv_kernel_size,
                num_groups=num_groups,
                padding=conv_padding,
                is3d=is3d,
            )
        )
    return nn.ModuleList(decoders)
class AbstractUpsampling(nn.Module):
    """
    Abstract class for upsampling. A given implementation should upsample a
    given 5D input tensor using either interpolation or learned transposed
    convolution.
    """

    def __init__(self, upsample):
        super(AbstractUpsampling, self).__init__()
        self.upsample = upsample

    def forward(self, encoder_features, x):
        # the target spatial size is dictated by the encoder features
        output_size = encoder_features.size()[2:]
        return self.upsample(x, output_size)


class InterpolateUpsampling(AbstractUpsampling):
    """
    Args:
        mode (str): algorithm used for upsampling:
            'nearest' | 'linear' | 'bilinear' | 'trilinear' | 'area'. Default: 'nearest'
            used only if transposed_conv is False
    """

    def __init__(self, mode="nearest"):
        super().__init__(partial(self._interpolate, mode=mode))

    @staticmethod
    def _interpolate(x, size, mode):
        return F.interpolate(x, size=size, mode=mode)


class TransposeConvUpsampling(AbstractUpsampling):
    """
    Learned upsampling via ConvTranspose3d; the stride (scale_factor) must
    reverse the MaxPool3d of the corresponding encoder.

    Args:
        in_channels (int): number of input channels for transposed conv
        out_channels (int): number of output channels for transpose conv
        kernel_size (int or tuple): size of the convolving kernel
        scale_factor (int or tuple): stride of the convolution
    """

    def __init__(
        self, in_channels=None, out_channels=None, kernel_size=3, scale_factor=(2, 2, 2)
    ):
        upsample = nn.ConvTranspose3d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=scale_factor,
            padding=1,
        )
        super().__init__(upsample)


class NoUpsampling(AbstractUpsampling):
    """Identity 'upsampling' used when the decoder does not upsample."""

    def __init__(self):
        super().__init__(self._no_upsampling)

    @staticmethod
    def _no_upsampling(x, size):
        return x


def number_of_features_per_level(init_channel_number, num_levels):
    # geometric progression: init, 2*init, 4*init, ...
    return [init_channel_number * 2**k for k in range(num_levels)]


class AbstractUNet(BaseModel):
    """
    Base class for standard and residual UNet.

    Args:
        in_channels (int): number of input channels
        out_channels (int): number of output segmentation masks (semantic
            classes or binary masks; choose the matching loss criterion)
        final_sigmoid (bool): if True apply nn.Sigmoid after the final 1x1
            convolution, otherwise nn.Softmax; only used when
            is_segmentation is True and the model is in eval mode
        basic_module: basic model for the encoder/decoder (DoubleConv,
            ResNetBlock, ...)
        f_maps (int, tuple): number of feature maps at each level of the
            encoder; an int is expanded geometrically (f_maps * 2^k)
        layer_order (string): order of layers in `SingleConv`, e.g. 'crg'
            stands for GroupNorm3d+Conv3d+ReLU; see `SingleConv` for more info
        num_groups (int): number of groups for the GroupNorm
        num_levels (int): number of levels in the encoder/decoder path
            (applied only if f_maps is an int); default: 4
        is_segmentation (bool): if True and in eval mode, Sigmoid/Softmax is
            applied after the final conv; if False (regression) it is skipped
        conv_kernel_size (int or tuple): kernel size in the basic_module
        pool_kernel_size (int or tuple): the size of the pooling window
        conv_padding (int or tuple): zero-padding added to all sides of the input
        is3d (bool): if True the model is 3D, otherwise 2D, default: True
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        final_sigmoid,
        basic_module,
        f_maps=64,
        layer_order="gcr",
        num_groups=8,
        num_levels=4,
        is_segmentation=False,
        conv_kernel_size=3,
        pool_kernel_size=2,
        conv_padding=1,
        is3d=True,
    ):
        super(AbstractUNet, self).__init__()

        if isinstance(f_maps, int):
            f_maps = number_of_features_per_level(f_maps, num_levels=num_levels)

        assert isinstance(f_maps, (list, tuple))
        assert len(f_maps) > 1, "Required at least 2 levels in the U-Net"
        if "g" in layer_order:
            assert num_groups is not None, "num_groups must be specified if GroupNorm is used"

        # encoder and decoder paths
        self.encoders = create_encoders(
            in_channels,
            f_maps,
            basic_module,
            conv_kernel_size,
            conv_padding,
            layer_order,
            num_groups,
            pool_kernel_size,
            is3d,
        )
        self.decoders = create_decoders(
            f_maps,
            basic_module,
            conv_kernel_size,
            conv_padding,
            layer_order,
            num_groups,
            is3d,
        )

        # final 1x1 convolution maps features to the output channels
        conv_cls = nn.Conv3d if is3d else nn.Conv2d
        self.final_conv = conv_cls(f_maps[0], out_channels, 1)

        if is_segmentation:
            # semantic segmentation problem
            self.final_activation = nn.Sigmoid() if final_sigmoid else nn.Softmax(dim=1)
        else:
            # regression problem
            self.final_activation = None

    def forward(self, x):
        # encoder part: collect features, newest first, so they line up with
        # the decoders below
        encoders_features = []
        for encoder in self.encoders:
            x = encoder(x)
            encoders_features.insert(0, x)

        # drop the deepest output — it is `x` itself, not a skip connection
        encoders_features = encoders_features[1:]

        # decoder part
        for decoder, encoder_features in zip(self.decoders, encoders_features):
            x = decoder(encoder_features, x)

        x = self.final_conv(x)

        # apply final_activation (i.e. Sigmoid or Softmax) only during
        # prediction; during training the network outputs logits
        if not self.training and self.final_activation is not None:
            x = self.final_activation(x)
        return x


class UNet3D(AbstractUNet):
    """
    3DUnet model from `"3D U-Net: Learning Dense Volumetric Segmentation from
    Sparse Annotation" `.
    Uses `DoubleConv` as a basic_module and nearest neighbor upsampling in
    the decoder.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        final_sigmoid=False,
        f_maps=64,
        layer_order="gcr",
        num_groups=8,
        num_levels=4,
        is_segmentation=False,
        conv_padding=1,
        **kwargs,
    ):
        super(UNet3D, self).__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            final_sigmoid=final_sigmoid,
            basic_module=DoubleConv,
            f_maps=f_maps,
            layer_order=layer_order,
            num_groups=num_groups,
            num_levels=num_levels,
            is_segmentation=is_segmentation,
            conv_padding=conv_padding,
            is3d=True,
        )
# ================================ FILE: baselines/utils.py ================================
import numpy as np
import torch
import torch.autograd as autograd


def weighted_mse(pred, target, weight=None):
    '''Mean squared error between pred and target, optionally weighted elementwise.'''
    sq_err = (pred - target) ** 2
    if weight is None:
        return torch.mean(sq_err)
    return torch.mean(weight * sq_err)


def get_3dboundary_points(num_x,  # number of points on x axis
                          num_y,  # number of points on y axis
                          num_t,  # number of points on t axis
                          bot=(0, 0, 0),      # lower bound
                          top=(1.0, 1.0, 1.0)  # upper bound
                          ):
    '''
    Sample points on the boundary of the space-time box [bot, top]:
    the t = t_bot face plus the four spatial side faces
    (x = x_bot, x = x_top, y = y_bot, y = y_top).
    Returns a (num_x*num_y + 2*num_y*num_t + 2*num_x*num_t, 3) array of
    (x, y, t) rows.
    '''
    x_top, y_top, t_top = top
    x_bot, y_bot, t_bot = bot
    # spatial grids exclude the upper endpoint (periodic-domain convention)
    x_arr = np.linspace(x_bot, x_top, num=num_x, endpoint=False)
    y_arr = np.linspace(y_bot, y_top, num=num_y, endpoint=False)

    # t = t_bot face
    xx, yy = np.meshgrid(x_arr, y_arr, indexing='ij')
    xarr, yarr = np.ravel(xx), np.ravel(yy)
    tarr = np.ones_like(xarr) * t_bot
    point0 = np.stack([xarr, yarr, tarr], axis=0).T  # (num_x*num_y, 3)

    # time axis keeps its endpoint
    t_arr = np.linspace(t_bot, t_top, num=num_t)

    # x = x_bot and x = x_top faces
    yy, tt = np.meshgrid(y_arr, t_arr, indexing='ij')
    yarr, tarr = np.ravel(yy), np.ravel(tt)
    point2 = np.stack([np.ones_like(yarr) * x_bot, yarr, tarr], axis=0).T
    point3 = np.stack([np.ones_like(yarr) * x_top, yarr, tarr], axis=0).T

    # y = y_bot and y = y_top faces
    xx, tt = np.meshgrid(x_arr, t_arr, indexing='ij')
    xarr, tarr = np.ravel(xx), np.ravel(tt)
    point4 = np.stack([xarr, np.ones_like(xarr) * y_bot, tarr], axis=0).T
    point5 = np.stack([xarr, np.ones_like(xarr) * y_top, tarr], axis=0).T

    return np.concatenate([point0, point2, point3, point4, point5], axis=0)


def get_3dboundary(value):
    '''
    Extract boundary values from a (1, nx, ny, nt) array, in the same face
    order as get_3dboundary_points. Returns a column vector of shape
    (nx*ny + 2*ny*nt + 2*nx*nt, 1).
    '''
    faces = [
        value[0, :, :, 0:1],   # t = 0 face
        value[0, 0:1, :, :],   # x = lower face
        value[0, -1:, :, :],   # x = upper face
        value[0, :, 0:1, :],   # y = lower face
        value[0, :, -1:, :],   # y = upper face
    ]
    return np.concatenate([np.ravel(face) for face in faces], axis=0)[:, np.newaxis]


def get_xytgrid(S, T, bot=[0, 0, 0], top=[1, 1, 1]):
    '''
    Args:
        S: number of points on each spatial domain
        T: number of points on temporal domain including endpoint
        bot: list or tuple, lower bound on each dimension
        top: list or tuple, upper bound on each dimension

    Returns:
        (S * S * T, 3) array
    '''
    x_arr = np.linspace(bot[0], top[0], num=S, endpoint=False)
    y_arr = np.linspace(bot[1], top[1], num=S, endpoint=False)
    t_arr = np.linspace(bot[2], top[2], num=T)
    xgrid, ygrid, tgrid = np.meshgrid(x_arr, y_arr, t_arr, indexing='ij')
    return np.stack([np.ravel(xgrid), np.ravel(ygrid), np.ravel(tgrid)], axis=0).T


def get_2dgird(num=31):
    '''(num**2, 2) grid on [-1, 1]^2 (default 'xy' meshgrid indexing).'''
    axis = np.linspace(-1, 1, num)
    gridx, gridy = np.meshgrid(axis, axis)
    return np.hstack((gridx.reshape(-1, 1), gridy.reshape(-1, 1)))


def get_3dgrid(num=11):
    '''(num**3, 3) grid on [-1, 1]^3 (default 'xy' meshgrid indexing).'''
    axis = np.linspace(-1, 1, num)
    gridx, gridy, gridz = np.meshgrid(axis, axis, axis)
    return np.hstack((gridx.reshape(-1, 1),
                      gridy.reshape(-1, 1),
                      gridz.reshape(-1, 1)))
= gridz.reshape(-1, 1) return np.hstack((xs, ys, zs)) def get_4dgrid(num=11): ''' 4-D meshgrid Args: num: number of collocation points of each dimension Returns: (num**4, 4) tensor ''' t = np.linspace(0, 1, num) x = np.linspace(-1, 1, num) y = np.linspace(-1, 1, num) z = np.linspace(-1, 1, num) gridx, gridy, gridz, gridt = np.meshgrid(x, y, z, t) xs = gridx.reshape(-1, 1) ys = gridy.reshape(-1, 1) zs = gridz.reshape(-1, 1) ts = gridt.reshape(-1, 1) result = np.hstack((xs, ys, zs, ts)) return result def vel2vor(u, v, x, y): u_y, = autograd.grad(outputs=[u.sum()], inputs=[y], create_graph=True) v_x, = autograd.grad(outputs=[v.sum()], inputs=[x], create_graph=True) vorticity = - u_y + v_x return vorticity def sub_mse(vec): ''' Compute mse of two parts of a vector Args: vec: Returns: ''' length = vec.shape[0] // 2 diff = (vec[:length] - vec[length: 2 * length]) ** 2 return diff.mean() def get_sample(npt=100): num = npt // 2 bc1_y_sample = torch.rand(size=(num, 1)).repeat(2, 1) bc1_t_sample = torch.rand(size=(num, 1)).repeat(2, 1) bc1_x_sample = torch.cat([torch.zeros(num, 1), torch.ones(num, 1)], dim=0) bc2_x_sample = torch.rand(size=(num, 1)).repeat(2, 1) bc2_t_sample = torch.rand(size=(num, 1)).repeat(2, 1) bc2_y_sample = torch.cat([torch.zeros(num, 1), torch.ones(num, 1)], dim=0) return bc1_x_sample, bc1_y_sample, bc1_t_sample, \ bc2_x_sample, bc2_y_sample, bc2_t_sample def concat(xy, z, t=0.0, offset=0): ''' Args: xy: (N, 2) z: (N, 1) t: (N, 1) offset: start index of xy Returns: (N, 4) array ''' output = np.zeros((z.shape[0], 4)) * t if offset < 2: output[:, offset: offset+2] = xy output[:, (offset+2) % 3: (offset+2) % 3 + 1] = z else: output[:, 2:] = xy[:, 0:1] output[:, 0:1] = xy[:, 1:] output[:, 1:2] = z return output def cal_mixgrad(outputs, inputs): out_grad, = autograd.grad(outputs=[outputs.sum()], inputs=[inputs], create_graph=True) out_x2, = autograd.grad(outputs=[out_grad[:, 0].sum()], inputs=[inputs], create_graph=True) out_xx = out_x2[:, 0] out_y2, = 
    autograd.grad(outputs=[out_grad[:, 1].sum()], inputs=[inputs], create_graph=True)
    out_yy = out_y2[:, 1]
    # second derivative w.r.t. the third input column
    out_z2, = autograd.grad(outputs=[out_grad[:, 2].sum()], inputs=[inputs], create_graph=True)
    out_zz = out_z2[:, 2]
    return out_grad, out_xx, out_yy, out_zz


================================================
FILE: cavity_flow.py
================================================
"""
@author: Zongyi Li
This file is the Fourier Neural Operator for 3D problem such as the Navier-Stokes equation discussed in Section 5.3 in the [paper](https://arxiv.org/pdf/2010.08895.pdf), which takes the 2D spatial + 1D temporal equation directly as a 3D problem
"""

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
from timeit import default_timer
from torch.optim import Adam
from train_utils.datasets import MatReader
from train_utils.losses import LpLoss
from train_utils.utils import count_params

# Fixed seeds for reproducibility of the single-instance optimization below.
torch.manual_seed(0)
np.random.seed(0)


################################################################
# 3d fourier layers
################################################################

class SpectralConv3d(nn.Module):
    def __init__(self, in_channels, out_channels, modes1, modes2, modes3):
        super(SpectralConv3d, self).__init__()

        """
        3D Fourier layer. It does FFT, linear transform, and Inverse FFT.    
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.modes1 = modes1  # Number of Fourier modes to multiply, at most floor(N/2) + 1
        self.modes2 = modes2
        self.modes3 = modes3

        # Uniform init scaled by 1/(in*out); one complex weight tensor per
        # corner of the retained (low-frequency) mode block.
        self.scale = (1 / (in_channels * out_channels))
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3,
                                    dtype=torch.cfloat))
        self.weights2 = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3,
                                    dtype=torch.cfloat))
        self.weights3 = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3,
                                    dtype=torch.cfloat))
        self.weights4 = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3,
                                    dtype=torch.cfloat))

    # Complex multiplication
    def compl_mul3d(self, input, weights):
        # (batch, in_channel, x,y,t ), (in_channel, out_channel, x,y,t) -> (batch, out_channel, x,y,t)
        return torch.einsum("bixyz,ioxyz->boxyz", input, weights)

    def forward(self, x):
        batchsize = x.shape[0]
        # Compute Fourier coeffcients up to factor of e^(- something constant)
        x_ft = torch.fft.rfftn(x, dim=[-3, -2, -1])

        # Multiply relevant Fourier modes
        # Only the four low-frequency corners in (x, y) and the low half in t
        # (rfft output) are transformed; all other modes are zeroed.
        out_ft = torch.zeros(batchsize, self.out_channels, x.size(-3), x.size(-2), x.size(-1) // 2 + 1,
                             dtype=torch.cfloat, device=x.device)
        out_ft[:, :, :self.modes1, :self.modes2, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, :self.modes1, :self.modes2, :self.modes3], self.weights1)
        out_ft[:, :, -self.modes1:, :self.modes2, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, -self.modes1:, :self.modes2, :self.modes3], self.weights2)
        out_ft[:, :, :self.modes1, -self.modes2:, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, :self.modes1, -self.modes2:, :self.modes3], self.weights3)
        out_ft[:, :, -self.modes1:, -self.modes2:, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, -self.modes1:, -self.modes2:, :self.modes3], self.weights4)

        # Return to physical space
        x = torch.fft.irfftn(out_ft,
                             s=(x.size(-3), x.size(-2), x.size(-1)))
        return x


class FNO3d(nn.Module):
    def __init__(self, modes1, modes2, modes3, width, padding):
        super(FNO3d, self).__init__()

        """
        The overall network. It contains 4 layers of the Fourier layer.
        1. Lift the input to the desire channel dimension by self.fc0 .
        2. 4 layers of the integral operators u' = (W + K)(u).
            W defined by self.w; K defined by self.conv .
        3. Project from the channel space to the output space by self.fc1 and self.fc2 .
        
        input: the solution of the first 10 timesteps + 3 locations (u(1, x, y), ..., u(10, x, y), x, y, t). It's a constant function in time, except for the last index.
        input shape: (batchsize, x=64, y=64, t=40, c=13)
        output: the solution of the next 40 timesteps
        output shape: (batchsize, x=64, y=64, t=40, c=1)
        """

        self.modes1 = modes1
        self.modes2 = modes2
        self.modes3 = modes3
        self.width = width
        self.padding = padding  # pad the domain if input is non-periodic
        # NOTE(review): fc0 takes 5 features (2 data channels + 3 grid coords
        # appended in forward); the "input channel is 12" comment below is
        # stale from the original FNO code this file was adapted from.
        self.fc0 = nn.Linear(5, 32)
        self.fc1 = nn.Linear(32, self.width)
        # input channel is 12: the solution of the first 10 timesteps + 3 locations (u(1, x, y), ..., u(10, x, y), x, y, t)

        self.conv0 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.conv1 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.conv2 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.conv3 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.w0 = nn.Conv3d(self.width, self.width, 1)
        self.w1 = nn.Conv3d(self.width, self.width, 1)
        self.w2 = nn.Conv3d(self.width, self.width, 1)
        self.w3 = nn.Conv3d(self.width, self.width, 1)

        self.fc2 = nn.Linear(self.width, 128)
        self.fc3 = nn.Linear(128, 3)

    def forward(self, x):
        # Append normalized (x, y, t) coordinates as extra input channels.
        grid = self.get_grid(x.shape, x.device)
        x = torch.cat((x, grid), dim=-1)
        x = self.fc0(x)
        x = F.tanh(x)
        x = self.fc1(x)
        x = x.permute(0, 4, 1, 2, 3)
        # One-sided padding on all three dims (cavity flow is non-periodic).
        x = F.pad(x, [0, self.padding, 0, self.padding, 0, self.padding])  # pad the domain if input is
# non-periodic
        x1 = self.conv0(x)
        x2 = self.w0(x)
        x = x1 + x2
        x = F.tanh(x)

        x1 = self.conv1(x)
        x2 = self.w1(x)
        x = x1 + x2
        x = F.tanh(x)

        x1 = self.conv2(x)
        x2 = self.w2(x)
        x = x1 + x2
        x = F.tanh(x)

        x1 = self.conv3(x)
        x2 = self.w3(x)
        x = x1 + x2
        # Padding is NOT removed here; the losses below slice [:S, :S, :T].
        # x = x[:, :, :-self.padding, :-self.padding, :-self.padding]
        x = x.permute(0, 2, 3, 4, 1)  # pad the domain if input is non-periodic
        x = self.fc2(x)
        x = F.tanh(x)
        x = self.fc3(x)
        return x

    def get_grid(self, shape, device):
        # Build a (batch, x, y, t, 3) tensor of normalized coordinates in [0, 1].
        batchsize, size_x, size_y, size_z = shape[0], shape[1], shape[2], shape[3]
        gridx = torch.tensor(np.linspace(0, 1, size_x), dtype=torch.float)
        gridx = gridx.reshape(1, size_x, 1, 1, 1).repeat([batchsize, 1, size_y, size_z, 1])
        gridy = torch.tensor(np.linspace(0, 1, size_y), dtype=torch.float)
        gridy = gridy.reshape(1, 1, size_y, 1, 1).repeat([batchsize, size_x, 1, size_z, 1])
        gridz = torch.tensor(np.linspace(0, 1, size_z), dtype=torch.float)
        gridz = gridz.reshape(1, 1, 1, size_z, 1).repeat([batchsize, size_x, size_y, 1, 1])
        return torch.cat((gridx, gridy, gridz), dim=-1).to(device)


################################################################
# configs
################################################################
# PATH = '../data/cavity.mat'
PATH = '../data/lid-cavity.mat'

ntest = 1
modes = 8
width = 32
batch_size = 1

path = 'cavity'
path_model = 'model/' + path
path_train_err = 'results/' + path + 'train.txt'
path_test_err = 'results/' + path + 'test.txt'
path_image = 'image/' + path

# Subsampling: 256 -> 64 in space, every 20th frame (dt=0.005s) in time.
sub_s = 4
sub_t = 20
S = 256 // sub_s
T_in = 1000  # 1000*0.005 = 5s
T = 50  # 1000 + 50*20*0.005 = 10s
padding = 14

################################################################
# load data
################################################################
# 15s, 3000 frames
reader = MatReader(PATH)
data_u = reader.read_field('u')[T_in:T_in+T*sub_t:sub_t, ::sub_s, ::sub_s].permute(1,2,0)
data_v = reader.read_field('v')[T_in:T_in+T*sub_t:sub_t, ::sub_s, ::sub_s].permute(1,2,0)
data_output = torch.stack([data_u, data_v],dim=-1).reshape(batch_size,S,S,T,2)
# Input = initial snapshot repeated along the time axis.
data_input = data_output[:,:,:,:1,:].repeat(1,1,1,T,1).reshape(batch_size,S,S,T,2)
print(data_output.shape)
device = torch.device('cuda')


def PINO_loss_Fourier_f(out, Re=500):
    """Navier-Stokes equation residuals (momentum x/y, continuity) computed
    spectrally on the padded output; returns three MSE losses.

    `out` has channels (u, v, p) on the padded (S+pad, S+pad, T+pad) grid;
    domain lengths Lx/Ly/Lt are scaled to account for the padding.
    """
    pi = np.pi
    Lx = 1*(S + padding-1)/S
    Ly = 1*(S + padding-1)/S
    Lt = (0.005*sub_t*T) *(T + padding)/T
    nx = out.size(1)
    ny = out.size(2)
    nt = out.size(3)
    device = out.device
    # Wavenumbers in y-direction
    k_x = torch.cat((torch.arange(start=0, end=nx//2, step=1, device=device),
                     torch.arange(start=-nx//2, end=0, step=1, device=device)), 0).reshape(nx, 1, 1).repeat(1, ny, nt).reshape(1,nx,ny,nt,1)
    k_y = torch.cat((torch.arange(start=0, end=ny//2, step=1, device=device),
                     torch.arange(start=-ny//2, end=0, step=1, device=device)), 0).reshape(1, ny, 1).repeat(nx, 1, nt).reshape(1,nx,ny,nt,1)
    k_t = torch.cat((torch.arange(start=0, end=nt//2, step=1, device=device),
                     torch.arange(start=-nt//2, end=0, step=1, device=device)), 0).reshape(1, 1, nt).repeat(nx, ny, 1).reshape(1,nx,ny,nt,1)

    out_h = torch.fft.fftn(out, dim=[1, 2, 3])
    # Spectral first/second derivatives: multiply by i*k*(2*pi/L).
    outx_h = 1j * k_x * out_h * (2 * pi / Lx)
    outy_h = 1j * k_y * out_h * (2 * pi / Ly)
    outt_h = 1j * k_t * out_h * (2 * pi / Lt)
    outxx_h = 1j * k_x * outx_h * (2 * pi / Lx)
    outyy_h = 1j * k_y * outy_h * (2 * pi / Ly)

    # Back to physical space; crop padding to the data grid [S, S, T].
    outx = torch.fft.irfftn(outx_h[:, :, :, :nt//2+1, :], dim=[1,2,3])[:,:S,:S,:T]
    outy = torch.fft.irfftn(outy_h[:, :, :, :nt//2+1, :], dim=[1,2,3])[:,:S,:S,:T]
    outt = torch.fft.irfftn(outt_h[:, :, :, :nt//2+1, :], dim=[1,2,3])[:,:S,:S,:T]
    outxx = torch.fft.irfftn(outxx_h[:, :, :, :nt//2+1, :], dim=[1,2,3])[:,:S,:S,:T]
    outyy = torch.fft.irfftn(outyy_h[:, :, :, :nt//2+1, :], dim=[1,2,3])[:,:S,:S,:T]

    out = out[:,:S,:S,:T]
    E1 = outt[..., 0] + out[..., 0]*outx[..., 0] + out[..., 1]*outy[..., 0] + outx[..., 2] - 1/Re*(outxx[..., 0] + outyy[..., 0])
    E2 = outt[..., 1] + out[..., 0]*outx[..., 1] + out[..., 1]*outy[..., 1] + outy[..., 2] - 1/Re*(outxx[..., 1] + outyy[..., 1])
    E3 = outx[..., 0] + outy[..., 1]
    target = torch.zeros(E1.shape, device=E1.device)
    E1 = F.mse_loss(E1,target)
    E2 = F.mse_loss(E2,target)
    E3 = F.mse_loss(E3,target)
    return E1, E2, E3


def PINO_loss_FDM_f(out, Re=500):
    """Alternative equation loss via central finite differences with a
    one-cell boundary pad; the moving-lid BC (u=1 on top wall) is written
    into the padded ghost cells in place."""
    dx = 1 / (S+2)
    dy = 1 / (S+2)
    dt = 0.005*sub_t
    out = out[:,:S,:S,:T,:]
    # Pad: 1 cell on both x sides, both y sides, and before t (ghost cells).
    out = F.pad(out, [0,0, 1,0, 1,1, 1,1])
    # NOTE(review): in-place write on the padded tensor to impose the lid
    # velocity u=1 on the y=max boundary — confirm autograd handles this as
    # intended (out here is a fresh tensor from F.pad, so it should).
    out[:, :, -1, :, 0] = 1
    outx = (out[:,2:,1:-1,1:-1] - out[:,:-2,1:-1,1:-1]) / (2*dx)
    outy = (out[:,1:-1,2:,1:-1] - out[:,1:-1,:-2,1:-1]) / (2*dy)
    outt = (out[:,1:-1,1:-1,2:] - out[:,1:-1,1:-1,:-2]) / (2*dt)
    # 5-point Laplacian; dx == dy here so dividing by dx*dy equals dx**2.
    outlap = (out[:,2:,1:-1,1:-1] + out[:,:-2,1:-1,1:-1] + out[:,1:-1,2:,1:-1] + out[:,1:-1,:-2,1:-1] - 4*out[:,1:-1,1:-1,1:-1]) / (dx*dy)

    out = out[:,1:-1,1:-1,1:-1]
    E1 = outt[..., 0] + out[..., 0]*outx[..., 0] + out[..., 1]*outy[..., 0] + outx[..., 2] - 1/Re*(outlap[..., 0])
    E2 = outt[..., 1] + out[..., 0]*outx[..., 1] + out[..., 1]*outy[..., 1] + outy[..., 2] - 1/Re*(outlap[..., 1])
    E3 = outx[..., 0] + outy[..., 1]
    target = torch.zeros(E1.shape, device=E1.device)
    E1 = F.mse_loss(E1,target)
    E2 = F.mse_loss(E2,target)
    E3 = F.mse_loss(E3,target)
    return E1, E2, E3


def PINO_loss_ic(out, y):
    """Initial-condition loss: MSE between predicted and true t=0 slice."""
    myloss = LpLoss(size_average=True)
    # target = torch.zeros(out.shape, device=out.device)
    # target[:, :, -1, 0] = 1
    # IC = myloss(out, target)
    # return IC
    IC = F.mse_loss(out, y)
    return IC


def PINO_loss_bc(out, y):
    """Boundary-condition loss: average MSE over the four spatial walls."""
    myloss = LpLoss(size_average=True)
    # target = torch.zeros((batch_size,S,T,2), device=out.device)
    # target3 = torch.zeros((batch_size,S,T,2), device=out.device)
    # target3[..., 0] = 1
    # out = torch.stack([out[:,0,:], out[:,-1,:], out[:,:,-1], out[:,:,0]], -1)
    # target = torch.stack([target, target, target3, target], -1)
    # BC = myloss(out, target)
    # return BC
    BC1 = F.mse_loss(out[:,0,:], y[:,0,:])
    BC2 = F.mse_loss(out[:,-1,:], y[:,-1,:])
    BC3 = F.mse_loss(out[:,:,-1], y[:,:,-1])
    BC4 = F.mse_loss(out[:,:,0], y[:,:,0])
    return (BC1+BC2+BC3+BC4)/4


################################################################
# training and evaluation
################################################################
# NOTE(review): duplicated assignment ("model = model = ...") — harmless
# chained assignment, but the repetition is almost certainly a typo.
model = model = \
    FNO3d(modes, modes, modes, width, padding).cuda()
print(count_params(model))
optimizer = Adam(model.parameters(), lr=0.0025, weight_decay=0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.5)

myloss = LpLoss(size_average=False)
model.train()
# Single-instance optimization: fit the operator to one trajectory.
x = data_input.cuda().reshape(batch_size,S,S,T,2)
y = data_output.cuda().reshape(batch_size,S,S,T,2)
for ep in range(5000):
    t1 = default_timer()
    optimizer.zero_grad()
    out = model(x)

    # loss_l2 is logged only; training uses IC + BC + equation residuals.
    loss_l2 = myloss(out[:,:S,:S,:T,:2], y)
    IC = PINO_loss_ic(out[:,:S,:S,0,:2], y[:,:,:,0])
    BC = PINO_loss_bc(out[:,:S,:S,:T,:2], y)
    E1, E2, E3 = PINO_loss_Fourier_f(out)
    # E1, E2, E3 = PINO_loss_FDM_f(out)
    loss_pino = IC*1 + BC*1 + E1*1 + E2*1 + E3*1

    loss_pino.backward()
    optimizer.step()
    scheduler.step()
    t2 = default_timer()
    print(ep, t2-t1, IC.item(), BC.item(), E1.item(), E2.item(), E3.item(), loss_l2.item())

    # Plot predicted vs. true final-time velocity fields at epochs 500, 1500, ...
    if ep % 1000 == 500:
        y_plot = y[0,:,:,:].cpu().numpy()
        out_plot = out[0,:S,:S,:T].detach().cpu().numpy()
        fig, ax = plt.subplots(2, 2)
        ax[0,0].imshow(y_plot[..., -1, 0])
        ax[0,1].imshow(y_plot[..., -1, 1])
        ax[1,0].imshow(out_plot[..., -1, 0])
        ax[1,1].imshow(out_plot[..., -1, 1])
        plt.show()


================================================
FILE: configs/baseline/NS-50s-LAAF.yaml
================================================
data:
  datapath: 'data/ns_V1e-3_N5000_T50.mat'
  vis: 0.001
  total_num: 5000
  offset: 4900
  n_sample: 1
  time_scale: 49
  nx: 64
  nt: 50
  sub: 1 # not used here
  sub_t: 1  # not used here
  shuffle: True

model:
  layers: [3, 50, 50, 50, 50, 50, 50, 3]
  activation: LAAF-10 tanh

train:
  batchsize: 1
  epochs: 5000
  milestones: [1000, 1500, 2000]
  base_lr: 0.001
  num_domain: 10000
  num_boundary: 18000
  num_test: 100
  log_step: 100

log:
  logfile: 'log/pinns-50s-laaf.csv'


================================================
FILE: configs/baseline/NS-50s.yaml
================================================
data:
  datapath: 'data/ns_V1e-3_N5000_T50.mat'
  vis: 0.001
  total_num: 5000
  offset: 4900
  n_sample: 1
  time_scale: 49
  nx: 64
  nt: 50
sub: 1 # not used here sub_t: 1 # not used here shuffle: True model: layers: [3, 50, 50, 50, 50, 50, 50, 3] train: epochs: 15000 base_lr: 0.001 save_dir: 'Re500-FDM' save_name: 'PINO-scratch-05s.pt' log: logfile: 'log/pinns-50s-best.csv' ================================================ FILE: configs/baseline/Re500-05s-deeponet.yaml ================================================ data: datapath: '/mnt/md1/zongyi/NS_fft_Re500_T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 4000 time_interval: 0.5 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: False data_val: 'data/NS_Re500_s256_T100_test.npy' val_nx: 256 val_nt: 128 val_sub: 4 val_subt: 2 model: layers: [40, 40] activation: 'relu' train: batchsize: 1 epochs: 100 milestones: [25000, 50000, 75000] base_lr: 0.001 ckpt: 'checkpoints/Re500-FDM/pretrain-Re500-05s-4000.pt' log: project: 'PINO-None' group: 'eval' ================================================ FILE: configs/baseline/Re500-pinns-05s-LAAF.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [3, 50, 50, 50, 50, 50, 50, 3] activation: LAAF-10 tanh train: batchsize: 1 epochs: 3000 milestones: [1000, 1500, 2000] base_lr: 0.01 num_domain: 5000 num_boundary: 10000 num_test: 100 log_step: 100 log: project: 'PINO-Re500-ICLR' group: 'Re500-PINNs' logfile: 'log/pinns-plot-laaf128.csv' ================================================ FILE: configs/baseline/Re500-pinns-05s-SA.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [3, 100, 100, 100, 100, 100, 100, 3] activation: tanh train: batchsize: 1 epochs: 5000 milestones: [1000, 1500, 2000] base_lr: 0.005 num_domain: 10000 num_boundary: 10000 num_init: 5000 num_test: 100 
log_step: 100 log: project: 'PINO-Re500-ICLR' group: 'SA-PINNs' logfile: 'log/sa-pinns128-plot.csv' ================================================ FILE: configs/baseline/Re500-pinns-05s.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [3, 50, 50, 50, 50, 50, 50, 3] activation: tanh train: batchsize: 1 epochs: 3000 milestones: [1000, 1500, 2000] base_lr: 0.01 num_domain: 5000 num_boundary: 10000 num_test: 100 log_step: 100 log: project: 'PINO-Re500-ICLR' group: 'Re500-PINNs' logfile: 'log/pinns128-plot.csv' ================================================ FILE: configs/baseline/Re500-pinns.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 4 sub_t: 1 shuffle: True train: batchsize: 1 epochs: 5000 base_lr: 0.001 num_domain: 5000 num_boundary: 10000 num_test: 100 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 log_step: 100 log: project: 'PINO-Re500-ICLR' group: 'Re500-PINNs-long' ================================================ FILE: configs/finetune/Darcy-finetune.yaml ================================================ data: name: 'Darcy' datapath: '/mnt/md1/zongyi/piececonst_r421_N1024_smooth2.mat' total_num: 1024 offset: 500 n_sample: 1 nx: 421 sub: 7 model: layers: [64, 64, 64, 64, 64] modes1: [20, 20, 20, 20] modes2: [20, 20, 20, 20] fc_dim: 128 act: gelu train: batchsize: 1 epochs: 500 milestones: [100, 200, 300, 400] base_lr: 0.0025 scheduler_gamma: 0.5 f_loss: 1.0 xy_loss: 0.0 save_dir: 'darcy-FDM' save_name: 'darcy-finetune-pino.pt' ckpt: 'checkpoints/darcy-FDM/darcy-pretrain-pino.pt' log: project: 'ICLR-Darcy-finetune' group: 'gelu-pino-pino' ================================================ FILE: configs/finetune/Re100-finetune-1s.yaml 
================================================ data: datapath: 'data/NS_fine_Re100_T128_part0.npy' Re: 100 total_num: 100 offset: 190 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [32, 32, 32, 32, 32] modes1: [16, 16, 16, 16] modes2: [16, 16, 16, 16] modes3: [16, 16, 16, 16] fc_dim: 128 train: batchsize: 1 epochs: 7500 milestones: [500, 1500, 3000, 4000, 5000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-finetune-Re100-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-finetune' group: 'Re100-finetune-1s' ================================================ FILE: configs/finetune/Re200-finetune-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part0.npy' Re: 200 total_num: 100 offset: 194 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [32, 32, 32, 32, 32] modes1: [16, 16, 16, 16] modes2: [16, 16, 16, 16] modes3: [16, 16, 16, 16] fc_dim: 128 train: batchsize: 1 epochs: 10000 milestones: [500, 1500, 3000, 4000, 6000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-finetune-Re200-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-finetune' group: 'Re200-finetune-1s' ================================================ FILE: configs/finetune/Re250-finetune-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part0.npy' Re: 250 total_num: 100 offset: 198 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [32, 32, 32, 32, 32] modes1: [16, 16, 16, 16] modes2: [16, 16, 16, 16] modes3: [16, 16, 16, 16] fc_dim: 128 train: batchsize: 1 epochs: 10000 milestones: [500, 1500, 3000, 4000, 6000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 
  save_dir: 'Re250-FDM'
  save_name: 'PINO-finetune-Re250-1s.pt'
  ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt'

log:
  project: 'PINO-finetune'
  group: 'Re250-finetune-1s'


================================================
FILE: configs/finetune/Re300-finetune-1s.yaml
================================================
data:
  datapath: 'data/NS_fine_Re300_T128_part0.npy'
  Re: 300
  total_num: 100
  offset: 190
  n_sample: 1
  time_interval: 1.0
  nx: 128
  nt: 128
  sub: 1
  sub_t: 2
  shuffle: True

model:
  layers: [32, 32, 32, 32, 32]
  modes1: [16, 16, 16, 16]
  modes2: [16, 16, 16, 16]
  modes3: [16, 16, 16, 16]
  fc_dim: 128

train:
  batchsize: 1
  epochs: 10000
  milestones: [500, 1500, 3000, 4000, 6000]
  base_lr: 0.0025
  scheduler_gamma: 0.5
  ic_loss: 5.0
  f_loss: 1.0
  xy_loss: 0.0
  save_dir: 'Re300-FDM'
  save_name: 'PINO-finetune-Re300-1s.pt'
  ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt'

log:
  project: 'PINO-finetune'
  group: 'Re300-finetune-1s'


================================================
FILE: configs/finetune/Re350-finetune-1s.yaml
================================================
data:
  datapath: 'data/NS_fine_Re350_T128_part0.npy'
  Re: 350
  total_num: 100
  offset: 198
  n_sample: 1
  time_interval: 1.0
  nx: 128
  nt: 128
  sub: 1
  sub_t: 2
  shuffle: True

model:
  layers: [32, 32, 32, 32, 32]
  modes1: [16, 16, 16, 16]
  modes2: [16, 16, 16, 16]
  modes3: [16, 16, 16, 16]
  fc_dim: 128

train:
  batchsize: 1
  epochs: 10000
  milestones: [500, 1500, 3000, 4000, 6000]
  base_lr: 0.0025
  scheduler_gamma: 0.5
  ic_loss: 5.0
  f_loss: 1.0
  xy_loss: 0.0
  save_dir: 'Re350-FDM'
  # fixed: was 'PINO-finetine-Re300-1s.pt' (typo + wrong Re number, which
  # would make Re350 runs collide with the Re300 checkpoint name)
  save_name: 'PINO-finetune-Re350-1s.pt'
  ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt'

log:
  project: 'PINO-finetune'
  group: 'Re350-finetune-1s'


================================================
FILE: configs/finetune/Re400-finetune-1s.yaml
================================================
data:
  datapath: 'data/NS_fine_Re400_T128_part0.npy'
  Re: 400
  total_num: 100
  offset: 199
  n_sample: 1
  time_interval: 1.0
  nx: 128
  nt: 128
  sub: 1
  sub_t: 2
  shuffle: True
model: layers: [32, 32, 32, 32, 32] modes1: [16, 16, 16, 16] modes2: [16, 16, 16, 16] modes3: [16, 16, 16, 16] fc_dim: 128 train: batchsize: 1 epochs: 10000 milestones: [500, 1500, 3000, 4000, 6000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-finetune-Re400-1s.pt' ckpt: 'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-finetune' group: 'Re400-finetune-1s' ================================================ FILE: configs/finetune/Re500-finetune-05s-2layer.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 6000 milestones: [1000, 3000, 5000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4k1k.pt' twolayer: True log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-4k1-2layer' ================================================ FILE: configs/finetune/Re500-finetune-05s-eqn.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 2500 milestones: [1000, 1500, 2000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-eqn.pt' log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-eqn' 
================================================ FILE: configs/finetune/Re500-finetune-05s4C0.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 2500 milestones: [1000, 1500, 2000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4C0.pt' log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-4C0' ================================================ FILE: configs/finetune/Re500-finetune-05s4C1.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 4 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 2500 milestones: [1000, 1500, 2000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4C1.pt' profile: True log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-4C1-profile-long' ================================================ FILE: configs/finetune/Re500-finetune-05s4C4.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 2500 
milestones: [1000, 1500, 2000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4C4.pt' log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-4C4' ================================================ FILE: configs/finetune/Re500-finetune-05s4k-2layer.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 6000 milestones: [1000, 3000, 5000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/pretrain-Re500-05s-4000.pt' twolayer: True log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-4k-2layer' ================================================ FILE: configs/finetune/Re500-finetune-05s4k1k.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 2500 milestones: [1000, 1500, 2000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4k1k.pt' profile: True log: project: 'PINO-Re500-ICLR-rebuttal' group: 'Re500-finetune-128-4k1' ================================================ FILE: configs/finetune/Re500-finetune-05s4k4-2layer.yaml 
================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 6000 milestones: [1000, 3000, 5000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4k.pt' twolayer: True log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-4k4-2layer' ================================================ FILE: configs/finetune/Re500-finetune-05s4k4k.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 4 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 2500 milestones: [1000, 1500, 2000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500' save_name: 'PINO-fintune-05s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4k4.pt' profile: True log: project: 'PINO-Re500-ICLR' group: 'Re500-finetune-128-4k4-profile' ================================================ FILE: configs/finetune/Re500-finetune-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 256 nt: 128 sub: 2 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 10000 milestones: [500, 1500, 3000, 4000, 5000] base_lr: 0.0025 scheduler_gamma: 0.5 ckpt: 
'checkpoints/Re500-FDM/PINO-pretrain-Re500-1s.pt' ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-fintune-1s.pt' log: project: 'PINO-sweep' group: 'Re500-finetune' ================================================ FILE: configs/instance/Re500-1_8-FNO.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [256, 256, 513] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 10 total_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: [0, 0.125] train: batchsize: 1 epochs: 201 num_iter: 1_001 milestones: [400, 800] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 save_step: 500 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: logdir: Re500-1_8s-800-FNO entity: hzzheng-pino project: PINO-NS-test-time-opt group: Re500-1_8s-800-FNO ================================================ FILE: configs/instance/Re500-1_8-PINO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [256, 256, 513] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 250 n_test_samples: 1 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu pad_ratio: 0.125 train: batchsize: 1 epochs: 201 num_iter: 1_001 milestones: [400, 800] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 save_step: 500 test: batchsize: 1 data_res: [256, 256, 513] 
ckpt: model-400.pt log: logdir: Re500-1_8s-800-PINO-tto entity: hzzheng-pino project: PINO-NS-test-time-opt group: Re500-1_8s-800-PINO-s ================================================ FILE: configs/instance/Re500-1_8-PINO.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [256, 256, 513] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 10 total_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: [0.0, 0.125] train: batchsize: 1 epochs: 201 num_iter: 1_001 milestones: [400, 800] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 save_step: 500 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: logdir: Re500-1_8s-800-PINO-tto entity: hzzheng-pino project: PINO-NS-test-time-opt group: Re500-1_8s-800-PINO-s ================================================ FILE: configs/ngc/Re500-1_8-dat0-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 5 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [30_000, 60_000, 90_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 0.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 
256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-dat0-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-dat0-PINO ================================================ FILE: configs/ngc/Re500-1_8-dat200-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 25 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [20_000, 50_000, 80_000, 110_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-dat200-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-dat200-PINO ================================================ FILE: configs/ngc/Re500-1_8-dat40-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 5 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [30_000, 60_000, 90_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 
256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-dat40-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-dat40-PINO ================================================ FILE: configs/ngc/Re500-1_8-dat400-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 50 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [20_000, 50_000, 80_000, 110_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-dat400-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-dat400-PINO ================================================ FILE: configs/ngc/Re500-1_8-dat80-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 10 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [30_000, 60_000, 90_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: 
[256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-dat80-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-dat80-PINO ================================================ FILE: configs/ngc/Re500-1_8-dat800-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [30_000, 60_000, 90_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-dat800-PINO entity: hzzheng-pino project: PINO-NS-ngc group: Re500-1_8s-dat800-PINO ================================================ FILE: configs/ngc/Re500-1_8-res16-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 150 data_res: [16, 16, 129] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [30_000, 60_000, 90_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 
data_res: [256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-res16-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-res16-PINO ================================================ FILE: configs/ngc/Re500-1_8-res32-PINO.yaml ================================================ data: name: KF paths: ['/mount/data/NS-Re500_T300_256x256x500.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 150 data_res: [32, 32, 129] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 1 epochs: 201 num_iter: 150_001 milestones: [30_000, 60_000, 90_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-res32-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-res32-PINO ================================================ FILE: configs/operator/Darcy-pretrain.yaml ================================================ data: name: 'Darcy' path: '/raid/hongkai/darcy-train.mat' total_num: 1024 offset: 0 n_sample: 1000 nx: 421 sub: 7 pde_sub: 2 model: layers: [64, 64, 64, 64, 64] modes1: [20, 20, 20, 20] modes2: [20, 20, 20, 20] fc_dim: 128 act: gelu pad_ratio: [0., 0.] 
train: batchsize: 20 num_iter: 15_001 milestones: [5_000, 7_500, 10_000] base_lr: 0.001 scheduler_gamma: 0.5 f_loss: 1.0 xy_loss: 5.0 save_step: 2_500 eval_step: 2_500 test: path: '/raid/hongkai/darcy-test.mat' total_num: 1024 offset: 0 n_sample: 500 nx: 421 sub: 2 batchsize: 1 log: logdir: Darcy-PINO-new entity: hzzheng-pino project: DarcyFlow group: PINO-1000-new ================================================ FILE: configs/operator/Re500-05s-1000-FNO.yaml ================================================ data: paths: ['../data/NS-Re500Part0.npy', '../data/NS-Re500Part1.npy', '../data/NS-Re500Part2.npy'] Re: 500 total_num: 200 offset: 0 n_samples: 1000 t_duration: 0.5 data_res: [64, 64, 33] pde_res: [128, 128, 65] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 501 milestones: [300] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 100 test: batchsize: 1 data_res: [128, 128, 65] ckpt: model-500.pt log: logdir: Re500-05s-1000-FNO entity: hzzheng-pino project: 'PINO-NS' group: 'Re500-05s-1000-FNO' ================================================ FILE: configs/operator/Re500-05s-1000-PINO.yaml ================================================ data: paths: ['../data/NS-Re500Part0.npy', '../data/NS-Re500Part1.npy', '../data/NS-Re500Part2.npy'] Re: 500 total_num: 300 offset: 0 n_samples: 1000 t_duration: 0.5 data_res: [64, 64, 33] pde_res: [128, 128, 65] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 501 milestones: [300] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 100 test: batchsize: 1 data_res: [128, 128, 65] ckpt: model-500.pt log: logdir: Re500-05s-1000-PINO entity: hzzheng-pino project: PINO-NS group: Re500-05s-1000-PINO 
================================================ FILE: configs/operator/Re500-05s-3000-FNO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 300 testoffset: 2500 n_test_samples: 300 t_duration: 0.5 raw_res: [256, 256, 257] data_res: [64, 64, 33] pde_res: [64, 64, 33] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 4 epochs: 401 milestones: [200] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 50 test: batchsize: 1 data_res: [128, 128, 65] ckpt: model-400.pt log: logdir: Re500-1s-3000-FNO entity: hzzheng-pino project: PINO-NS group: Re500-1s-3000-FNO ================================================ FILE: configs/operator/Re500-05s-600-FNO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 300 testoffset: 2500 n_test_samples: 200 t_duration: 0.5 raw_res: [256, 256, 257] data_res: [64, 64, 65] # resolution in 1 second pde_res: [64, 64, 65] # resolution in 1 second shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 401 milestones: [200] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 50 test: batchsize: 1 data_res: [64, 64, 65] ckpt: model-400.pt log: logdir: Re500-05s-600-FNO entity: hzzheng-pino project: PINO-NS group: Re500-05s-600-FNO ================================================ FILE: configs/operator/Re500-05s-600-PINO-xl.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 300 testoffset: 2500 n_test_samples: 200 t_duration: 0.5 raw_res: [256, 
256, 257] data_res: [64, 64, 65] # resolution in 1 second pde_res: [256, 256, 257] # resolution in 1 second shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 1 epochs: 301 milestones: [200] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 10 test: batchsize: 1 data_res: [64, 64, 65] ckpt: model-400.pt log: logdir: Re500-05s-600-PINO-xl entity: hzzheng-pino project: PINO-NS group: Re500-05s-600-PINO-xl ================================================ FILE: configs/operator/Re500-05s-600-PINO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 300 testoffset: 2500 n_test_samples: 200 t_duration: 0.5 raw_res: [256, 256, 257] data_res: [64, 64, 65] # resolution in 1 second pde_res: [256, 256, 257] # resolution in 1 second shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 1 epochs: 301 milestones: [200] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 10 test: batchsize: 1 data_res: [64, 64, 65] ckpt: model-400.pt log: logdir: Re500-05s-600-PINO entity: hzzheng-pino project: PINO-NS group: Re500-05s-600-PINO ================================================ FILE: configs/operator/Re500-05s-FNO.yaml ================================================ data: paths: ['../data/NS-Re500Part0.npy', '../data/NS-Re500Part1.npy'] Re: 500 total_num: 200 offset: 0 n_samples: 700 t_duration: 0.5 data_res: [64, 64, 33] pde_res: [128, 128, 65] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 501 milestones: [300] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 
0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 100 test: batchsize: 1 data_res: [128, 128, 65] ckpt: model-500.pt log: logdir: Re500-05s-FNO entity: hzzheng-pino project: 'PINO-NS' group: 'Re500-05s-FNO' ================================================ FILE: configs/operator/Re500-1_16-800-FNO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 50 data_res: [64, 64, 257] # resolution in 1 second pde_res: [64, 64, 257] # resolution in 1 second a_offset: 0 n_a_samples: 50 testoffset: 275 n_test_samples: 25 t_duration: 0.0625 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu pad_ratio: 0.125 train: batchsize: 2 start_iter: 0 num_iter: 50_001 milestones: [20_000, 40_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [64, 64, 257] ckpt: model-400.pt log: logdir: Re500-1_16s-800-FNO-s entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_16s-800-FNO-s ================================================ FILE: configs/operator/Re500-1_16-800-PINO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 50 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 200 testoffset: 275 n_test_samples: 25 t_duration: 0.0625 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.125 train: batchsize: 2 start_iter: 0 num_iter: 200_001 milestones: [20_000, 60_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 10.0 save_step: 5000 
eval_step: 5000 test: batchsize: 1 data_res: [128, 128, 257] ckpt: model-400.pt log: logdir: Re500-1_16s-800-PINO-s entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_16s-800-PINO-s ================================================ FILE: configs/operator/Re500-1_4-2000-FNO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 600 testoffset: 2500 n_test_samples: 400 t_duration: 0.25 raw_res: [256, 256, 257] data_res: [256, 256, 257] # resolution in 1 second pde_res: [256, 256, 257] # resolution in 1 second shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 401 milestones: [100, 300] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 50 test: batchsize: 1 data_res: [256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_4s-2000-FNO entity: hzzheng-pino project: PINO-NS group: Re500-1_4s-2000-FNO ================================================ FILE: configs/operator/Re500-1_8-0-PINO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 10 data_res: [64, 64, 129] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.125 train: batchsize: 2 start_iter: 35_001 num_iter: 200_001 milestones: [30_000, 70_000, 110_000, 150_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 xy_loss: 0.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: 
logdir: Re500-1_8s-0-PINO-s entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-0-PINO-s ================================================ FILE: configs/operator/Re500-1_8-1200-FNO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T300_id0.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 150 data_res: [64, 64, 129] # resolution in 1 second pde_res: [64, 64, 129] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 250 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.125 train: batchsize: 2 epochs: 201 num_iter: 50_001 milestones: [20_000, 40_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [64, 64, 129] ckpt: model-400.pt log: logdir: Re500-1_8s-1200-FNO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-1200-FNO ================================================ FILE: configs/operator/Re500-1_8-1200-PINO.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 150 data_res: [64, 64, 129] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 250 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.125 train: batchsize: 2 epochs: 201 num_iter: 150_001 milestones: [30_000, 60_000, 90_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: logdir: Re500-1_8s-1200-PINO 
entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-1200-PINO ================================================ FILE: configs/operator/Re500-1_8-200-FNO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 25 data_res: [128, 128, 257] # resolution in 1 second pde_res: [128, 128, 257] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 250 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: [0, 0.125] train: batchsize: 1 epochs: 201 num_iter: 50_001 milestones: [20_000, 40_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: logdir: Re500-1_8s-dat200-FNO entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-dat200-FNO ================================================ FILE: configs/operator/Re500-1_8-2000-FNO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 250 data_res: [64, 64, 129] # resolution in 1 second pde_res: [64, 64, 129] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu pad_ratio: 0.125 train: batchsize: 1 epochs: 201 num_iter: 60_001 milestones: [20_000, 40_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: logdir: Re500-1_8s-dat2000-FNO entity: hzzheng-pino 
project: PINO-KF-Re500 group: Re500-1_8s-dat2000-FNO ================================================ FILE: configs/operator/Re500-1_8-2000-FNO-xl.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 350 testoffset: 2500 n_test_samples: 400 t_duration: 0.125 raw_res: [256, 256, 257] data_res: [256, 256, 257] # resolution in 1 second pde_res: [256, 256, 257] # resolution in 1 second shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 201 milestones: [50, 100, 150] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 20 test: batchsize: 1 data_res: [256, 256, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-2400-FNO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-2400-FNO ================================================ FILE: configs/operator/Re500-1_8-2000-PINO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T300_id0.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 150 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 2 epochs: 201 num_iter: 100_001 milestones: [10_000, 30_000, 50_000, 70_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [64, 64, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-2k-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-2k-PINO ================================================ FILE: 
configs/operator/Re500-1_8-2200-FNO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 275 data_res: [64, 64, 129] # resolution in 1 second pde_res: [64, 64, 257] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: 0.125 train: batchsize: 1 start_iter: 30_001 num_iter: 60_001 milestones: [20_000, 40_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: logdir: Re500-1_8s-dat2200-FNO entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-dat2200-FNO ================================================ FILE: configs/operator/Re500-1_8-2200-PINO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 275 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 275 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.125 train: batchsize: 2 start_iter: 30_001 num_iter: 400_001 milestones: [30_000, 90_000, 150_000, 250_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 xy_loss: 50.0 save_step: 10000 eval_step: 10000 test: batchsize: 1 data_res: [64, 64, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-2200-PINO-s entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-2200-PINO-s 
================================================ FILE: configs/operator/Re500-1_8-800-FNO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [64, 64, 257] # resolution in 1 second pde_res: [64, 64, 129] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: [0, 0.125] train: batchsize: 2 start_iter: 0 num_iter: 50_001 milestones: [20_000, 40_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [64, 64, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-800-FNO-s entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-800-FNO-s ================================================ FILE: configs/operator/Re500-1_8-800-FNO-s32.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [32, 32, 129] # resolution in 1 second pde_res: [32, 32, 129] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: [0.0, 0.125] train: batchsize: 2 start_iter: 0 num_iter: 50_001 milestones: [20_000, 40_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] log: logdir: Re500-1_8s-800-FNO-s32 entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-800-FNO-s32 
================================================ FILE: configs/operator/Re500-1_8-800-PINO-s.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 275 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: [0.0, 0.125] train: batchsize: 2 start_iter: 0 num_iter: 200_001 milestones: [20_000, 60_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 xy_loss: 10.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] log: logdir: Re500-1_8s-800-PINO-s entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-800-PINO-s ================================================ FILE: configs/operator/Re500-1_8-800-PINO-s16.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [16, 16, 65] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 275 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: [0.0, 0.125] train: batchsize: 1 start_iter: 0 num_iter: 200_001 milestones: [20_000, 60_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 xy_loss: 10.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] log: logdir: Re500-1_8s-800-PINO-s-16 entity: hzzheng-pino project: PINO-KF-Re500 group: 
Re500-1_8s-800-PINO-s-16 ================================================ FILE: configs/operator/Re500-1_8-800-PINO-s32.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [32, 32, 129] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 275 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [12, 12, 12, 12] modes2: [12, 12, 12, 12] modes3: [12, 12, 12, 12] fc_dim: 128 act: gelu pad_ratio: [0.0, 0.125] train: batchsize: 2 start_iter: 0 num_iter: 200_001 milestones: [20_000, 60_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 xy_loss: 10.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [64, 64, 257] log: logdir: Re500-1_8s-800-PINO-s-32 entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-800-PINO-s-32 ================================================ FILE: configs/operator/Re500-1_8-800-UNet.yaml ================================================ data: name: KF paths: ['/raid/hongkai/NS-Re500_T300_id0-shuffle.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 100 data_res: [64, 64, 257] # resolution in 1 second pde_res: [64, 64, 129] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 275 n_test_samples: 25 t_duration: 0.125 shuffle: True model: f_maps: 128 train: batchsize: 2 start_iter: 0 num_iter: 50_001 milestones: [20_000, 40_000] base_lr: 0.0002 scheduler_gamma: 0.5 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [64, 64, 257] ckpt: model-5000.pt log: logdir: Re500-1_8s-800-UNet entity: hzzheng-pino project: PINO-KF-Re500 group: Re500-1_8s-800-UNet ================================================ FILE: configs/operator/Re500-1_8-dat1.6k-PINO.yaml ================================================ 
data: name: KF paths: ['../data/NS-Re500_T300_id0.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 200 data_res: [64, 64, 257] # resolution in 1 second pde_res: [256, 256, 513] # resolution in 1 second a_offset: 0 n_a_samples: 250 testoffset: 200 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 2 epochs: 201 num_iter: 200_001 milestones: [20_000, 70_000, 120_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [64, 64, 257] ckpt: model-400.pt log: logdir: Re500-1_8s-pde2k-dat16-PINO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-pde2k-dat16-PINO ================================================ FILE: configs/operator/Re500-1_8-dat400-FNO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T300_id0.npy'] Re: 500 offset: 0 total_num: 300 raw_res: [256, 256, 513] n_data_samples: 50 data_res: [64, 64, 129] # resolution in 1 second pde_res: [64, 64, 129] # resolution in 1 second a_offset: 0 n_a_samples: 50 testoffset: 250 n_test_samples: 50 t_duration: 0.125 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.0625 train: batchsize: 2 epochs: 201 num_iter: 100_001 milestones: [10_000, 30_000, 50_000, 70_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 5000 eval_step: 5000 test: batchsize: 1 data_res: [256, 256, 513] ckpt: model-400.pt log: logdir: Re500-1_8s-dat400-FNO entity: hzzheng-pino project: PINO-NS group: Re500-1_8s-dat400-FNO ================================================ FILE: configs/operator/Re500-1s-FNO.yaml ================================================ data: paths: 
['../data/NS-Re500Part0.npy', '../data/NS-Re500Part1.npy', '../data/NS-Re500Part2.npy'] Re: 500 total_num: 300 offset: 0 n_samples: 200 t_duration: 1.0 data_res: [64, 64, 65] pde_res: [128, 128, 129] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 501 milestones: [200, 400] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 100 log: logdir: Re500-1s-200-FNO entity: hzzheng-pino project: 'PINO-NS' group: 'Re500-1s-200-FNO' ================================================ FILE: configs/operator/Re500-3000-FNO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 300 testoffset: 2500 n_test_samples: 500 sub_x: 4 sub_t: 4 t_duration: 1.0 data_res: [64, 64, 65] pde_res: [256, 256, 65] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 4 epochs: 401 milestones: [200] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 50 log: logdir: Re500-1s-3000-FNO entity: hzzheng-pino project: PINO-NS group: Re500-1s-3000-FNO ================================================ FILE: configs/operator/Re500-3000-PINO.yaml ================================================ data: name: KF paths: ['../data/NS-Re500_T3000_id0.npy'] Re: 500 total_num: 3000 offset: 0 n_samples: 2400 sub_x: 4 sub_t: 4 pde_subx: 1 pde_subt: 2 t_duration: 1.0 data_res: [64, 64, 65] pde_res: [256, 256, 129] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 1 epochs: 401 milestones: [200] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 50 log: logdir: Re500-1s-3000-PINO 
entity: hzzheng-pino project: PINO-NS group: Re500-1s-3000-PINO ================================================ FILE: configs/operator/Re500-4000-FNO.yaml ================================================ data: paths: ['../data/NS-T4000.npy'] Re: 500 total_num: 4000 offset: 0 n_samples: 3200 t_duration: 1.0 data_res: [64, 64, 65] pde_res: [128, 128, 65] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 501 milestones: [300] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 100 log: logdir: Re500-1s-FNO entity: hzzheng-pino project: 'PINO-NS' group: 'Re500-1s-FNO' ================================================ FILE: configs/operator/Re500-FNO.yaml ================================================ data: paths: ['../data/NS-Re500Part0.npy', '../data/NS-Re500Part1.npy'] Re: 500 total_num: 200 offset: 0 n_samples: 700 t_duration: 0.5 data_res: [64, 64, 33] pde_res: [128, 128, 65] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 pad_ratio: 0.03125 train: batchsize: 2 epochs: 501 milestones: [300] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_step: 100 test: batchsize: 1 data_res: [128, 128, 65] ckpt: model-500.pt log: logdir: Re500-05s-FNO entity: hzzheng-pino project: 'PINO-NS' group: 'Re500-05s-FNO' ================================================ FILE: configs/operator/Re500-PINO.yaml ================================================ data: paths: ['../data/NS-Re500Part0.npy', '../data/NS-Re500Part1.npy'] Re: 500 total_num: 200 offset: 0 n_samples: 700 t_duration: 0.5 data_res: [64, 64, 33] pde_res: [128, 128, 65] shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 2 epochs: 
501 milestones: [300] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_step: 100 test: batchsize: 1 data_res: [64, 64, 33] ckpt: model-500.pt log: logdir: Re500-05s-PINO entity: hzzheng-pino project: 'PINO-NS' group: 'Re500-05s-PINO' ================================================ FILE: configs/pretrain/Darcy-pretrain-deeponet.yaml ================================================ data: name: 'Darcy' datapath: '/mnt/md1/zongyi/piececonst_r421_N1024_smooth1.mat' total_num: 1024 offset: 0 n_sample: 1000 nx: 421 sub: 7 model: branch_layers: [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50] trunk_layers: [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50] activation: tanh train: batchsize: 20 epochs: 2000 milestones: [400, 800, 1200] base_lr: 0.001 scheduler_gamma: 0.5 save_dir: 'darcy-deeponet' save_name: 'darcy-pretrain-deeponet.pt' log: project: 'PINO-Darcy-pretrain' group: 'deeponet' ================================================ FILE: configs/pretrain/Darcy-pretrain.yaml ================================================ data: name: 'Darcy' datapath: '/mnt/md1/zongyi/piececonst_r421_N1024_smooth1.mat' total_num: 1024 offset: 0 n_sample: 1000 nx: 421 sub: 7 pde_sub: 2 model: layers: [64, 64, 64, 64, 64] modes1: [20, 20, 20, 20] modes2: [20, 20, 20, 20] fc_dim: 128 act: gelu train: batchsize: 20 epochs: 300 milestones: [100, 150, 200] base_lr: 0.001 scheduler_gamma: 0.5 f_loss: 1.0 xy_loss: 5.0 save_dir: 'darcy-FDM' save_name: 'darcy-pretrain-pino.pt' log: project: 'PINO-Darcy-pretrain' group: 'gelu-pino' entity: hzzheng-pino ================================================ FILE: configs/pretrain/Re100-pretrain-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part0.npy' datapath2: 'data/NS_fine_Re100_T128_part1.npy' Re: 100 total_num: 100 offset: 0 n_sample: 200 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: 
[8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 150 milestones: [25, 50, 75, 100] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re100-FDM' save_name: 'PINO-pretrain-Re100-1s.pt' log: project: 'PINO-pretrain' group: 'Re100-1s-tanh' ================================================ FILE: configs/pretrain/Re200-pretrain-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part0.npy' datapath2: 'data/NS_fine_Re200_T128_part1.npy' Re: 200 total_num: 100 offset: 0 n_sample: 200 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 150 milestones: [25, 50, 75, 100] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re200-FDM' save_name: 'PINO-pretrain-Re200-1s.pt' log: project: 'PINO-pretrain' group: 'Re200-1s-tanh' ================================================ FILE: configs/pretrain/Re250-pretrain-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part0.npy' datapath2: 'data/NS_fine_Re250_T128_part1.npy' Re: 250 total_num: 100 offset: 0 n_sample: 200 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 150 milestones: [25, 50, 75, 100] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re250-FDM' save_name: 'PINO-pretrain-Re250-1s.pt' log: project: 'PINO-pretrain' group: 'Re250-1s-tanh' ================================================ FILE: configs/pretrain/Re300-pretrain-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part0.npy' datapath2: 'data/NS_fine_Re300_T128_part1.npy' Re: 300 total_num: 100 
offset: 0 n_sample: 200 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 150 milestones: [25, 50, 75, 100] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re300-FDM' save_name: 'PINO-pretrain-Re300-1s.pt' log: project: 'PINO-pretrain' group: 'Re300-1s-tanh' ================================================ FILE: configs/pretrain/Re350-pretrain-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part0.npy' datapath2: 'data/NS_fine_Re350_T128_part1.npy' Re: 350 total_num: 100 offset: 0 n_sample: 200 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 150 milestones: [25, 50, 75, 100] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re350-FDM' save_name: 'PINO-pretrain-Re350-1s.pt' log: project: 'PINO-pretrain' group: 'Re350-1s-tanh' ================================================ FILE: configs/pretrain/Re400-pretrain-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part0.npy' datapath2: 'data/NS_fine_Re400_T128_part1.npy' Re: 400 total_num: 100 offset: 0 n_sample: 200 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 150 milestones: [25, 50, 75, 100] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re400-FDM' save_name: 'PINO-pretrain-Re400-1s.pt' log: project: 'PINO-pretrain' group: 'Re400-1s-tanh' ================================================ FILE: configs/pretrain/Re500-05s-deeponet.yaml 
================================================ data: datapath: '/mnt/md1/zongyi/NS_fft_Re500_T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 400 time_interval: 0.5 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True model: branch_layers: [100, 100, 100] trunk_layers: [100, 100, 100] train: batchsize: 20 epochs: 10001 milestones: [2500, 5000, 7500] base_lr: 0.001 scheduler_gamma: 0.5 save_dir: 'Re500-deepOnet' save_name: 'DeepONet-pretrain-Re500.pt' log: project: 'PINO-pretrain-ICLR' group: 'Re500-05s-deepONet' ================================================ FILE: configs/pretrain/Re500-FNO-1s-100.yaml ================================================ data: datapath: '../data/NS-T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 100 time_interval: 1 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True S2: 64 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu train: batchsize: 2 epochs: 100_001 milestones: [20_000, 40_000, 60_000, 80_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_dir: Re500-FNO-100 save_name: FNO-Re500-1s-100.pt data_iter: 1 eqn_iter: 0 log: entity: hzzheng-pino project: PINO-Operator-Learning group: FNO-Re500-1s-100 ================================================ FILE: configs/pretrain/Re500-FNO-1s-200.yaml ================================================ data: datapath: '../data/NS-T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 200 time_interval: 1 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True S2: 64 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu train: batchsize: 2 epochs: 100_001 milestones: [20_000, 40_000, 60_000, 80_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_dir: Re500-FNO-200 save_name: FNO-Re500-1s-200.pt data_iter: 1 eqn_iter: 0 log: entity: hzzheng-pino project: PINO-Operator-Learning group: 
FNO-Re500-1s-200 ================================================ FILE: configs/pretrain/Re500-FNO-1s-400.yaml ================================================ data: datapath: '../data/NS-T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 400 time_interval: 1 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True S2: 64 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu train: batchsize: 2 epochs: 100_001 milestones: [20_000, 40_000, 60_000, 80_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 0.0 f_loss: 0.0 xy_loss: 1.0 save_dir: Re500-FNO-400 save_name: FNO-Re500-1s-400.pt data_iter: 1 eqn_iter: 0 log: entity: hzzheng-pino project: PINO-Operator-Learning group: FNO-Re500-1s-400 ================================================ FILE: configs/pretrain/Re500-PINO-1s-100-4v4.yaml ================================================ data: datapath: '../data/NS-T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 100 time_interval: 1 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True S2: 64 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu train: batchsize: 2 epochs: 40_000 milestones: [10_000, 20_000, 30_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: Re500-PINO-1s-100 save_name: PINO-pretrain-Re500-1s-100.pt data_iter: 4 eqn_iter: 4 log: entity: hzzheng-pino project: PINO-Operator-Learning group: PINO-Re500-1s-100-4v4 ================================================ FILE: configs/pretrain/Re500-PINO-1s-200-4v4.yaml ================================================ data: datapath: '../data/NS-T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 200 time_interval: 1 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True S2: 64 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu train: batchsize: 2 epochs: 100_000 milestones: 
[20_000, 40_000, 60_000, 80_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: Re500-PINO-1s-200 save_name: PINO-pretrain-Re500-1s-200.pt data_iter: 4 eqn_iter: 4 log: entity: hzzheng-pino project: PINO-Operator-Learning group: PINO-Re500-1s-200-4v4 ================================================ FILE: configs/pretrain/Re500-PINO-1s-400-1v1.yaml ================================================ data: datapath: '../data/NS-T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 400 time_interval: 1 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True S2: 64 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu train: batchsize: 2 epochs: 40_000 milestones: [10_000, 20_000, 30_000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: Re500-PINO-1s-400 save_name: PINO-pretrain-Re500-1s-400.pt data_iter: 4 eqn_iter: 4 log: entity: hzzheng-pino project: PINO-Operator-Learning group: PINO-Re500-1s-400-4v4 ================================================ FILE: configs/pretrain/Re500-pretrain-05s-4C1.yaml ================================================ data: datapath: '/mnt/md1/zongyi/NS_fft_Re500_T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 400 time_interval: 0.5 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True S2: 128 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 40000 milestones: [10000, 20000, 30000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re500-FDM' save_name: 'PINO-pretrain-Re500-05s-4C1.pt' data_iter: 4 # number of update steps on data for each epoch eqn_iter: 1 # number of update steps on virtual PDE for each epoch log: project: 'PINO-pretrain-ICLR' group: 'Re500-05s-4C1' ================================================ FILE: configs/pretrain/Re500-pretrain-05s-4C4.yaml 
================================================ data: datapath: '/mnt/md1/zongyi/NS_fft_Re500_T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 400 time_interval: 0.5 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True num_ics: 300 S2: 128 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 40000 milestones: [10000, 20000, 30000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 5.0 save_dir: 'Re500-FDM' save_name: 'PINO-pretrain-Re500-05s-4C4.pt' data_iter: 4 eqn_iter: 4 log: project: 'PINO-pretrain-ICLR' group: 'Re500-05s-4C4' ================================================ FILE: configs/pretrain/Re500-pretrain-05s-eqn.yaml ================================================ data: datapath: '/mnt/md1/zongyi/NS_fft_Re500_T4000.npy' Re: 500 total_num: 4000 offset: 0 n_sample: 4000 time_interval: 0.5 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: True num_ics: 300 S2: 128 T2: 65 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 40000 milestones: [10000, 20000, 30000] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 1.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re500-FDM' save_name: 'PINO-pretrain-Re500-05s-eqn.pt' data_iter: 0 eqn_iter: 1 log: project: 'PINO-pretrain-ICLR' group: 'Re500-05s-0C1' ================================================ FILE: configs/pretrain/Re500-pretrain-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part0.npy' datapath2: 'data/NS_fine_Re500_T128_part1.npy' Re: 500 total_num: 100 offset: 0 n_sample: 200 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 1 shuffle: True num_ics: 200 S2: 128 T2: 128 model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 150 milestones: [25, 50, 75, 100] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 5.0 
f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re500-FDM' save_name: 'PINO-pretrain-Re500-1s-eqn256.pt' log: project: 'PINO-pretrain' group: 'Re500-1s-eqn' ================================================ FILE: configs/pretrain/burgers-pretrain.yaml ================================================ data: name: Burgers datapath: '../data/burgers.mat' total_num: 1000 offset: 0 n_sample: 800 nx: 128 nt: 100 sub: 1 sub_t: 1 model: layers: [16, 24, 24, 32, 32] modes1: [15, 12, 9, 9] modes2: [15, 12, 9, 9] fc_dim: 128 act: gelu num_pad: 4 train: batchsize: 20 epochs: 500 milestones: [150, 300, 450] base_lr: 0.001 scheduler_gamma: 0.5 ic_loss: 10.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'burgers-FDM' save_name: 'burgers-pretrain-eqn.pt' log: project: PINO-burgers-pretrain group: gelu-eqn entity: hzzheng-pino ================================================ FILE: configs/scratch/Re100-scratch-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-scratch-Re100-1s.pt' log: project: 'PINO-scratch-tanh' group: 'Re100-scratch-1s' ================================================ FILE: configs/scratch/Re200-scratch-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] 
base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-scratch-Re200-1s.pt' log: project: 'PINO-scratch-tanh' group: 'Re200-scratch-1s' ================================================ FILE: configs/scratch/Re250-scratch-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-scratch-Re250-1s.pt' log: project: 'PINO-scratch-tanh' group: 'Re250-scratch-1s' ================================================ FILE: configs/scratch/Re300-scratch-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-scratch-Re300-1s.pt' log: project: 'PINO-scratch-tanh' group: 'Re300-scratch-1s' ================================================ FILE: configs/scratch/Re350-scratch-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part0.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8]
fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' save_name: 'PINO-scratch-Re350-1s.pt' log: project: 'PINO-scratch-tanh' group: 'Re350-scratch-1s' ================================================ FILE: configs/scratch/Re400-scratch-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part0.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-scratch-Re400-1s.pt' log: project: 'PINO-scratch-tanh' group: 'Re400-scratch-1s' ================================================ FILE: configs/scratch/Re500-scratch-05s-new.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 0.5 nx: 128 nt: 128 sub: 1 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 6000 milestones: [1000, 2000, 3000, 4000, 5000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-scratch-05s.pt' log: project: 'PINO-Re500-exp' group: 'Re500-scratch-128' ================================================ FILE: configs/scratch/Re500-scratch-05s.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 20 time_interval: 0.5 nx: 256 nt: 128 sub: 4 sub_t: 2 shuffle: 
True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 2500 milestones: [1000, 1500, 2000] base_lr: 0.0025 beta1: 0.9 beta2: 0.999 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-scratch-05s.pt' log: project: 'PINO-Re500-ICLR' group: 'Re500-scratch-128' logfile: 'log/pinns-default.csv' ================================================ FILE: configs/scratch/Re500-scratch-1s-progressive.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: [200, 5800] milestones: [1000, 2000, 3000, 4000, 5000, 6000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-scratch128-1s.pt' log: project: 'PINO-default' group: 'Re500-scratch-1s' ================================================ FILE: configs/scratch/Re500-scratch-1s.yaml ================================================ data: datapath: '../data/NS-Re500Part1.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-scratch128-1s.pt' log: entity: 'hzzheng-pino' project: 'PINO-NavierStokes' group: 'Re500-scratch-1s' ================================================ FILE: configs/test/Re500-05s-deeponet.yaml ================================================ data: datapath: 
'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 300 time_interval: 0.5 nx: 128 nt: 128 sub: 2 sub_t: 2 shuffle: False model: branch_layers: [100, 100, 100] trunk_layers: [100, 100, 100] test: batchsize: 20 ckpt: 'checkpoints/Re500-deepOnet/DeepONet-pretrain-Re500_10000.pt' log: project: 'PINO-None' group: 'eval' ================================================ FILE: configs/test/Re500-05s-test.yaml ================================================ data: datapath: 'data/NS_Re500_s256_T100_test.npy' Re: 500 total_num: 100 offset: 0 n_sample: 20 time_interval: 0.5 nx: 256 nt: 128 sub: 4 sub_t: 1 shuffle: False model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 test: batchsize: 1 ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4k1k.pt' log: project: 'PINO-None' group: 'eval' ================================================ FILE: configs/test/Re500-05s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 300 time_interval: 0.5 nx: 128 nt: 128 sub: 1 sub_t: 1 shuffle: False model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 test: batchsize: 1 ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-05s-4C1.pt' log: project: 'PINO-None' group: 'eval' ================================================ FILE: configs/test/Re500-1s-100.yaml ================================================ data: datapath: '../data/NS-T4000.npy' Re: 500 total_num: 100 offset: 0 n_sample: 100 time_interval: 1 nx: 64 nt: 64 sub: 1 sub_t: 1 shuffle: False model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 act: gelu test: batchsize: 1 ckpt: checkpoints/Re500-FNO-100/FNO-Re500-1s-100.pt log: entity: hzzheng-pino project: PINO-NS group: eval ================================================ FILE: 
configs/test/burgers.yaml ================================================ data: name: 'Darcy' datapath: '../data/burgers.mat' total_num: 1000 offset: 800 n_sample: 200 nx: 128 nt: 100 sub: 1 sub_t: 1 model: layers: [16, 24, 24, 32, 32] modes1: [15, 12, 9, 9] modes2: [15, 12, 9, 9] fc_dim: 128 act: gelu test: batchsize: 1 ckpt: 'checkpoints/burgers-FDM/burgers-pretrain-eqn.pt' log: project: 'PINO-burgers-test' group: 'gelu-test' ================================================ FILE: configs/test/darcy-deeponet.yaml ================================================ data: name: 'Darcy' datapath: '/mnt/md1/zongyi/piececonst_r421_N1024_smooth2.mat' total_num: 1000 offset: 0 n_sample: 500 nx: 421 sub: 7 shuffle: False model: branch_layers: [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50] trunk_layers: [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50] activation: tanh test: batchsize: 1 ckpt: 'checkpoints/darcy-deeponet/darcy-pretrain-deeponet.pt' log: project: 'PINO-Darcy' group: 'default' ================================================ FILE: configs/test/darcy.yaml ================================================ data: name: 'Darcy' datapath: '/mnt/md1/zongyi/piececonst_r421_N1024_smooth2.mat' total_num: 1000 offset: 0 n_sample: 500 nx: 421 sub: 7 shuffle: False model: layers: [64, 64, 64, 64, 64] modes1: [20, 20, 20, 20] modes2: [20, 20, 20, 20] fc_dim: 128 act: gelu test: batchsize: 1 ckpt: 'checkpoints/darcy-FDM/darcy-pretrain-eqn.pt' log: project: 'PINO-Darcy' group: 'default' ================================================ FILE: configs/transfer/Re100to100-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 
7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-transfer-Re100-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re100to100-1s' ================================================ FILE: configs/transfer/Re100to200-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-transfer-Re200-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re100to200-1s' ================================================ FILE: configs/transfer/Re100to250-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-transfer-Re250-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re100to250-1s' ================================================ FILE: configs/transfer/Re100to300-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 
time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-transfer-Re300-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re100to300-1s' ================================================ FILE: configs/transfer/Re100to350-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part2.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' save_name: 'PINO-transfer-Re350-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re100to350-1s' ================================================ FILE: configs/transfer/Re100to400-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part2.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-transfer-Re400-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-transfer-tanh' group: 
'Re100to400-1s' ================================================ FILE: configs/transfer/Re100to500-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-1s.pt' ckpt: 'checkpoints/Re100-FDM/PINO-pretrain-Re100-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re100to500-1s' ================================================ FILE: configs/transfer/Re200to100-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-transfer-Re100-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re200to100-1s' ================================================ FILE: configs/transfer/Re200to200-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 
7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-transfer-Re200-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re200to200-1s' ================================================ FILE: configs/transfer/Re200to250-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-transfer-Re250-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re200to250-1s' ================================================ FILE: configs/transfer/Re200to300-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-transfer-Re300-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re200to300-1s' ================================================ FILE: configs/transfer/Re200to350-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part2.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1
time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' save_name: 'PINO-transfer-Re350-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re200to350-1s' ================================================ FILE: configs/transfer/Re200to400-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part2.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-transfer-Re400-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re200to400-1s' ================================================ FILE: configs/transfer/Re200to500-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-1s.pt' ckpt: 'checkpoints/Re200-FDM/PINO-pretrain-Re200-1s.pt' log: project: 'PINO-transfer-tanh' group: 
'Re200to500-1s' ================================================ FILE: configs/transfer/Re250to100-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-Re100-1s.pt' ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re250to100-1s' ================================================ FILE: configs/transfer/Re250to200-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-Re200-1s.pt' ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re250to200-1s' ================================================ FILE: configs/transfer/Re250to250-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 
7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-Re250-1s.pt' ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re250to250-1s' ================================================ FILE: configs/transfer/Re250to300-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-Re300-1s.pt' ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re250to300-1s' ================================================ FILE: configs/transfer/Re250to350-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part2.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' save_name: 'PINO-Re350-1s.pt' ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re250to350-1s' ================================================ FILE: configs/transfer/Re250to400-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part2.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 
128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-Re400-1s.pt' ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re250to400-1s' ================================================ FILE: configs/transfer/Re250to500-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 10000 milestones: [1000, 2000, 3000, 4000, 5000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-1s.pt' ckpt: 'checkpoints/Re250-FDM/PINO-pretrain-Re250-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re250to500-1s' ================================================ FILE: configs/transfer/Re300to100-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-Re100-1s.pt' ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re300to100-1s' ================================================ FILE: 
configs/transfer/Re300to200-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-Re200-1s.pt' ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re300to200-1s' ================================================ FILE: configs/transfer/Re300to250-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-Re250-1s.pt' ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re300to250-1s' ================================================ FILE: configs/transfer/Re300to300-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 
xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-Re300-1s.pt' ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re300to300-1s' ================================================ FILE: configs/transfer/Re300to350-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part2.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' save_name: 'PINO-Re350-1s.pt' ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re300to350-1s' ================================================ FILE: configs/transfer/Re300to400-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part2.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-Re400-1s.pt' ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re300to400-1s' ================================================ FILE: configs/transfer/Re300to500-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] 
modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-1s.pt' ckpt: 'checkpoints/Re300-FDM/PINO-pretrain-Re300-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re300to500-1s' ================================================ FILE: configs/transfer/Re350to100-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-Re100-1s.pt' ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re350to100-1s' ================================================ FILE: configs/transfer/Re350to200-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-Re200-1s.pt' ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re350to200-1s' ================================================ FILE: configs/transfer/Re350to250-1s.yaml 
================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-Re250-1s.pt' ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re350to250-1s' ================================================ FILE: configs/transfer/Re350to300-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-Re300-1s.pt' ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re350to300-1s' ================================================ FILE: configs/transfer/Re350to350-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part2.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' 
save_name: 'PINO-Re350-1s.pt' ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re350to350-1s' ================================================ FILE: configs/transfer/Re350to400-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part2.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-Re400-1s.pt' ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re350to400-1s' ================================================ FILE: configs/transfer/Re350to500-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-1s.pt' ckpt: 'checkpoints/Re350-FDM/PINO-pretrain-Re350-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re350to500-1s' ================================================ FILE: configs/transfer/Re400to100-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8,
8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-Re100-1s.pt' ckpt: 'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re400to100-1s' ================================================ FILE: configs/transfer/Re400to200-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-Re200-1s.pt' ckpt: 'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re400to200-1s' ================================================ FILE: configs/transfer/Re400to250-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-Re250-1s.pt' ckpt: 'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re400to250-1s' ================================================ FILE: configs/transfer/Re400to300-1s.yaml ================================================ data: 
datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-Re300-1s.pt' ckpt: 'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re400to300-1s' ================================================ FILE: configs/transfer/Re400to350-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re350_T128_part2.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' save_name: 'PINO-Re350-1s.pt' ckpt: 'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re400to350-1s' ================================================ FILE: configs/transfer/Re400to400-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part2.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-Re400-1s.pt' ckpt: 
'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re400to400-1s' ================================================ FILE: configs/transfer/Re400to500-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re500_T128_part2.npy' Re: 500 total_num: 100 offset: 0 n_sample: 1 time_interval: 1 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-1s.pt' ckpt: 'checkpoints/Re400-FDM/PINO-pretrain-Re400-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re400to500-1s' ================================================ FILE: configs/transfer/Re500to100-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re100_T128_part2.npy' Re: 100 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re100-FDM' save_name: 'PINO-Re100-1s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re500to100-1s' ================================================ FILE: configs/transfer/Re500to200-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re200_T128_part2.npy' Re: 200 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 
128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re200-FDM' save_name: 'PINO-Re200-1s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re500to200-1s' ================================================ FILE: configs/transfer/Re500to250-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re250_T128_part2.npy' Re: 250 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re250-FDM' save_name: 'PINO-Re250-1s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re500to250-1s' ================================================ FILE: configs/transfer/Re500to300-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re300_T128_part2.npy' Re: 300 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re300-FDM' save_name: 'PINO-Re300-1s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re500to300-1s' ================================================ FILE: configs/transfer/Re500to350-1s.yaml ================================================ data: datapath: 
'data/NS_fine_Re350_T128_part2.npy' Re: 350 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re350-FDM' save_name: 'PINO-Re350-1s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re500to350-1s' ================================================ FILE: configs/transfer/Re500to400-1s.yaml ================================================ data: datapath: 'data/NS_fine_Re400_T128_part2.npy' Re: 400 total_num: 100 offset: 0 n_sample: 1 time_interval: 1.0 nx: 128 nt: 128 sub: 1 sub_t: 2 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000, 7000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re400-FDM' save_name: 'PINO-Re400-1s.pt' ckpt: 'checkpoints/Re500-FDM/PINO-pretrain-Re500-1s.pt' log: project: 'PINO-transfer-tanh' group: 'Re500to400-1s' ================================================ FILE: configs/transfer/Re500to500-05s-new.yaml ================================================ data: datapath: 'data/NS_fine_Re500_s2048_T100.npy' Re: 500 total_num: 100 offset: 300 n_sample: 1 time_interval: 0.5 nx: 256 nt: 128 sub: 2 sub_t: 1 shuffle: True model: layers: [64, 64, 64, 64, 64] modes1: [8, 8, 8, 8] modes2: [8, 8, 8, 8] modes3: [8, 8, 8, 8] fc_dim: 128 train: batchsize: 1 epochs: 8000 milestones: [1000, 2000, 3000, 4000, 5000, 6000] base_lr: 0.0025 scheduler_gamma: 0.5 ic_loss: 5.0 f_loss: 1.0 xy_loss: 0.0 save_dir: 'Re500-FDM' save_name: 'PINO-Re500-05.pt' ckpt: 
import yaml
from argparse import ArgumentParser

from baselines.train_ns import train_deeponet_cp
from baselines.test import test_deeponet_ns, test_deeponet_darcy
from baselines.train_darcy import train_deeponet_darcy


if __name__ == '__main__':
    # CLI entry point: train or evaluate a DeepONet (Cartesian-product
    # variant) on either the Darcy problem or the Navier-Stokes problem.
    parser = ArgumentParser(description='Basic paser')
    parser.add_argument('--config_path', type=str, help='Path to the configuration file')
    parser.add_argument('--mode', type=str, default='train', help='Train or test')
    args = parser.parse_args()

    with open(args.config_path, 'r') as stream:
        config = yaml.load(stream, yaml.FullLoader)

    # The Darcy problem is selected via an optional `data.name` key.
    is_darcy = 'name' in config['data'] and config['data']['name'] == 'Darcy'

    if args.mode == 'train':
        print('Start training DeepONet Cartesian Product')
        runner = train_deeponet_darcy if is_darcy else train_deeponet_cp
    else:
        print('Start testing DeepONet Cartesian Product')
        runner = test_deeponet_darcy if is_darcy else test_deeponet_ns
    runner(config)
    print('Done!')
def download_file(url, file_path):
    """Stream `url` to `file_path` on disk.

    Uses a streamed GET so the payload is never held in memory at once.
    Fix: the chunk size was 256 MiB, which buffered enormous chunks in RAM
    and made the tqdm progress bar tick at most a handful of times per
    file; 1 MiB chunks keep memory flat and the progress bar meaningful.

    Raises:
        requests.HTTPError: on a non-2xx response (via raise_for_status).
    """
    print('Start downloading...')
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(file_path, 'wb') as f:
            for chunk in tqdm(r.iter_content(chunk_size=1024 * 1024)):
                f.write(chunk)
    print('Complete')


def main(args):
    """Resolve `args.name` against `_url_dict` and download the asset into
    `args.outdir` (created if missing), keeping the remote file name."""
    url = _url_dict[args.name]
    file_name = url.split('/')[-1]
    os.makedirs(args.outdir, exist_ok=True)
    file_path = os.path.join(args.outdir, file_name)
    download_file(url, file_path)


if __name__ == '__main__':
    parser = ArgumentParser(description='Parser for downloading assets')
    parser.add_argument('--name', type=str, default='NS-T4000')
    parser.add_argument('--outdir', type=str, default='../data')
    args = parser.parse_args()
    main(args)
def test_2d(config):
    """Evaluate a 2-d FNO on the Darcy flow test set described by `config`.

    Builds the dataset/loader from `config['data']`, instantiates the model
    from `config['model']`, optionally restores weights from
    `config['test']['ckpt']`, then delegates evaluation to `eval_darcy`.
    """
    device = 0 if torch.cuda.is_available() else 'cpu'
    data_cfg = config['data']
    model_cfg = config['model']
    test_cfg = config['test']

    darcy_set = DarcyFlow(data_cfg['datapath'],
                          nx=data_cfg['nx'],
                          sub=data_cfg['sub'],
                          offset=data_cfg['offset'],
                          num=data_cfg['n_sample'])
    darcy_loader = DataLoader(darcy_set,
                              batch_size=test_cfg['batchsize'],
                              shuffle=False)
    print(device)

    model = FNO2d(modes1=model_cfg['modes1'],
                  modes2=model_cfg['modes2'],
                  fc_dim=model_cfg['fc_dim'],
                  layers=model_cfg['layers'],
                  act=model_cfg['act']).to(device)

    # Restore pretrained weights when a checkpoint is configured.
    if 'ckpt' in test_cfg:
        ckpt_path = test_cfg['ckpt']
        state = torch.load(ckpt_path)
        model.load_state_dict(state['model'])
        print('Weights loaded from %s' % ckpt_path)

    eval_darcy(model, darcy_loader, config, device)
def gen_data(args):
    """Generate Kolmogorov-flow vorticity trajectories with a spectral solver.

    Spins the flow up for 100 time units from a Gaussian-random-field
    initial condition, then records `args.T` windows of `args.t_res` steps
    each (one time unit per window) at spatial stride `args.x_sub`, and
    saves one (T, t_res+1, s/x_sub, s/x_sub) array per batch element.
    Requires a CUDA device ('cuda:0').
    """
    dtype = torch.float64
    device = torch.device('cuda:0')
    out_dir = args.outdir
    os.makedirs(out_dir, exist_ok=True)

    num_windows = args.T        # number of recorded one-unit time windows
    bsize = args.batchsize
    L = 2 * math.pi
    s = args.x_res
    x_sub = args.x_sub
    t_res = args.t_res
    dt = 1 / t_res
    re = args.re

    solver = NavierStokes2d(s, s, L, L, device=device, dtype=dtype)
    grf = GaussianRF2d(s, s, L, L, alpha=2.5, tau=3.0, sigma=None,
                       device=device, dtype=dtype)

    # Kolmogorov forcing f(x, y) = -4 cos(4y) on the periodic grid.
    coords = torch.linspace(0, L, s + 1, dtype=dtype, device=device)[0:-1]
    _, Y = torch.meshgrid(coords, coords, indexing='ij')
    f = -4 * torch.cos(4.0 * Y)

    vor = np.zeros((bsize, num_windows, t_res + 1, s // x_sub, s // x_sub))
    pbar = tqdm(range(num_windows))

    # Warm-up: advance the random initial state past the transient.
    w = grf.sample(bsize)
    w = solver.advance(w, f, T=100, Re=re, adaptive=True)
    init_vor = w[:, ::x_sub, ::x_sub].cpu().type(torch.float32).numpy()

    for j in pbar:
        vor[:, j, 0, :, :] = init_vor
        for k in range(t_res):
            t1 = default_timer()
            w = solver.advance(w, f, T=dt, Re=re, adaptive=True)
            vor[:, j, k + 1, :, :] = w[:, ::x_sub, ::x_sub].cpu().type(torch.float32).numpy()
            t2 = default_timer()
            pbar.set_description(
                (
                    f'{j}, time cost: {t2-t1}'
                )
            )
        # The last state of this window seeds the next one.
        init_vor = vor[:, j, -1, :, :]

    for i in range(bsize):
        save_path = os.path.join(out_dir, f'NS-Re{int(re)}_T{num_windows}_id{i}.npy')
        np.save(save_path, vor[i])


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--re', type=float, default=40.0)
    parser.add_argument('--x_res', type=int, default=512)
    parser.add_argument('--x_sub', type=int, default=2)
    parser.add_argument('--T', type=int, default=300)
    parser.add_argument('--outdir', type=str, default='../data')
    parser.add_argument('--t_res', type=int, default=512)
    parser.add_argument('--batchsize', type=int, default=1)
    parser.add_argument('--num_batchs', type=int, default=1)
    args = parser.parse_args()
    gen_data(args)
@torch.no_grad()
def get_pred(args):
    """Run a trained FNO3d over the test split and save truth/prediction pairs.

    Reads the YAML config at `args.config`, builds the KFDataset test set,
    optionally restores weights from `args.ckpt_path`, prints the per-batch
    relative L2 error, and writes {'truth', 'pred'} tensors to
    exp/<logdir>/results/fno-prediction.pt.
    """
    with open(args.config, 'r') as stream:
        config = yaml.load(stream, yaml.FullLoader)

    basedir = os.path.join('exp', config['log']['logdir'])
    save_dir = os.path.join(basedir, 'results')
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, 'fno-prediction.pt')

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Test data: pde_res deliberately matches data_res (no super-resolution).
    data_cfg = config['data']
    dataset = KFDataset(paths=data_cfg['paths'],
                        raw_res=data_cfg['raw_res'],
                        data_res=data_cfg['data_res'],
                        pde_res=data_cfg['data_res'],
                        n_samples=data_cfg['n_test_samples'],
                        total_samples=data_cfg['total_test_samples'],
                        offset=data_cfg['testoffset'],
                        t_duration=data_cfg['t_duration'])
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, drop_last=False)

    model_cfg = config['model']
    model = FNO3d(modes1=model_cfg['modes1'],
                  modes2=model_cfg['modes2'],
                  modes3=model_cfg['modes3'],
                  fc_dim=model_cfg['fc_dim'],
                  layers=model_cfg['layers'],
                  act=model_cfg['act'],
                  pad_ratio=model_cfg['pad_ratio']).to(device)
    num_params = count_params(model)
    print(f'Number of parameters: {num_params}')

    if args.ckpt_path:
        ckpt = torch.load(args.ckpt_path)
        model.load_state_dict(ckpt['model'])
        print('Weights loaded from %s' % args.ckpt_path)

    # Relative L2 metric, reported per batch.
    lploss = LpLoss(size_average=True)
    model.eval()

    truth_list = []
    pred_list = []
    for u, a_in in dataloader:
        u, a_in = u.to(device), a_in.to(device)
        out = model(a_in)
        data_loss = lploss(out, u)
        print(data_loss.item())
        truth_list.append(u.cpu())
        pred_list.append(out.cpu())

    torch.save({
        'truth': torch.cat(truth_list, dim=0),
        'pred': torch.cat(pred_list, dim=0),
    }, save_path)
def train_ns(model,
             u_loader,          # training data
             optimizer,
             scheduler,
             device,
             config,
             args):
    """Instance-wise PINO optimization on a single Kolmogorov-flow sample.

    Minimizes f_loss * PDE-residual + ic_loss * initial-condition loss for
    `config['train']['num_iter']` steps; the data loss is only monitored
    (logged as 'test error'), never back-propagated.  Checkpoints every
    `save_step` iterations, then dumps {truth, pred} for instance
    `args.idx` under exp/<logdir>/results/.
    """
    visc = 1 / config['data']['Re']
    t_duration = config['data']['t_duration']
    save_step = config['train']['save_step']
    ic_weight = config['train']['ic_loss']
    f_weight = config['train']['f_loss']

    # output directories
    base_dir = os.path.join('exp', config['log']['logdir'])
    ckpt_dir = os.path.join(base_dir, 'ckpts')
    os.makedirs(ckpt_dir, exist_ok=True)

    # relative Lp loss + Kolmogorov forcing at the PDE resolution
    lploss = LpLoss(size_average=True)
    S = config['data']['pde_res'][0]
    forcing = get_forcing(S).to(device)

    # optional wandb run
    if wandb and args.log:
        run = wandb.init(project=config['log']['project'],
                         entity=config['log']['entity'],
                         group=config['log']['group'],
                         config=config,
                         reinit=True,
                         settings=wandb.Settings(start_method='fork'))

    pbar = range(config['train']['num_iter'])
    if args.tqdm:
        pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.2)
    u_loader = sample_data(u_loader)   # infinite iterator over the loader

    for step in pbar:
        metrics = {}
        optimizer.zero_grad()

        u, a_in = next(u_loader)
        u = u.to(device)
        a_in = a_in.to(device)
        out = model(a_in)
        data_loss = lploss(out, u)      # monitored only, not optimized

        u0 = a_in[:, :, :, 0, -1]       # initial-condition slice of the input
        loss_ic, loss_f = PINO_loss3d(out, u0, forcing, visc, t_duration)
        metrics['IC'] = loss_ic.item()
        metrics['PDE'] = loss_f.item()

        loss = loss_f * f_weight + loss_ic * ic_weight
        loss.backward()
        optimizer.step()
        scheduler.step()

        metrics['train loss'] = loss.item()
        metrics['test error'] = data_loss.item()
        if args.tqdm:
            pbar.set_description(dict2str(metrics))
        if wandb and args.log:
            wandb.log(metrics)
        if step % save_step == 0 and step > 0:
            ckpt_path = os.path.join(ckpt_dir, f'model-{step}.pt')
            save_ckpt(ckpt_path, model, optimizer)

    # clean up wandb
    if wandb and args.log:
        run.finish()

    # final evaluation: save prediction and ground truth
    save_dir = os.path.join(base_dir, 'results')
    os.makedirs(save_dir, exist_ok=True)
    result_path = os.path.join(save_dir, f'results-{args.idx}.pt')
    criterion = LpLoss()
    model.eval()
    with torch.no_grad():
        u, a_in = next(u_loader)
        u = u.to(device)
        a_in = a_in.to(device)
        out = model(a_in)
        error = criterion(out, u)
        print(f'Test error: {error.item()}')
        torch.save({'truth': u.cpu(), 'pred': out.cpu()}, result_path)
        print(f'Results saved to {result_path}')
class SpectralConv2d(nn.Module):
    """2-D Fourier layer: rFFT -> per-mode complex linear map -> inverse rFFT.

    Only the lowest `modes1` x `modes2` frequency block (and its conjugate
    partner along the first spatial axis) is transformed; every other
    frequency of the output spectrum is zero.
    """

    def __init__(self, in_channels, out_channels, modes1, modes2):
        super(SpectralConv2d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        # Number of Fourier modes kept per axis; at most floor(N/2) + 1.
        self.modes1 = modes1
        self.modes2 = modes2
        self.scale = (1 / (in_channels * out_channels))
        # One complex weight tensor per retained frequency corner.
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2,
                                    dtype=torch.cfloat))
        self.weights2 = nn.Parameter(
            self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2,
                                    dtype=torch.cfloat))

    def compl_mul2d(self, input, weights):
        # (batch, in_channel, x, y) x (in_channel, out_channel, x, y)
        #   -> (batch, out_channel, x, y)
        return torch.einsum("bixy,ioxy->boxy", input, weights)

    def forward(self, x):
        batch = x.shape[0]
        # Real FFT over the two trailing spatial axes.
        x_hat = torch.fft.rfft2(x)
        # Allocate the output spectrum and fill only the retained
        # low-frequency corners; everything else stays zero.
        out_hat = torch.zeros(batch, self.out_channels, x.size(-2), x.size(-1) // 2 + 1,
                              dtype=torch.cfloat, device=x.device)
        out_hat[:, :, :self.modes1, :self.modes2] = \
            self.compl_mul2d(x_hat[:, :, :self.modes1, :self.modes2], self.weights1)
        out_hat[:, :, -self.modes1:, :self.modes2] = \
            self.compl_mul2d(x_hat[:, :, -self.modes1:, :self.modes2], self.weights2)
        # Back to physical space at the original spatial resolution.
        return torch.fft.irfft2(out_hat, s=(x.size(-2), x.size(-1)))
input: the solution of the coefficient function and locations (a(x, y), x, y) input shape: (batchsize, x=s, y=s, c=3) output: the solution output shape: (batchsize, x=s, y=s, c=1) """ self.modes1 = modes1 self.modes2 = modes2 self.width = width self.padding = 9 # pad the domain if input is non-periodic self.fc0 = nn.Linear(3, 128) # input channel is 3: (a(x, y), x, y) self.fc1 = nn.Linear(128, self.width) self.conv0 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.conv1 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.conv2 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.conv3 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.w0 = nn.Conv2d(self.width, self.width, 1) self.w1 = nn.Conv2d(self.width, self.width, 1) self.w2 = nn.Conv2d(self.width, self.width, 1) self.w3 = nn.Conv2d(self.width, self.width, 1) self.fc2 = nn.Linear(self.width, 128) self.fc3 = nn.Linear(128, 1) def forward(self, x): grid = self.get_grid(x.shape, x.device) x = torch.cat((x, grid), dim=-1) x = self.fc0(x) x = F.gelu(x) x = self.fc1(x) x = x.permute(0, 3, 1, 2) x = F.pad(x, [0, self.padding, 0, self.padding]) x1 = self.conv0(x) x2 = self.w0(x) x = x1 + x2 x = F.gelu(x) x1 = self.conv1(x) x2 = self.w1(x) x = x1 + x2 x = F.gelu(x) x1 = self.conv2(x) x2 = self.w2(x) x = x1 + x2 x = F.gelu(x) x1 = self.conv3(x) x2 = self.w3(x) x = x1 + x2 x = x[..., :-self.padding, :-self.padding] x = x.permute(0, 2, 3, 1) x = self.fc2(x) x = F.gelu(x) x = self.fc3(x) return x def get_grid(self, shape, device): batchsize, size_x, size_y = shape[0], shape[1], shape[2] gridx = torch.tensor(np.linspace(0, 1, size_x), dtype=torch.float) gridx = gridx.reshape(1, size_x, 1, 1).repeat([batchsize, 1, size_y, 1]) gridy = torch.tensor(np.linspace(0, 1, size_y), dtype=torch.float) gridy = gridy.reshape(1, 1, size_y, 1).repeat([batchsize, size_x, 1, 1]) return torch.cat((gridx, gridy), dim=-1).to(device) pretrain = False finetune = 
def FDM_Darcy(u, a, D=1, f=1):
    """Finite-difference residual of the Darcy operator -div(a grad u).

    Args:
        u: (batch, size, size) solution field on a uniform grid over
           [0, D]^2 (trailing singleton dims are flattened by reshape).
        a: (batch, size, size) coefficient field on the same grid.
        D: physical side length of the domain; grid spacing is D/(size-1).
        f: unused; kept only for backward compatibility with callers.

    Returns:
        (batch, size-4, size-4) tensor Du = -(d/dx(a*u_x) + d/dy(a*u_y))
        on the interior (each central difference shrinks both spatial
        axes by 2).

    Fix: the original also computed ax, ay, uxx, uyy and an interior crop
    of u that were never used -- dead O(size^2) work per call; removed.
    """
    batchsize = u.size(0)
    size = u.size(1)
    u = u.reshape(batchsize, size, size)
    a = a.reshape(batchsize, size, size)
    dx = D / (size - 1)
    dy = dx

    # First-order central differences on the interior: (batch, size-2, size-2)
    ux = (u[:, 2:, 1:-1] - u[:, :-2, 1:-1]) / (2 * dx)
    uy = (u[:, 1:-1, 2:] - u[:, 1:-1, :-2]) / (2 * dy)

    a = a[:, 1:-1, 1:-1]
    # Flux components a * grad(u), then their divergence via a second
    # central difference (shrinks each axis by 2 again).
    aux = a * ux
    auy = a * uy
    auxx = (aux[:, 2:, 1:-1] - aux[:, :-2, 1:-1]) / (2 * dx)
    auyy = (auy[:, 1:-1, 2:] - auy[:, 1:-1, :-2]) / (2 * dy)
    Du = - (auxx + auyy)
    return Du


def PINO_loss(u, a):
    """Physics-informed losses for Darcy flow with unit source term f = 1.

    Returns:
        (loss_f, loss_bd): relative Lp loss of the equation residual
        FDM_Darcy(u, a) against 1, and the absolute Lp loss of u on the
        domain boundary against zero (homogeneous Dirichlet).
    """
    batchsize = u.size(0)
    size = u.size(1)
    u = u.reshape(batchsize, size, size)
    a = a.reshape(batchsize, size, size)
    lploss = LpLoss(size_average=True)

    # Walk the four edges of the grid. NOTE(review): the two
    # `range(size-1, 1, -1)` legs stop at index 2, so the edge traversal
    # is slightly uneven near the corners -- preserved as-is from the
    # original implementation.
    index_x = torch.cat([torch.tensor(range(0, size)), (size - 1) * torch.ones(size),
                         torch.tensor(range(size-1, 1, -1)), torch.zeros(size)], dim=0).long()
    index_y = torch.cat([(size - 1) * torch.ones(size), torch.tensor(range(size-1, 1, -1)),
                         torch.zeros(size), torch.tensor(range(0, size))], dim=0).long()
    boundary_u = u[:, index_x, index_y]
    truth_u = torch.zeros(boundary_u.shape, device=u.device)
    loss_bd = lploss.abs(boundary_u, truth_u)

    Du = FDM_Darcy(u, a)
    f = torch.ones(Du.shape, device=u.device)
    loss_f = lploss(Du, f)
    return loss_f, loss_bd
def darcy_mask1(x):
    """Smooth map from raw logits to a coefficient field in (3, 12)."""
    sig = 1 / (1 + torch.exp(-x))
    return sig * 9 + 3


def darcy_mask2(x):
    """Hard-threshold map from raw logits to a binary field in {3, 12}."""
    sig = 1 / (1 + torch.exp(-x))
    # sigmoid > 0.5 -> high coefficient (12), otherwise low (3)
    bits = torch.where(sig > 0.5, torch.ones_like(sig), torch.zeros_like(sig))
    return bits * 9 + 3


def total_variance(x):
    """Anisotropic total-variation regularizer over the two trailing axes."""
    tv_last = torch.mean(torch.abs(x[..., :-1] - x[..., 1:]))
    tv_rows = torch.mean(torch.abs(x[..., :-1, :] - x[..., 1:, :]))
    return tv_last + tv_rows
count_params(model) print(num_param) xout = torch.rand([1,s,s,1], requires_grad=True, device="cuda") optimizer = Adam([xout], lr=0.1, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2000, gamma=0.5) # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=step_size) for ep in range(10000): model.train() t1 = default_timer() for x, y in test_loader: x, y = x.cuda(), y.cuda() optimizer.zero_grad() out_masked = darcy_mask1(xout) yout = model(out_masked.reshape(batch_size, s, s, 1)).reshape(batch_size, s, s) yout = yout * mollifier loss_data = myloss(yout.view(batch_size, -1), y.view(batch_size, -1)) loss_f, loss_bd = PINO_loss(y, out_masked) loss_TV = total_variance(xout) pino_loss = 0.2 * loss_f + loss_data + 0.05 * loss_TV # pino_loss = 0. * loss_f + loss_data + 0.05 * loss_TV pino_loss.backward() optimizer.step() scheduler.step() out_masked2 = darcy_mask2(xout) yout2 = model(out_masked2.reshape(batch_size, s, s, 1)).reshape(batch_size, s, s) yout2 = yout2 * mollifier testx_l2 = myloss(out_masked.view(batch_size, -1), x.view(batch_size, -1)).item() testy_l2 = myloss(yout.view(batch_size, -1), y.view(batch_size, -1)).item() t2 = default_timer() print(ep, t2 - t1, loss_data.item(), loss_f.item(), testx_l2, testy_l2) if ep % 2000 == 1: # fig, axs = plt.subplots(2, 3, figsize=(8, 8)) # axs[0,0].imshow(x.reshape(s,s).detach().cpu().numpy()) # axs[0,1].imshow(out_masked.reshape(s,s).detach().cpu().numpy()) # axs[0,2].imshow(out_masked2.reshape(s,s).detach().cpu().numpy()) # axs[1,0].imshow(y.reshape(s,s).detach().cpu().numpy()) # axs[1,1].imshow(yout.reshape(s,s).detach().cpu().numpy()) # axs[1,2].imshow(yout2.reshape(s,s).detach().cpu().numpy()) # plt.show() name_tag = 'PINO-' plt.imshow(x.reshape(s,s).detach().cpu().numpy()) plt.savefig(name_tag+'true-input.pdf',bbox_inches='tight') plt.imshow(out_masked.reshape(s,s).detach().cpu().numpy()) plt.savefig(name_tag+'raw-input.pdf',bbox_inches='tight') 
plt.imshow(out_masked2.reshape(s,s).detach().cpu().numpy()) plt.savefig(name_tag+'clip-input.pdf',bbox_inches='tight') plt.imshow(y.reshape(s,s).detach().cpu().numpy()) plt.savefig(name_tag+'true-output.pdf',bbox_inches='tight') plt.imshow(yout.reshape(s,s).detach().cpu().numpy()) plt.savefig(name_tag+'raw-output.pdf',bbox_inches='tight') plt.imshow(yout.reshape(s,s).detach().cpu().numpy()) plt.savefig(name_tag+'clip-output.pdf',bbox_inches='tight') # scipy.io.savemat('../pred/IP-darcy-forward.mat', mdict={'input_truth': x.reshape(s,s).detach().cpu().numpy(), # 'input_pred': out_masked.reshape(s,s).detach().cpu().numpy(), # 'output_truth': y.reshape(s,s).detach().cpu().numpy(), # 'output_pred': yout.reshape(s,s).detach().cpu().numpy()}) ================================================ FILE: inverse-darcy.py ================================================ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import scipy.io import matplotlib.pyplot as plt from timeit import default_timer from torch.optim import Adam from train_utils.datasets import MatReader from train_utils.losses import LpLoss from train_utils.utils import count_params torch.manual_seed(0) np.random.seed(0) ################################################################ # fourier layer ################################################################ class SpectralConv2d(nn.Module): def __init__(self, in_channels, out_channels, modes1, modes2): super(SpectralConv2d, self).__init__() """ 2D Fourier layer. It does FFT, linear transform, and Inverse FFT. 
""" self.in_channels = in_channels self.out_channels = out_channels self.modes1 = modes1 # Number of Fourier modes to multiply, at most floor(N/2) + 1 self.modes2 = modes2 self.scale = (1 / (in_channels * out_channels)) self.weights1 = nn.Parameter( self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, dtype=torch.cfloat)) self.weights2 = nn.Parameter( self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, dtype=torch.cfloat)) # Complex multiplication def compl_mul2d(self, input, weights): # (batch, in_channel, x,y ), (in_channel, out_channel, x,y) -> (batch, out_channel, x,y) return torch.einsum("bixy,ioxy->boxy", input, weights) def forward(self, x): batchsize = x.shape[0] # Compute Fourier coeffcients up to factor of e^(- something constant) x_ft = torch.fft.rfft2(x) # Multiply relevant Fourier modes out_ft = torch.zeros(batchsize, self.out_channels, x.size(-2), x.size(-1) // 2 + 1, dtype=torch.cfloat, device=x.device) out_ft[:, :, :self.modes1, :self.modes2] = \ self.compl_mul2d(x_ft[:, :, :self.modes1, :self.modes2], self.weights1) out_ft[:, :, -self.modes1:, :self.modes2] = \ self.compl_mul2d(x_ft[:, :, -self.modes1:, :self.modes2], self.weights2) # Return to physical space x = torch.fft.irfft2(out_ft, s=(x.size(-2), x.size(-1))) return x class FNO2d(nn.Module): def __init__(self, modes1, modes2, width): super(FNO2d, self).__init__() """ The overall network. It contains 4 layers of the Fourier layer. 1. Lift the input to the desire channel dimension by self.fc0 . 2. 4 layers of the integral operators u' = (W + K)(u). W defined by self.w; K defined by self.conv . 3. Project from the channel space to the output space by self.fc1 and self.fc2 . 
input: the solution of the coefficient function and locations (a(x, y), x, y) input shape: (batchsize, x=s, y=s, c=3) output: the solution output shape: (batchsize, x=s, y=s, c=1) """ self.modes1 = modes1 self.modes2 = modes2 self.width = width self.padding = 9 # pad the domain if input is non-periodic self.fc0 = nn.Linear(3, 128) # input channel is 3: (a(x, y), x, y) self.fc1 = nn.Linear(128, self.width) self.conv0 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.conv1 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.conv2 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.conv3 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2) self.w0 = nn.Conv2d(self.width, self.width, 1) self.w1 = nn.Conv2d(self.width, self.width, 1) self.w2 = nn.Conv2d(self.width, self.width, 1) self.w3 = nn.Conv2d(self.width, self.width, 1) self.fc2 = nn.Linear(self.width, 128) self.fc3 = nn.Linear(128, 1) def forward(self, x): grid = self.get_grid(x.shape, x.device) x = torch.cat((x, grid), dim=-1) x = self.fc0(x) x = F.gelu(x) x = self.fc1(x) x = x.permute(0, 3, 1, 2) x = F.pad(x, [0, self.padding, 0, self.padding]) x1 = self.conv0(x) x2 = self.w0(x) x = x1 + x2 x = F.gelu(x) x1 = self.conv1(x) x2 = self.w1(x) x = x1 + x2 x = F.gelu(x) x1 = self.conv2(x) x2 = self.w2(x) x = x1 + x2 x = F.gelu(x) x1 = self.conv3(x) x2 = self.w3(x) x = x1 + x2 x = x[..., :-self.padding, :-self.padding] x = x.permute(0, 2, 3, 1) x = self.fc2(x) x = F.gelu(x) x = self.fc3(x) return x def get_grid(self, shape, device): batchsize, size_x, size_y = shape[0], shape[1], shape[2] gridx = torch.tensor(np.linspace(0, 1, size_x), dtype=torch.float) gridx = gridx.reshape(1, size_x, 1, 1).repeat([batchsize, 1, size_y, 1]) gridy = torch.tensor(np.linspace(0, 1, size_y), dtype=torch.float) gridy = gridy.reshape(1, 1, size_y, 1).repeat([batchsize, size_x, 1, 1]) return torch.cat((gridx, gridy), dim=-1).to(device) pretrain = False finetune = 
not pretrain TRAIN_PATH = '../data/darcy_s61_N1200.mat' TEST_PATH = '../data/darcy_s61_N1200.mat' # TRAIN_PATH = '../data/lognormal_N1024_s61.mat' # TEST_PATH = '../data/lognormal_N1024_s61.mat' # TRAIN_PATH = '../data/piececonst_r241_N1024_smooth1.mat' # TEST_PATH = '../data/piececonst_r241_N1024_smooth2.mat' ntrain = 1000 ntest = 1 batch_size = 1 learning_rate = 0.001 epochs = 100 step_size = 100 gamma = 0.5 modes = 12 width = 32 r = 1 h = int(((61 - 1)/r) + 1) s = h print(s) path = 'PINO_FDM_darcy_N'+str(ntrain)+'_ep' + str(epochs) + '_m' + str(modes) + '_w' + str(width) path_model = '../model/'+path path_pred = '../pred/'+path+'.mat' reader = MatReader(TRAIN_PATH) # x_train = reader.read_field('coeff')[:ntrain,::r,::r][:,:s,:s] # y_train = reader.read_field('sol')[:ntrain,::r,::r][:,:s,:s] x_train = reader.read_field('input')[:ntrain,::r,::r][:,:s,:s] y_train = reader.read_field('output')[:ntrain,::r,::r][:,:s,:s] reader.load_file(TEST_PATH) # x_test = reader.read_field('coeff')[-ntest:,::r,::r][:,:s,:s] # y_test = reader.read_field('sol')[-ntest:,::r,::r][:,:s,:s] a = 1 x_test = reader.read_field('input')[-ntest-a:-a,::r,::r][:,:s,:s] y_test = reader.read_field('output')[-ntest-a:-a,::r,::r][:,:s,:s] print(torch.mean(x_train), torch.mean(y_train)) # x_normalizer = UnitGaussianNormalizer(x_train) # x_train = x_normalizer.encode(x_train) # x_test = x_normalizer.encode(x_test) # # y_normalizer = UnitGaussianNormalizer(y_train) # y_train = y_normalizer.encode(y_train) grids = [] grids.append(np.linspace(0, 1, s)) grids.append(np.linspace(0, 1, s)) grid = np.vstack([xx.ravel() for xx in np.meshgrid(*grids)]).T grid = grid.reshape(1,s,s,2) grid = torch.tensor(grid, dtype=torch.float) myloss = LpLoss(size_average=False) def FDM_Darcy(u, a, D=1, f=1): batchsize = u.size(0) size = u.size(1) u = u.reshape(batchsize, size, size) a = a.reshape(batchsize, size, size) dx = D / (size - 1) dy = dx # ux: (batch, size-2, size-2) ux = (u[:, 2:, 1:-1] - u[:, :-2, 1:-1]) / (2 * 
dx) uy = (u[:, 1:-1, 2:] - u[:, 1:-1, :-2]) / (2 * dy) ax = (a[:, 2:, 1:-1] - a[:, :-2, 1:-1]) / (2 * dx) ay = (a[:, 1:-1, 2:] - a[:, 1:-1, :-2]) / (2 * dy) uxx = (u[:, 2:, 1:-1] -2*u[:,1:-1,1:-1] +u[:, :-2, 1:-1]) / (dx**2) uyy = (u[:, 1:-1, 2:] -2*u[:,1:-1,1:-1] +u[:, 1:-1, :-2]) / (dy**2) a = a[:, 1:-1, 1:-1] u = u[:, 1:-1, 1:-1] # Du = -(ax*ux + ay*uy + a*uxx + a*uyy) # inner1 = torch.mean(a*(ux**2 + uy**2), dim=[1,2]) # inner2 = torch.mean(f*u, dim=[1,2]) # return 0.5*inner1 - inner2 aux = a * ux auy = a * uy auxx = (aux[:, 2:, 1:-1] - aux[:, :-2, 1:-1]) / (2 * dx) auyy = (auy[:, 1:-1, 2:] - auy[:, 1:-1, :-2]) / (2 * dy) Du = - (auxx + auyy) return Du def PINO_loss(u, a): batchsize = u.size(0) size = u.size(1) u = u.reshape(batchsize, size, size) a = a.reshape(batchsize, size, size) lploss = LpLoss(size_average=True) index_x = torch.cat([torch.tensor(range(0, size)), (size - 1) * torch.ones(size), torch.tensor(range(size-1, 1, -1)), torch.zeros(size)], dim=0).long() index_y = torch.cat([(size - 1) * torch.ones(size), torch.tensor(range(size-1, 1, -1)), torch.zeros(size), torch.tensor(range(0, size))], dim=0).long() boundary_u = u[:, index_x, index_y] truth_u = torch.zeros(boundary_u.shape, device=u.device) loss_bd = lploss.abs(boundary_u, truth_u) Du = FDM_Darcy(u, a) f = torch.ones(Du.shape, device=u.device) loss_f = lploss(Du, f) # im = (Du-f)[0].detach().cpu().numpy() # plt.imshow(im) # plt.show() # loss_f = FDM_Darcy(u, a) # loss_f = torch.mean(loss_f) return loss_f, loss_bd error = np.zeros((epochs, 4)) # x_normalizer.cuda() # y_normalizer.cuda() grid = grid.cuda() mollifier = torch.sin(np.pi*grid[...,0]) * torch.sin(np.pi*grid[...,1]) * 0.001 def darcy_mask1(x): return 1 / (1 + torch.exp(-x)) * 9 + 3 def darcy_mask2(x): x = 1 / (1 + torch.exp(-x)) x[x>0.5] = 1 x[x<=0.5] = 0 # x = torch.tensor(x>0.5, dtype=torch.float) return x * 9 + 3 def total_variance(x): return torch.mean(torch.abs(x[...,:-1] - x[...,1:])) + torch.mean(torch.abs(x[...,:-1,:] - 
x[...,1:,:])) if pretrain: train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_test, y_test), batch_size=batch_size, shuffle=False) model = FNO2d(modes, modes, width).cuda() num_param = count_params(model) print(num_param) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma) # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=step_size) for ep in range(epochs): model.train() t1 = default_timer() train_f = 0.0 train_l2 = 0.0 train_TV = 0.0 for x, y in train_loader: x, y = x.cuda(), y.cuda() optimizer.zero_grad() xout = model(y.reshape(batch_size, s, s, 1)).reshape(batch_size, s, s) xout = darcy_mask1(xout) loss_data = myloss(xout.view(batch_size,-1), x.view(batch_size,-1)) loss_f, loss_bd = PINO_loss(y, xout) loss_TV = total_variance(xout) pino_loss = 0.2*loss_f + loss_data + 0.01*loss_TV pino_loss.backward() optimizer.step() train_l2 += loss_data.item() train_f += loss_f.item() train_TV += loss_TV.item() scheduler.step() model.eval() test_l2 = 0.0 test_pino = 0.0 with torch.no_grad(): for x, y in test_loader: x, y = x.cuda(), y.cuda() xout = model(y.reshape(batch_size, s, s, 1)).reshape(batch_size, s, s) xout = darcy_mask1(xout) test_l2 += myloss(xout.view(batch_size, -1), x.view(batch_size, -1)).item() train_l2 /= ntrain test_l2 /= ntest train_f /= ntrain test_pino /= ntest train_TV /= ntrain error[ep] = [train_f, train_l2, test_pino, test_l2] t2 = default_timer() print(ep, t2-t1, train_f, train_TV, train_l2, test_l2) torch.save(model, '../model/IP-dracy-inverse') if finetune: test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_test, y_test), batch_size=batch_size, shuffle=False) model = torch.load('../model/IP-dracy-inverse').cuda() # model = 
FNO2d(modes, modes, width).cuda() model_forward = torch.load('../model/IP-dracy-forward').cuda() num_param = count_params(model) print(num_param) optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2000, gamma=0.5) # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=step_size) for ep in range(10000): model.train() t1 = default_timer() for x, y in test_loader: x, y = x.cuda(), y.cuda() optimizer.zero_grad() xout = model(y.reshape(batch_size, s, s, 1)).reshape(batch_size, s, s) xout1 = darcy_mask1(xout) loss_TV = total_variance(xout) loss_f, loss_bd = PINO_loss(y, xout1) pino_loss = loss_f + 0.05*loss_TV pino_loss.backward() optimizer.step() scheduler.step() xout2 = darcy_mask2(xout) testx_l2 = myloss(xout1.view(batch_size, -1), x.view(batch_size, -1)).item() t2 = default_timer() print(ep, t2 - t1, loss_f.item(), testx_l2) if ep % 1000 == 0: yout1 = model_forward(xout1.reshape(batch_size, s, s, 1)).reshape(batch_size, s, s) * mollifier yout2 = model_forward(xout2.reshape(batch_size, s, s, 1)).reshape(batch_size, s, s) * mollifier fig, axs = plt.subplots(2, 3, figsize=(8, 8)) axs[0,0].imshow(x.reshape(s,s).detach().cpu().numpy()) axs[0,1].imshow(xout1.reshape(s,s).detach().cpu().numpy()) axs[0,2].imshow(xout2.reshape(s,s).detach().cpu().numpy()) axs[1,0].imshow(y.reshape(s,s).detach().cpu().numpy()) axs[1,1].imshow(yout1.reshape(s,s).detach().cpu().numpy()) axs[1,2].imshow(yout2.reshape(s,s).detach().cpu().numpy()) plt.show() scipy.io.savemat('../pred/IP-darcy-backward.mat', mdict={'input_truth': x.reshape(s,s).detach().cpu().numpy(), 'input_pred': xout1.reshape(s,s).detach().cpu().numpy(), 'output_truth': y.reshape(s,s).detach().cpu().numpy(), 'output_pred': yout1.reshape(s,s).detach().cpu().numpy()}) ================================================ FILE: models/FCN.py ================================================ import torch.nn as nn def 
linear_block(in_channel, out_channel): block = nn.Sequential( nn.Linear(in_channel, out_channel), nn.Tanh() ) return block class FCNet(nn.Module): ''' Fully connected layers with Tanh as nonlinearity Reproduced from PINNs Burger equation ''' def __init__(self, layers=[2, 10, 1]): super(FCNet, self).__init__() fc_list = [linear_block(in_size, out_size) for in_size, out_size in zip(layers, layers[1:-1])] fc_list.append(nn.Linear(layers[-2], layers[-1])) self.fc = nn.Sequential(*fc_list) def forward(self, x): return self.fc(x) class DenseNet(nn.Module): def __init__(self, layers, nonlinearity, out_nonlinearity=None, normalize=False): super(DenseNet, self).__init__() self.n_layers = len(layers) - 1 assert self.n_layers >= 1 if isinstance(nonlinearity, str): if nonlinearity == 'tanh': nonlinearity = nn.Tanh elif nonlinearity == 'relu': nonlinearity == nn.ReLU else: raise ValueError(f'{nonlinearity} is not supported') self.layers = nn.ModuleList() for j in range(self.n_layers): self.layers.append(nn.Linear(layers[j], layers[j+1])) if j != self.n_layers - 1: if normalize: self.layers.append(nn.BatchNorm1d(layers[j+1])) self.layers.append(nonlinearity()) if out_nonlinearity is not None: self.layers.append(out_nonlinearity()) def forward(self, x): for _, l in enumerate(self.layers): x = l(x) return x ================================================ FILE: models/__init__.py ================================================ from .FCN import FCNet from .fourier1d import FNO1d from .fourier2d import FNO2d from .fourier3d import FNO3d ================================================ FILE: models/basics.py ================================================ import numpy as np import torch import torch.nn as nn @torch.jit.script def compl_mul1d(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: # (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x) res = torch.einsum("bix,iox->box", a, b) return res @torch.jit.script def compl_mul2d(a: torch.Tensor, b: 
torch.Tensor) -> torch.Tensor: # (batch, in_channel, x,y,t ), (in_channel, out_channel, x,y,t) -> (batch, out_channel, x,y,t) res = torch.einsum("bixy,ioxy->boxy", a, b) return res @torch.jit.script def compl_mul3d(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: res = torch.einsum("bixyz,ioxyz->boxyz", a, b) return res ################################################################ # 1d fourier layer ################################################################ class SpectralConv1d(nn.Module): def __init__(self, in_channels, out_channels, modes1): super(SpectralConv1d, self).__init__() """ 1D Fourier layer. It does FFT, linear transform, and Inverse FFT. """ self.in_channels = in_channels self.out_channels = out_channels # Number of Fourier modes to multiply, at most floor(N/2) + 1 self.modes1 = modes1 self.scale = (1 / (in_channels*out_channels)) self.weights1 = nn.Parameter( self.scale * torch.rand(in_channels, out_channels, self.modes1, dtype=torch.cfloat)) def forward(self, x): batchsize = x.shape[0] # Compute Fourier coeffcients up to factor of e^(- something constant) x_ft = torch.fft.rfftn(x, dim=[2]) # Multiply relevant Fourier modes out_ft = torch.zeros(batchsize, self.in_channels, x.size(-1)//2 + 1, device=x.device, dtype=torch.cfloat) out_ft[:, :, :self.modes1] = compl_mul1d(x_ft[:, :, :self.modes1], self.weights1) # Return to physical space x = torch.fft.irfftn(out_ft, s=[x.size(-1)], dim=[2]) return x ################################################################ # 2d fourier layer ################################################################ class SpectralConv2d(nn.Module): def __init__(self, in_channels, out_channels, modes1, modes2): super(SpectralConv2d, self).__init__() self.in_channels = in_channels self.out_channels = out_channels # Number of Fourier modes to multiply, at most floor(N/2) + 1 self.modes1 = modes1 self.modes2 = modes2 self.scale = (1 / (in_channels * out_channels)) self.weights1 = nn.Parameter( self.scale * 
torch.rand(in_channels, out_channels, self.modes1, self.modes2, dtype=torch.cfloat)) self.weights2 = nn.Parameter( self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, dtype=torch.cfloat)) def forward(self, x): batchsize = x.shape[0] size1 = x.shape[-2] size2 = x.shape[-1] # Compute Fourier coeffcients up to factor of e^(- something constant) x_ft = torch.fft.rfftn(x, dim=[2, 3]) # Multiply relevant Fourier modes out_ft = torch.zeros(batchsize, self.out_channels, x.size(-2), x.size(-1) // 2 + 1, device=x.device, dtype=torch.cfloat) out_ft[:, :, :self.modes1, :self.modes2] = \ compl_mul2d(x_ft[:, :, :self.modes1, :self.modes2], self.weights1) out_ft[:, :, -self.modes1:, :self.modes2] = \ compl_mul2d(x_ft[:, :, -self.modes1:, :self.modes2], self.weights2) # Return to physical space x = torch.fft.irfftn(out_ft, s=(x.size(-2), x.size(-1)), dim=[2, 3]) return x class SpectralConv3d(nn.Module): def __init__(self, in_channels, out_channels, modes1, modes2, modes3): super(SpectralConv3d, self).__init__() self.in_channels = in_channels self.out_channels = out_channels self.modes1 = modes1 #Number of Fourier modes to multiply, at most floor(N/2) + 1 self.modes2 = modes2 self.modes3 = modes3 self.scale = (1 / (in_channels * out_channels)) self.weights1 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat)) self.weights2 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat)) self.weights3 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat)) self.weights4 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat)) def forward(self, x): batchsize = x.shape[0] # Compute Fourier coeffcients up to factor of e^(- something constant) x_ft = torch.fft.rfftn(x, dim=[2,3,4]) z_dim = 
min(x_ft.shape[4], self.modes3) # Multiply relevant Fourier modes out_ft = torch.zeros(batchsize, self.out_channels, x_ft.shape[2], x_ft.shape[3], self.modes3, device=x.device, dtype=torch.cfloat) # if x_ft.shape[4] > self.modes3, truncate; if x_ft.shape[4] < self.modes3, add zero padding coeff = torch.zeros(batchsize, self.in_channels, self.modes1, self.modes2, self.modes3, device=x.device, dtype=torch.cfloat) coeff[..., :z_dim] = x_ft[:, :, :self.modes1, :self.modes2, :z_dim] out_ft[:, :, :self.modes1, :self.modes2, :] = compl_mul3d(coeff, self.weights1) coeff = torch.zeros(batchsize, self.in_channels, self.modes1, self.modes2, self.modes3, device=x.device, dtype=torch.cfloat) coeff[..., :z_dim] = x_ft[:, :, -self.modes1:, :self.modes2, :z_dim] out_ft[:, :, -self.modes1:, :self.modes2, :] = compl_mul3d(coeff, self.weights2) coeff = torch.zeros(batchsize, self.in_channels, self.modes1, self.modes2, self.modes3, device=x.device, dtype=torch.cfloat) coeff[..., :z_dim] = x_ft[:, :, :self.modes1, -self.modes2:, :z_dim] out_ft[:, :, :self.modes1, -self.modes2:, :] = compl_mul3d(coeff, self.weights3) coeff = torch.zeros(batchsize, self.in_channels, self.modes1, self.modes2, self.modes3, device=x.device, dtype=torch.cfloat) coeff[..., :z_dim] = x_ft[:, :, -self.modes1:, -self.modes2:, :z_dim] out_ft[:, :, -self.modes1:, -self.modes2:, :] = compl_mul3d(coeff, self.weights4) #Return to physical space x = torch.fft.irfftn(out_ft, s=(x.size(2), x.size(3), x.size(4)), dim=[2,3,4]) return x class FourierBlock(nn.Module): def __init__(self, in_channels, out_channels, modes1, modes2, modes3, act='tanh'): super(FourierBlock, self).__init__() self.in_channel = in_channels self.out_channel = out_channels self.speconv = SpectralConv3d(in_channels, out_channels, modes1, modes2, modes3) self.linear = nn.Conv1d(in_channels, out_channels, 1) if act == 'tanh': self.act = torch.tanh_ elif act == 'gelu': self.act = nn.GELU elif act == 'none': self.act = None else: raise ValueError(f'{act} 
is not supported') def forward(self, x): ''' input x: (batchsize, channel width, x_grid, y_grid, t_grid) ''' x1 = self.speconv(x) x2 = self.linear(x.view(x.shape[0], self.in_channel, -1)) out = x1 + x2.view(x.shape[0], self.out_channel, x.shape[2], x.shape[3], x.shape[4]) if self.act is not None: out = self.act(out) return out ================================================ FILE: models/core.py ================================================ import torch import torch.nn as nn import tltorch @torch.jit.script def contract_1D(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: res = torch.einsum("bix,iox->box", a, b) return res @torch.jit.script def contract_2D(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: res = torch.einsum("bixy,ioxy->boxy", a, b) return res @torch.jit.script def contract_3D(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: res = torch.einsum("bixyz,ioxyz->boxyz", a, b) return res class FactorizedSpectralConv3d(nn.Module): def __init__(self, in_channels, out_channels, modes_height, modes_width, modes_depth, n_layers=1, bias=True, scale='auto', fft_norm='backward', mlp=False, rank=0.5, factorization='cp', fixed_rank_modes=None, decomposition_kwargs=dict(), **kwargs): super().__init__() self.in_channels = in_channels self.out_channels = out_channels self.modes_height = modes_height self.modes_width = modes_width self.modes_depth = modes_depth self.rank = rank self.factorization = factorization self.n_layers = n_layers self.fft_norm = fft_norm if mlp: raise NotImplementedError() else: self.mlp = None if scale == 'auto': scale = (1 / (in_channels * out_channels)) if isinstance(fixed_rank_modes, bool): if fixed_rank_modes: fixed_rank_modes=[0] else: fixed_rank_modes=None if factorization is None: self.weight = nn.Parameter(scale * torch.randn(4*n_layers, in_channels, out_channels, self.modes_height, self.modes_width, self.modes_depth, dtype=torch.cfloat)) self._get_weight = self._get_weight_dense else: self.weight = 
tltorch.FactorizedTensor.new((4*n_layers, in_channels, out_channels, self.modes_height, self.modes_width, self.modes_depth), rank=self.rank, factorization=factorization, dtype=torch.cfloat, fixed_rank_modes=fixed_rank_modes, **decomposition_kwargs) self.weight = self.weight.normal_(0, scale) self._get_weight = self._get_weight_factorized if bias: self.bias = nn.Parameter(scale * torch.randn(self.out_channels, 1, 1, 1)) else: self.bias = 0 def _get_weight_factorized(self, layer_index, corner_index): """Get the weights corresponding to a particular layer, corner of the Fourier coefficient (top=0 or bottom=1) -- corresponding to lower frequencies and complex_index (real=0 or imaginary=1) """ return self.weight()[4*layer_index + corner_index, :, :, :, :, :].to_tensor().contiguous() def _get_weight_dense(self, layer_index, corner_index): """Get the weights corresponding to a particular layer, corner of the Fourier coefficient (top=0 or bottom=1) -- corresponding to lower frequencies and complex_index (real=0 or imaginary=1) """ return self.weight[4*layer_index + corner_index, :, :, :, :, :] def forward(self, x, indices=0): with torch.autocast(device_type='cuda', enabled=False): batchsize, channels, height, width, depth = x.shape dtype = x.dtype # out_fft = torch.zeros(x.shape, device=x.device) #Compute Fourier coeffcients x = torch.fft.rfftn(x.float(), norm=self.fft_norm, dim=[-3, -2, -1]) # Multiply relevant Fourier modes # x = torch.view_as_real(x) # The output will be of size (batch_size, self.out_channels, x.size(-2), x.size(-1)//2 + 1) out_fft = torch.zeros([batchsize, self.out_channels, height, width, depth//2 + 1], device=x.device, dtype=torch.cfloat) out_fft[:, :, :self.modes_height, :self.modes_width, :self.modes_depth] = contract_3D( x[:, :, :self.modes_height, :self.modes_width, :self.modes_depth], self._get_weight(indices, 0)) out_fft[:, :, -self.modes_height:, :self.modes_width, :self.modes_depth] = contract_3D( x[:, :, -self.modes_height:, 
:self.modes_width, :self.modes_depth], self._get_weight(indices, 1)) out_fft[:, :, self.modes_height:, -self.modes_width:, :self.modes_depth] = contract_3D( x[:, :, self.modes_height:, -self.modes_width:, :self.modes_depth], self._get_weight(indices, 2)) out_fft[:, :, -self.modes_height:, -self.modes_width:, :self.modes_depth] = contract_3D( x[:, :, -self.modes_height:, -self.modes_width:, :self.modes_depth], self._get_weight(indices, 3)) # out_size = (int(height*super_res), int(width*super_res)) x = torch.fft.irfftn(out_fft, s=(height, width, depth), norm=self.fft_norm).type(dtype) #(x.size(-2), x.size(-1))) + x = x + self.bias if self.mlp is not None: x = self.mlp(x) return x def get_conv(self, indices): """Returns a sub-convolutional layer from the joint parametrize main-convolution The parametrization of sub-convolutional layers is shared with the main one. """ if self.n_layers == 1: raise ValueError('A single convolution is parametrized, directly use the main class.') return SubConv(self, indices) def __getitem__(self, indices): return self.get_conv(indices) class FactorizedSpectralConv2d(nn.Module): def __init__(self, in_channels, out_channels, modes_height, modes_width, n_layers=1, bias=True, scale='auto', fft_norm='backward', rank=0.5, factorization='cp', fixed_rank_modes=None, decomposition_kwargs=dict(), **kwargs): super().__init__() self.in_channels = in_channels self.out_channels = out_channels self.modes_height = modes_height self.modes_width = modes_width self.rank = rank self.factorization = factorization self.n_layers = n_layers self.fft_norm = fft_norm if scale == 'auto': scale = (1 / (in_channels * out_channels)) if isinstance(fixed_rank_modes, bool): if fixed_rank_modes: fixed_rank_modes=[0] else: fixed_rank_modes=None if factorization is None: self.weight = nn.Parameter(scale * torch.randn(2*n_layers, in_channels, out_channels, self.modes_height, self.modes_width, dtype=torch.cfloat)) self._get_weight = self._get_weight_dense else: self.weight = 
tltorch.FactorizedTensor.new((2*n_layers, in_channels, out_channels,
                                          self.modes_height, self.modes_width),
                                         rank=self.rank, factorization=factorization, dtype=torch.cfloat,
                                         fixed_rank_modes=fixed_rank_modes, **decomposition_kwargs)
            self.weight = self.weight.normal_(0, scale)
            self._get_weight = self._get_weight_factorized

        if bias:
            self.bias = nn.Parameter(scale * torch.randn(self.out_channels, 1, 1))
        else:
            self.bias = 0

    def _get_weight_factorized(self, layer_index, corner_index):
        """Get the weights corresponding to a particular layer, corner of the Fourier coefficient
        (top=0 or bottom=1) -- corresponding to lower frequencies and complex_index (real=0 or imaginary=1)
        """
        return self.weight()[2*layer_index + corner_index, :, :, :, :].to_tensor().contiguous()

    def _get_weight_dense(self, layer_index, corner_index):
        """Get the weights corresponding to a particular layer, corner of the Fourier coefficient
        (top=0 or bottom=1) -- corresponding to lower frequencies and complex_index (real=0 or imaginary=1)
        """
        return self.weight[2*layer_index + corner_index, :, :, :, :]

    def forward(self, x, indices=0, super_res=1):
        with torch.autocast(device_type='cuda', enabled=False):
            batchsize, channels, height, width = x.shape
            dtype = x.dtype
            # out_fft = torch.zeros(x.shape, device=x.device)
            #Compute Fourier coeffcients
            x = torch.fft.rfft2(x.float(), norm=self.fft_norm)

            # Multiply relevant Fourier modes
            # x = torch.view_as_real(x)
            # The output will be of size (batch_size, self.out_channels, x.size(-2), x.size(-1)//2 + 1)
            out_fft = torch.zeros([batchsize, self.out_channels, height, width//2 + 1],
                                  device=x.device, dtype=torch.cfloat)
            # NOTE(review): the destination uses a strided slice
            # `:self.modes_width:super_res` while the contraction result has
            # modes_width columns — for super_res != 1 the shapes differ.
            # Confirm intended behavior before using super_res > 1.
            # upper block (truncate high freq)
            out_fft[:, :, :self.modes_height, :self.modes_width:super_res] = contract_2D(
                x[:, :, :self.modes_height, :self.modes_width],
                self._get_weight(indices, 0))
            # Lower block
            out_fft[:, :, -self.modes_height:, :self.modes_width:super_res] = contract_2D(
                x[:, :, -self.modes_height:, :self.modes_width],
                self._get_weight(indices, 1))
            out_size = (int(height*super_res), int(width*super_res))
            x = torch.fft.irfft2(out_fft, s=out_size, norm=self.fft_norm).type(dtype) #(x.size(-2), x.size(-1)))

        return x + self.bias

    def get_conv(self, indices):
        """Returns a sub-convolutional layer from the joint parametrize main-convolution

        The parametrization of sub-convolutional layers is shared with the main one.
        """
        if self.n_layers == 1:
            raise ValueError('A single convolution is parametrized, directly use the main class.')

        return SubConv(self, indices)

    def __getitem__(self, indices):
        return self.get_conv(indices)


class SubConv(nn.Module):
    """Class representing one of the convolutions from the mother joint factorized convolution

    Notes
    -----
    This relies on the fact that nn.Parameters are not duplicated:
    if the same nn.Parameter is assigned to multiple modules,
    they all point to the same data, which is shared.
    """
    def __init__(self, main_conv, indices):
        super().__init__()
        self.main_conv = main_conv
        self.indices = indices

    def forward(self, x, **kwargs):
        return self.main_conv.forward(x, self.indices, **kwargs)


class FactorizedSpectralConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, modes, n_layers=1,
                 bias=True, scale='auto', fft_norm='forward',
                 rank=0.5, factorization='tucker', fixed_rank_modes=None,
                 decomposition_kwargs=dict()):
        super().__init__()
        #Joint factorization only works for the same in and out channels
        if n_layers > 1:
            assert in_channels == out_channels
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.modes = modes

        self.rank = rank
        self.factorization = factorization
        self.n_layers = n_layers
        self.fft_norm = fft_norm

        if scale == 'auto':
            scale = (1 / (in_channels * out_channels))

        if isinstance(fixed_rank_modes, bool):
            if fixed_rank_modes:
                fixed_rank_modes=[0]
            else:
                fixed_rank_modes=None

        if factorization is None:
            # Dense weights: one spectral block per layer (1d keeps only the
            # low rfft modes, no second corner).
            self.weight = nn.Parameter(scale * torch.randn(n_layers, in_channels, out_channels,
                                                           self.modes, dtype=torch.cfloat))
            self._get_weight = self._get_weight_dense
        else:
            self.weight = tltorch.FactorizedTensor.new((n_layers, in_channels, out_channels, self.modes),
                                                       rank=self.rank, factorization=factorization, dtype=torch.cfloat,
                                                       fixed_rank_modes=fixed_rank_modes, **decomposition_kwargs)
            # NOTE(review): unlike the 2d/3d variants the return value of
            # normal_ is not re-assigned here — confirm tltorch's normal_ is
            # in-place so this is equivalent.
            self.weight.normal_(0, scale)
            self._get_weight = self._get_weight_factorized

        if bias:
            self.bias = nn.Parameter(scale * torch.randn(1, self.out_channels, 1))
        else:
            self.bias = 0

    def _get_weight_factorized(self, layer_index):
        #Get the weights corresponding to a particular layer
        return self.weight()[layer_index, :, :, :].to_tensor().contiguous()

    def _get_weight_dense(self, layer_index):
        #Get the weights corresponding to a particular layer
        return self.weight[layer_index, :, :, :]

    def forward(self, x, indices=0, s=None):
        # s: optional output length; when given, irfft resamples to length s
        # (Fourier interpolation). Defaults to the input width.
        batchsize, channels, width = x.shape
        dtype = x.dtype
        if s is None:
            s = width

        #Compute Fourier coeffcients
        x = torch.fft.rfft(x, norm=self.fft_norm)

        # Multiply relevant Fourier modes
        out_fft = torch.zeros([batchsize, self.out_channels, width//2 + 1], device=x.device, dtype=torch.cfloat)
        out_fft[:, :, :self.modes] = contract_1D(x[:, :, :self.modes], self._get_weight(indices))

        #Return to physical space
        x = torch.fft.irfft(out_fft, n=s, norm=self.fft_norm).type(dtype)

        return x + self.bias

    def get_conv(self, indices):
        """Returns a sub-convolutional layer from the joint parametrize main-convolution

        The parametrization of sub-convolutional layers is shared with the main one.
        """
        if self.n_layers == 1:
            raise ValueError('A single convolution is parametrized, directly use the main class.')

        return SubConv(self, indices)

    def __getitem__(self, indices):
        return self.get_conv(indices)


class JointFactorizedSpectralConv1d(nn.Module):
    def __init__(self, modes, width, n_layers=1, joint_factorization=True,
                 in_channels=2, scale='auto', non_linearity=nn.GELU,
                 rank=1.0, factorization='tucker', bias=True,
                 fixed_rank_modes=False, fft_norm='forward', decomposition_kwargs=dict()):
        """Stack of 1d factorized spectral conv layers, optionally sharing one
        jointly factorized weight tensor across all layers."""
        super().__init__()
        if isinstance(modes, int):
            self.modes = [modes for _ in range(n_layers)]
        else:
            self.modes = modes
        if isinstance(width, int):
            self.width = [width for _ in range(n_layers)]
        else:
            self.width = width
        assert len(self.width) == len(self.modes)
        self.n_layers = len(self.width)

        self.joint_factorization = joint_factorization
        if self.joint_factorization:
            # Joint factorization requires identical width/modes everywhere.
            assert self.width.count(self.width[0]) == self.n_layers and self.modes.count(self.modes[0]) == self.n_layers
            self.in_channels = self.width[0]
        else:
            self.in_channels = in_channels
        self.width = [self.in_channels] + self.width
        self.scale = scale
        self.non_linearity = non_linearity()
        self.rank = rank
        self.factorization = factorization
        self.bias = bias
        self.fixed_rank_modes = fixed_rank_modes
        self.decomposition_kwargs = decomposition_kwargs
        self.fft_norm = fft_norm

        if joint_factorization:
            self.convs = FactorizedSpectralConv1d(self.in_channels, self.width[0], self.modes[0],
                                                  n_layers=self.n_layers,
                                                  bias=self.bias, scale=self.scale,
                                                  fft_norm=self.fft_norm,
                                                  rank=self.rank,
                                                  factorization=self.factorization,
                                                  fixed_rank_modes=self.fixed_rank_modes,
                                                  decomposition_kwargs=decomposition_kwargs)
        else:
            self.convs = nn.ModuleList([FactorizedSpectralConv1d(self.width[j], self.width[j+1], self.modes[j],
                                                                 n_layers=1,
                                                                 bias=self.bias, scale=self.scale,
                                                                 fft_norm=self.fft_norm,
                                                                 rank=self.rank,
                                                                 factorization=self.factorization,
                                                                 fixed_rank_modes=self.fixed_rank_modes,
                                                                 decomposition_kwargs=decomposition_kwargs)
                                        for j in range(self.n_layers)])
class FNO1d(nn.Module):
    """1d Fourier Neural Operator.

    Lifts the input to a channel space with ``fc0``, applies a stack of
    spectral-convolution + pointwise-convolution layers ``u' = (W + K)(u)``,
    then projects back with ``fc1``/``fc2``.

    input:  (a(x), x) of shape (batchsize, x=s, c=in_dim)
    output: solution at a later timestep, shape (batchsize, x=s, c=out_dim)
    """

    def __init__(self, modes, width=32, layers=None, fc_dim=128, in_dim=2, out_dim=1, act='relu'):
        super(FNO1d, self).__init__()
        self.modes1 = modes
        self.width = width
        if layers is None:
            layers = [width] * 4
        # Lifting layer: input channel is in_dim, e.g. 2 for (a(x), x).
        self.fc0 = nn.Linear(in_dim, layers[0])
        # One spectral conv and one 1x1 conv per pair of consecutive widths.
        self.sp_convs = nn.ModuleList([
            SpectralConv1d(c_in, c_out, n_modes)
            for c_in, c_out, n_modes in zip(layers, layers[1:], self.modes1)
        ])
        self.ws = nn.ModuleList([
            nn.Conv1d(c_in, c_out, 1)
            for c_in, c_out in zip(layers, layers[1:])
        ])
        # Projection back to the output channels.
        self.fc1 = nn.Linear(layers[-1], fc_dim)
        self.fc2 = nn.Linear(fc_dim, out_dim)
        self.act = _get_act(act)

    def forward(self, x):
        last = len(self.ws) - 1
        x = self.fc0(x)
        x = x.permute(0, 2, 1)  # (B, S, C) -> (B, C, S) for conv layers
        for i, (speconv, w) in enumerate(zip(self.sp_convs, self.ws)):
            x = speconv(x) + w(x)
            if i != last:  # no activation after the final Fourier layer
                x = self.act(x)
        x = x.permute(0, 2, 1)  # back to channels-last for the linear layers
        x = self.fc1(x)
        x = self.act(x)
        x = self.fc2(x)
        return x
pad_ratio[0] pads left, pad_ratio[1] pads right. """ if isinstance(pad_ratio, float): pad_ratio = [pad_ratio, pad_ratio] else: assert len(pad_ratio) == 2, 'Cannot add padding in more than 2 directions' self.modes1 = modes1 self.modes2 = modes2 self.pad_ratio = pad_ratio # input channel is 3: (a(x, y), x, y) if layers is None: self.layers = [width] * (len(modes1) + 1) else: self.layers = layers self.fc0 = nn.Linear(in_dim, layers[0]) self.sp_convs = nn.ModuleList([SpectralConv2d( in_size, out_size, mode1_num, mode2_num) for in_size, out_size, mode1_num, mode2_num in zip(self.layers, self.layers[1:], self.modes1, self.modes2)]) self.ws = nn.ModuleList([nn.Conv1d(in_size, out_size, 1) for in_size, out_size in zip(self.layers, self.layers[1:])]) self.fc1 = nn.Linear(layers[-1], fc_dim) self.fc2 = nn.Linear(fc_dim, layers[-1]) self.fc3 = nn.Linear(layers[-1], out_dim) self.act = _get_act(act) def forward(self, x): ''' Args: - x : (batch size, x_grid, y_grid, 2) Returns: - x: (batch size, x_grid, y_grid, 1) ''' size_1, size_2 = x.shape[1], x.shape[2] if max(self.pad_ratio) > 0: num_pad1 = [round(i * size_1) for i in self.pad_ratio] num_pad2 = [round(i * size_2) for i in self.pad_ratio] else: num_pad1 = num_pad2 = [0.] 
class FNO3d(nn.Module):
    def __init__(self, modes1, modes2, modes3,
                 width=16, fc_dim=128,
                 layers=None,
                 in_dim=4, out_dim=1,
                 act='gelu',
                 pad_ratio=[0., 0.]):
        '''
        3d Fourier Neural Operator: lifting, a stack of spectral + pointwise
        convolution layers, and a two-layer projection head.

        Args:
            modes1: list of int, first dimension maximal modes for each layer
            modes2: list of int, second dimension maximal modes for each layer
            modes3: list of int, third dimension maximal modes for each layer
            width: int, channel width used when `layers` is None
            fc_dim: dimension of fully connected layers
            layers: list of int, channels for each layer; defaults to [width] * 4
            in_dim: int, input dimension
            out_dim: int, output dimension
            act: {tanh, gelu, relu, leaky_relu}, activation function
            pad_ratio: the ratio of the extended domain. A float pads both
                sides equally; a 2-list pads [left, right] of the last axis.
        '''
        super(FNO3d, self).__init__()
        if isinstance(pad_ratio, float):
            pad_ratio = [pad_ratio, pad_ratio]
        else:
            assert len(pad_ratio) == 2, 'Cannot add padding in more than 2 directions.'
        self.pad_ratio = pad_ratio
        self.modes1 = modes1
        self.modes2 = modes2
        self.modes3 = modes3
        if layers is None:
            self.layers = [width] * 4
        else:
            self.layers = layers
        # Fix: use self.layers, not the (possibly None) `layers` argument —
        # previously constructing FNO3d with only `width` raised a TypeError
        # at `layers[0]`.
        self.fc0 = nn.Linear(in_dim, self.layers[0])
        self.sp_convs = nn.ModuleList([SpectralConv3d(
            in_size, out_size, mode1_num, mode2_num, mode3_num)
            for in_size, out_size, mode1_num, mode2_num, mode3_num
            in zip(self.layers, self.layers[1:],
                   self.modes1, self.modes2, self.modes3)])
        self.ws = nn.ModuleList([nn.Conv1d(in_size, out_size, 1)
                                 for in_size, out_size
                                 in zip(self.layers, self.layers[1:])])
        self.fc1 = nn.Linear(self.layers[-1], fc_dim)
        self.fc2 = nn.Linear(fc_dim, out_dim)
        self.act = _get_act(act)

    def forward(self, x):
        '''
        Args:
            x: (batchsize, x_grid, y_grid, t_grid, in_dim)
        Returns:
            u: (batchsize, x_grid, y_grid, t_grid, out_dim)
        '''
        size_z = x.shape[-2]  # time resolution (channels are still last here)
        if max(self.pad_ratio) > 0:
            num_pad = [round(size_z * i) for i in self.pad_ratio]
        else:
            # Integer zeros: these are element counts consumed by the padding
            # helpers, not ratios (was [0., 0.]).
            num_pad = [0, 0]
        length = len(self.ws)
        batchsize = x.shape[0]
        x = self.fc0(x)
        x = x.permute(0, 4, 1, 2, 3)  # (B, C, X, Y, T)
        x = add_padding(x, num_pad=num_pad)
        size_x, size_y, size_z = x.shape[-3], x.shape[-2], x.shape[-1]
        for i, (speconv, w) in enumerate(zip(self.sp_convs, self.ws)):
            x1 = speconv(x)
            # 1x1 conv applied across the flattened spatial/temporal dims.
            x2 = w(x.view(batchsize, self.layers[i], -1)).view(
                batchsize, self.layers[i + 1], size_x, size_y, size_z)
            x = x1 + x2
            if i != length - 1:  # no activation after the last Fourier layer
                x = self.act(x)
        x = remove_padding(x, num_pad=num_pad)
        x = x.permute(0, 2, 3, 4, 1)
        x = self.fc1(x)
        x = self.act(x)
        x = self.fc2(x)
        return x
class FactorizedFNO3d(nn.Module):
    """3d FNO whose spectral layers use tensor-factorized weights.

    When ``joint_factorization`` is True, all ``n_layers`` spectral
    convolutions share one jointly-factorized weight tensor (a single
    ``Block`` instance indexed per layer); otherwise each layer gets its
    own ``Block``.
    """

    def __init__(self, modes_height, modes_width, modes_depth, width,
                 fc_channels=256,
                 n_layers=4,
                 joint_factorization=True,
                 non_linearity=F.gelu,
                 rank=1.0,
                 factorization='cp',
                 fixed_rank_modes=False,
                 domain_padding=9,
                 in_channels=3,
                 Block=None,
                 verbose=True,
                 fft_contraction='complex',
                 fft_norm='backward',
                 mlp=False,
                 decomposition_kwargs=dict()):
        # NOTE(review): `decomposition_kwargs=dict()` is a mutable default; it
        # is only read and forwarded here, but one dict is shared by all
        # instances created without the argument.
        super().__init__()
        self.modes_height = modes_height
        self.modes_width = modes_width
        self.modes_depth = modes_depth
        self.width = width
        self.fc_channels = fc_channels
        self.n_layers = n_layers
        self.joint_factorization = joint_factorization
        self.non_linearity = non_linearity
        self.rank = rank
        self.factorization = factorization
        self.fixed_rank_modes = fixed_rank_modes
        self.domain_padding = domain_padding  # pad the domain if input is non-periodic
        self.in_channels = in_channels
        self.decomposition_kwargs = decomposition_kwargs
        self.fft_norm = fft_norm
        self.verbose = verbose

        if Block is None:
            Block = FactorizedSpectralConv3d
        if verbose:
            print(f'FNO Block using {Block}, fft_contraction={fft_contraction}')
        self.Block = Block

        if joint_factorization:
            # Single block holding the weights of every layer.
            self.convs = Block(self.width, self.width,
                               self.modes_height, self.modes_width, self.modes_depth,
                               rank=rank,
                               fft_contraction=fft_contraction,
                               fft_norm=fft_norm,
                               factorization=factorization,
                               fixed_rank_modes=fixed_rank_modes,
                               decomposition_kwargs=decomposition_kwargs,
                               mlp=mlp,
                               n_layers=n_layers)
        else:
            # NOTE(review): unlike the joint branch, this call passes
            # self.width only once before the modes — confirm Block's
            # positional signature for the per-layer (n_layers=1) case.
            self.convs = nn.ModuleList([Block(self.width,
                                              self.modes_height, self.modes_width, self.modes_depth,
                                              fft_contraction=fft_contraction,
                                              rank=rank,
                                              factorization=factorization,
                                              fixed_rank_modes=fixed_rank_modes,
                                              decomposition_kwargs=decomposition_kwargs,
                                              mlp=mlp,
                                              n_layers=1)
                                        for _ in range(n_layers)])
        # Pointwise (1x1x1) convolutions for the local linear path.
        self.linears = nn.ModuleList([nn.Conv3d(self.width, self.width, 1)
                                      for _ in range(n_layers)])
        self.fc0 = nn.Linear(in_channels, self.width)  # input channel is 3: (a(x, y), x, y)
        self.fc1 = nn.Linear(self.width, fc_channels)
        self.fc2 = nn.Linear(fc_channels, 1)

    def forward(self, x, super_res=1):
        # x arrives channels-first (B, C, H, W, D); move channels last for fc0,
        # then back to channels-first for the convolution layers.
        #grid = self.get_grid(x.shape, x.device)
        #x = torch.cat((x, grid), dim=-1)
        #x = self.fc0(x)
        #x = x.permute(0, 3, 1, 2)
        x = x.permute(0,2,3,4,1)
        x = self.fc0(x)
        x = x.permute(0,4,1,2,3)
        # Right-pad the last (depth/time) axis for non-periodic inputs.
        x = F.pad(x, [0, self.domain_padding])
        for i in range(self.n_layers):
            # NOTE(review): this branch is effectively dead — super_res is
            # reassigned to itself on the last layer and forced to 1
            # otherwise, and it is never forwarded to the conv (the call-site
            # argument is commented out below).
            if super_res > 1 and i == (self.n_layers - 1):
                super_res = super_res
            else:
                super_res = 1
            x1 = self.convs[i](x) #, super_res=super_res)
            x2 = self.linears[i](x)
            x = x1 + x2
            if i < (self.n_layers - 1):  # no activation after the last layer
                x = self.non_linearity(x)
        # NOTE(review): with domain_padding == 0 this slice (:-0) would return
        # an empty tensor; the default of 9 avoids that case.
        x = x[..., :-self.domain_padding]
        x = x.permute(0, 2, 3, 4, 1)
        x = self.fc1(x)
        x = self.non_linearity(x)
        x = self.fc2(x)
        x = x.permute(0,4,1,2,3)
        return x
x, y) self.fc1 = nn.Linear(self.width, fc_channels) self.fc2 = nn.Linear(fc_channels, 1) def forward(self, x, super_res=1): #grid = self.get_grid(x.shape, x.device) #x = torch.cat((x, grid), dim=-1) #x = self.fc0(x) #x = x.permute(0, 3, 1, 2) x = x.permute(0,2,3,1) x = self.fc0(x) x = x.permute(0,3,1,2) x = F.pad(x, [0, self.domain_padding, 0, self.domain_padding]) for i in range(self.n_layers): if super_res > 1 and i == (self.n_layers - 1): super_res = super_res else: super_res = 1 x1 = self.convs[i](x) #, super_res=super_res) x2 = self.linears[i](x) x = x1 + x2 if i < (self.n_layers - 1): x = self.non_linearity(x) x = x[..., :-self.domain_padding, :-self.domain_padding] x = x.permute(0, 2, 3, 1) x = self.fc1(x) x = self.non_linearity(x) x = self.fc2(x) x = x.permute(0,3,1,2) return x # def extra_repr(self): # s = (f'{self.modes_height=}, {self.modes_width=}, {self.width=}, {self.fc_channels=}, {self.n_layers=}, ' # f'{self.joint_factorization=}, {self.non_linearity=}, ' # f'{self.rank=}, {self.factorization=}, {self.fixed_rank_modes=}, ' # f'{self.domain_padding=}, {self.in_channels=}, {self.Block=}, ' # f'{self.verbose=}, ' # f'{self.decomposition_kwargs=}') # return s class FactorizedFNO1d(nn.Module): def __init__(self, modes, width, in_channels=2, out_channels=1, n_layers=4, lifting=None, projection=None, joint_factorization=True, scale='auto', non_linearity=nn.GELU, rank=1.0, factorization='tucker', bias=True, fixed_rank_modes=False, fft_norm='forward', decomposition_kwargs=dict()): super().__init__() if isinstance(width, int): init_width = width final_width = width else: init_width = width[0] final_width = width[-1] self.non_linearity = non_linearity() if lifting is None: self.lifting = nn.Linear(in_channels, init_width) if projection is None: self.projection = nn.Sequential(nn.Linear(final_width, 256), self.non_linearity, nn.Linear(256, out_channels)) self.fno_layers = JointFactorizedSpectralConv1d(modes, width, n_layers=n_layers, 
joint_factorization=joint_factorization, in_channels=init_width, scale=scale, non_linearity=non_linearity, rank=rank, factorization=factorization, bias=bias, fixed_rank_modes=fixed_rank_modes, fft_norm=fft_norm, decomposition_kwargs=decomposition_kwargs) def forward(self, x, s=None): #Lifting x = x.permute(0,2,1) x = self.lifting(x) x = x.permute(0,2,1) #Fourier layers x = self.fno_layers(x, s=s) #Projection x = x.permute(0,2,1) x = self.projection(x) x = x.permute(0,2,1) return x ================================================ FILE: models/utils.py ================================================ import torch.nn.functional as F def add_padding(x, num_pad): if max(num_pad) > 0: res = F.pad(x, (num_pad[0], num_pad[1]), 'constant', 0) else: res = x return res def add_padding2(x, num_pad1, num_pad2): if max(num_pad1) > 0 or max(num_pad2) > 0: res = F.pad(x, (num_pad2[0], num_pad2[1], num_pad1[0], num_pad1[1]), 'constant', 0.) else: res = x return res def remove_padding(x, num_pad): if max(num_pad) > 0: res = x[..., num_pad[0]:-num_pad[1]] else: res = x return res def remove_padding2(x, num_pad1, num_pad2): if max(num_pad1) > 0 or max(num_pad2) > 0: res = x[..., num_pad1[0]:-num_pad1[1], num_pad2[0]:-num_pad2[1]] else: res = x return res def _get_act(act): if act == 'tanh': func = F.tanh elif act == 'gelu': func = F.gelu elif act == 'relu': func = F.relu_ elif act == 'elu': func = F.elu_ elif act == 'leaky_relu': func = F.leaky_relu_ else: raise ValueError(f'{act} is not supported') return func ================================================ FILE: pinns.py ================================================ from argparse import ArgumentParser import yaml from baselines.pinns_ns_05s import train from baselines.pinns_ns_50s import train_longtime from baselines.sapinns import train_sapinn import csv if __name__ == '__main__': parser = ArgumentParser(description='Basic paser') parser.add_argument('--config_path', type=str, help='Path to the configuration file') 
def shuffle_data(datapath):
    """Shuffle the samples (axis 0) of the .npy array at `datapath` with a
    fixed seed (123) and save the result alongside it with a
    '-shuffle.npy' suffix."""
    samples = np.load(datapath)
    # Fixed seed keeps the shuffle reproducible across runs.
    np.random.default_rng(123).shuffle(samples, axis=0)
    out_path = datapath.replace('.npy', '-shuffle.npy')
    np.save(out_path, samples)
scipy.io from timeit import default_timer if __name__ == '__main__': device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # Resolution s = 2048 sub = 1 # Number of solutions to generate N = 1 # Set up 2d GRF with covariance parameters GRF = GaussianRF(2, s, alpha=2.5, tau=7, device=device) # Forcing function: 0.1*(sin(2pi(x+y)) + cos(2pi(x+y))) t = torch.linspace(0, 1, s + 1, device=device) t = t[0:-1] X, Y = torch.meshgrid(t, t) f = 0.1 * (torch.sin(2 * math.pi * (X + Y)) + torch.cos(2 * math.pi * (X + Y))) # Number of snapshots from solution record_steps = 200 # Inputs a = torch.zeros(N, s, s) # Solutions u = torch.zeros(N, s, s, record_steps) # Solve equations in batches (order of magnitude speed-up) # Batch size bsize = 1 c = 0 t0 = default_timer() for j in range(N // bsize): # Sample random feilds w0 = GRF.sample(bsize) # Solve NS sol, sol_t = navier_stokes_2d(w0, f, 1e-3, 50.0, 1e-4, record_steps) a[c:(c + bsize), ...] = w0 u[c:(c + bsize), ...] = sol c += bsize t1 = default_timer() print(f'Time cost {t1 - t0} s') torch.save( { 'a': a.cpu(), 'u': u.cpu(), 't': sol_t.cpu() }, 'data/ns_data.pt' ) # scipy.io.savemat('data/ns_data.mat', mdict={'a': a.cpu().numpy(), 'u': u.cpu().numpy(), 't': sol_t.cpu().numpy()}) ================================================ FILE: profiler/calmacs.py ================================================ from ptflops import get_model_complexity_info ================================================ FILE: run_pino2d.py ================================================ import yaml from argparse import ArgumentParser import random import torch from models import FNO2d from train_utils import Adam from torch.utils.data import DataLoader from train_utils.datasets import DarcyFlow from train_utils.train_2d import train_2d_operator def train(args, config): seed = random.randint(1, 10000) print(f'Random seed :{seed}') torch.manual_seed(seed) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_config 
= config['data'] dataset = DarcyFlow(data_config['datapath'], nx=data_config['nx'], sub=data_config['sub'], offset=data_config['offset'], num=data_config['n_sample']) dataloader = DataLoader(dataset, batch_size=config['train']['batchsize']) model = FNO2d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers'], act=config['model']['act']).to(device) # Load from checkpoint if 'ckpt' in config['train']: ckpt_path = config['train']['ckpt'] ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) optimizer = Adam(model.parameters(), betas=(0.9, 0.999), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) train_2d_operator(model, dataloader, optimizer, scheduler, config, rank=0, log=args.log, project=config['log']['project'], group=config['log']['group']) if __name__ == '__main__': torch.backends.cudnn.benchmark = True parser = ArgumentParser(description='Basic paser') parser.add_argument('--config_path', type=str, help='Path to the configuration file') parser.add_argument('--start', type=int, help='Start index of test instance') parser.add_argument('--stop', type=int, help='Stop index of instances') parser.add_argument('--log', action='store_true', help='Turn on the wandb') args = parser.parse_args() config_file = args.config_path with open(config_file, 'r') as stream: config = yaml.load(stream, yaml.FullLoader) for i in range(args.start, args.stop): print(f'Start solving instance {i}') config['data']['offset'] = i train(args, config) print(f'{args.stop - args.start} instances are solved') ================================================ FILE: run_pino3d.py ================================================ import random import yaml import torch from torch.utils.data import DataLoader from train_utils import Adam, 
NSLoader, get_forcing from train_utils.train_3d import train from models import FNO3d from argparse import ArgumentParser from train_utils.utils import requires_grad def run_instance(loader, config, data_config): trainset = loader.make_dataset(data_config['n_sample'], start=data_config['offset']) train_loader = DataLoader(trainset, batch_size=config['train']['batchsize']) model = FNO3d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], modes3=config['model']['modes3'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers']).to(device) if 'ckpt' in config['train']: ckpt_path = config['train']['ckpt'] ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) if 'twolayer' in config['train'] and config['train']['twolayer']: requires_grad(model, False) requires_grad(model.sp_convs[-1], True) requires_grad(model.ws[-1], True) requires_grad(model.fc1, True) requires_grad(model.fc2, True) params = [] for param in model.parameters(): if param.requires_grad == True: params.append(param) else: params = model.parameters() beta1 = config['train']['beta1'] if 'beta1' in config['train'] else 0.9 beta2 = config['train']['beta2'] if 'beta2' in config['train'] else 0.999 optimizer = Adam(params, betas=(beta1, beta2), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) forcing = get_forcing(loader.S).to(device) profile = config['train']['profile'] if 'profile' in config['train'] else False train(model, loader, train_loader, optimizer, scheduler, forcing, config, rank=0, log=options.log, project=config['log']['project'], group=config['log']['group'], use_tqdm=True, profile=profile) if __name__ == '__main__': torch.backends.cudnn.benchmark = True parser = ArgumentParser(description='Basic paser') parser.add_argument('--config_path', type=str, help='Path to the configuration file') 
def solve(a, res_x, res_t, end, Re, n=4, delta_t=1e-3):
    '''
    Given initial condition a, solve for u in time interval [0, end]
    using a 2d Kolmogorov-flow solver and record the vorticity.

    Args:
        a: initial condition, res_x by res_x tensor
        res_x: resolution in space
        res_t: number of recorded snapshots after t=0
        end: end of the time interval
        Re: Reynolds number
        n: forcing number
        delta_t: internal integration step used between recorded snapshots

    Returns:
        tensor of shape (res_x, res_x, res_t + 1); slice [:, :, 0] is the
        initial condition, each later slice is one recorded snapshot.
        (Note: res_t + 1 slices in total, not res_t.)
    '''
    dt = end / res_t  # time between recorded snapshots
    solver = KolmogorovFlow2d(a, Re, n)
    sol = torch.zeros((res_x, res_x, res_t + 1), device=a.device)
    sol[:, :, 0] = a
    for j in range(res_t):
        # Advance one snapshot interval; the solver integrates internally
        # with step size delta_t.
        solver.advance(dt, delta_t=delta_t)
        sol[:, :, 1 + j] = solver.vorticity().squeeze(0)
    return sol
if __name__ == '__main__': torch.backends.cudnn.benchmark = True device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') parser = ArgumentParser(description='Basic paser') parser.add_argument('--config_path', type=str, help='Path to the configuration file') parser.add_argument('--deltat', type=float, default=1e-3, help='delta T') args = parser.parse_args() config_file = args.config_path with open(config_file, 'r') as stream: config = yaml.load(stream, yaml.FullLoader) data_config = config['data'] loader = NSLoader(datapath1=data_config['datapath'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], N=data_config['total_num'], t_interval=data_config['time_interval']) a_loader = loader.make_loader(data_config['n_sample'], batch_size=config['train']['batchsize'], start=data_config['offset'], train=data_config['shuffle']) print(f'Solver starts on device: {device}') myloss = LpLoss(size_average=True) test_err = [] time_cost = [] # run solver for _, u in a_loader: u = u[0].to(device) torch.cuda.synchronize() t1 = default_timer() pred = solve(u[:, :, 0], res_x=loader.S, res_t=loader.T - 1, end=data_config['time_interval'], Re=data_config['Re'], n=4, delta_t=args.deltat) torch.cuda.synchronize() t2 = default_timer() # report test error test_l2 = myloss(pred, u) test_err.append(test_l2.item()) print(f'Test l2: {test_l2.item()}') time_cost.append(t2 - t1) test_err = np.array(test_err) time_cost = np.array(time_cost) idx = data_config['offset'] n_sample = data_config['n_sample'] print(f'Test instance: {idx} to {idx+n_sample}; \n' f'Time cost = mean: {time_cost.mean()}s; std_err: {time_cost.std(ddof=1) / math.sqrt(len(a_loader))}s; \n' f'Solver resolution: {loader.S} x {loader.S} x {loader.T}; \n' f'Test L2 error = mean: {test_err.mean()}; std_err: {test_err.std(ddof=1) / math.sqrt(len(a_loader))}') ================================================ FILE: scripts/device1-finetune.sh 
================================================ #! /bin/bash CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s.yaml \ --start 1 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s.yaml \ --start 3 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s.yaml \ --start 4 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s.yaml \ --start 5 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s.yaml \ --start 6 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s.yaml \ --start 7 \ --log; ================================================ FILE: scripts/device2-finetune.sh ================================================ #! /bin/bash CUDA_VISIBLE_DEVICES=2 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 0 \ --log; CUDA_VISIBLE_DEVICES=2 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 1 \ --log; CUDA_VISIBLE_DEVICES=2 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 2 \ --log; CUDA_VISIBLE_DEVICES=2 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 3 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 4 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 5 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 6 \ --log; CUDA_VISIBLE_DEVICES=3 python3 train_PINO3d.py \ --config_path configs/finetune/Re500-finetune-05s4k4.yaml \ --start 7 \ --log; 
#! /bin/bash
# Sequentially run run_pino3d.py on GPU 3 for every Re*to300 transfer
# config, instances 0-39, with wandb logging enabled. Each run proceeds
# regardless of the previous one's exit status (same as the original
# ;-separated invocations).
for re in 100 200 250 300 350 400; do
    CUDA_VISIBLE_DEVICES=3 python3 run_pino3d.py \
        --config_path "configs/transfer/Re${re}to300-1s.yaml" \
        --start 0 \
        --stop 40 \
        --log;
done
/bin/bash
# Fine-tune Re500 (0.5 s, 4k 2-layer config) on instances 9-17, GPU 1.
for s in 9 10 11 12 13 14 15 16 17; do
    CUDA_VISIBLE_DEVICES=1 python3 train_PINO3d.py \
        --config_path configs/finetune/Re500-finetune-05s4k-2layer.yaml \
        --start $s \
        --log;
done
================================================
FILE: scripts/finetune-4k0.sh
================================================
#! /bin/bash
# Single fine-tuning run: instance 9 of the 4k config, GPU 2.
CUDA_VISIBLE_DEVICES=2 python3 train_PINO3d.py \
    --config_path configs/finetune/Re500-finetune-05s4k.yaml \
    --start 9 \
    --log;
================================================
FILE: scripts/finetune-4k1-2layer.sh
================================================
#!
/bin/bash
# Fine-tune Re500 (0.5 s, 2-layer config) on instances 9-17, GPU 0.
for s in 9 10 11 12 13 14 15 16 17; do
    CUDA_VISIBLE_DEVICES=0 python3 train_PINO3d.py \
        --config_path configs/finetune/Re500-finetune-05s-2layer.yaml \
        --start $s \
        --log;
done
================================================
FILE: scripts/finetune-4k1.sh
================================================
#!
/bin/bash
# Fine-tune Re500 (0.5 s config) on instances 9-17, GPU 0.
for s in 9 10 11 12 13 14 15 16 17; do
    CUDA_VISIBLE_DEVICES=0 python3 train_PINO3d.py \
        --config_path configs/finetune/Re500-finetune-05s.yaml \
        --start $s \
        --log;
done
================================================
FILE: scripts/finetune-4k4-2layer.sh
================================================
#!
/bin/bash
# Fine-tune Re500 (0.5 s, 4k4 2-layer config) on instances 9-17, GPU 2.
for s in 9 10 11 12 13 14 15 16 17; do
    CUDA_VISIBLE_DEVICES=2 python3 train_PINO3d.py \
        --config_path configs/finetune/Re500-finetune-05s4k4-2layer.yaml \
        --start $s \
        --log;
done
================================================
FILE: scripts/fnoRe500.sh
================================================
#!
/bin/bash
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=16
#SBATCH --gres gpu:v100:1
#SBATCH --mem=64G
#SBATCH --mail-user=hzzheng@caltech.edu
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
# Train the FNO operator baseline for Re500 on one V100.
# Fixed: the directive was "--email-user", which sbatch does not recognize,
# so the BEGIN/END/FAIL notifications had no recipient; the valid option
# is "--mail-user".
python3 train_operator.py --config_path configs/operator/Re500-FNO.yaml
================================================
FILE: scripts/ngc_submit_pino.sh
================================================
# Submit one resumable NGC batch job per experiment. All jobs share the
# same image/instance/workspace/dataset template; only the experiment
# suffix varies. $1 names both the job (ml-model.PINO.ns-$1) and the
# training script it runs (scripts/train_$1.sh).
submit() {
    ngc batch run --name "ml-model.PINO.ns-$1" --preempt RESUMABLE \
        --commandline "cd /Code/PINO; git pull; bash scripts/train_$1.sh" \
        --image 'nvidia/pytorch:22.08-py3' \
        --priority HIGH \
        --ace nv-us-west-2 \
        --instance dgxa100.40g.1.norm \
        --workspace QsixjfOES8uYIp5kwIDblQ:/Code \
        --datasetid 111345:/mount/data \
        --team nvr-aialgo \
        --result /results
}
submit dat400
submit dat200
submit dat80
submit dat40
submit dat0
submit res32
submit res16
================================================
FILE: scripts/ngc_test_submit_pino.sh
================================================
# Single test submission (dat800) using the same job template as above.
ngc batch run --name 'ml-model.PINO.ns-dat800' --preempt RESUMABLE \
    --commandline 'cd /Code/PINO; git pull; bash scripts/train_dat800.sh' \
    --image 'nvidia/pytorch:22.08-py3' \
    --priority HIGH \
    --ace nv-us-west-2 \
    --instance dgxa100.40g.1.norm \
    --workspace QsixjfOES8uYIp5kwIDblQ:/Code \
    --datasetid 111345:/mount/data \
    --team nvr-aialgo \
    --result /results
================================================
FILE: scripts/pretrain.sh
================================================
#! /bin/bash
# Pretrain PINO on Re500 (1 s horizon), GPU 0.
CUDA_VISIBLE_DEVICES=0 python3 pretrain.py \
    --config_path configs/pretrain/Re500-pretrain-1s.yaml
================================================
FILE: scripts/scratchRe500.sh
================================================
#!
/bin/bash
# Train from scratch: Re500, 0.5 s horizon, instances 0-9, GPU 2.
CUDA_VISIBLE_DEVICES=2 python3 run_pino3d.py \
    --config_path configs/scratch/Re500-scratch-05s.yaml \
    --start 0 \
    --stop 10 \
    --log
================================================
FILE: scripts/test-opt/Re500-1_8.sh
================================================
#! /bin/bash
# Instance-wise test-time optimization for the first 50 test samples, GPU 1.
for i in {0..49}
do
    CUDA_VISIBLE_DEVICES=1 python3 instance_opt.py --config configs/instance/Re500-1_8-PINO.yaml --ckpt checkpoints/Re500-1_8s-800-PINO-140000.pt --idx $i --tqdm
done
================================================
FILE: scripts/train_dat0.sh
================================================
# NOTE(review): the wandb API key is hard-coded into every train_* script;
# it should be rotated and supplied via the WANDB_API_KEY environment
# variable instead of being committed.
pip install wandb tqdm pyyaml
wandb login 69a3bddb4146cf76113885de5af84c7f4c165753
# Fixed: this script previously launched the dat40 config
# (copy-paste from train_dat40.sh); it now runs the dat0 experiment
# it is named for (configs/ngc/Re500-1_8-dat0-PINO.yaml exists in the repo).
python3 train_pino.py --config configs/ngc/Re500-1_8-dat0-PINO.yaml --log
================================================
FILE: scripts/train_dat200.sh
================================================
pip install wandb tqdm pyyaml
wandb login 69a3bddb4146cf76113885de5af84c7f4c165753
python3 train_pino.py --config configs/ngc/Re500-1_8-dat200-PINO.yaml --log
================================================
FILE: scripts/train_dat40.sh
================================================
pip install wandb tqdm pyyaml
wandb login 69a3bddb4146cf76113885de5af84c7f4c165753
python3 train_pino.py --config configs/ngc/Re500-1_8-dat40-PINO.yaml --log
================================================
FILE: scripts/train_dat400.sh
================================================
pip install wandb tqdm pyyaml
wandb login 69a3bddb4146cf76113885de5af84c7f4c165753
python3 train_pino.py --config configs/ngc/Re500-1_8-dat400-PINO.yaml --log
================================================
FILE: scripts/train_dat80.sh
================================================
pip install wandb tqdm pyyaml
wandb login 69a3bddb4146cf76113885de5af84c7f4c165753
python3 train_pino.py --config configs/ngc/Re500-1_8-dat80-PINO.yaml --log
================================================
FILE: scripts/train_dat800.sh
================================================ pip install wandb tqdm pyyaml wandb login 69a3bddb4146cf76113885de5af84c7f4c165753 python3 train_pino.py --config configs/ngc/Re500-1_8-dat800-PINO.yaml --log ================================================ FILE: scripts/train_res16.sh ================================================ pip install wandb tqdm pyyaml wandb login 69a3bddb4146cf76113885de5af84c7f4c165753 python3 train_pino.py --config configs/ngc/Re500-1_8-res16-PINO.yaml --log ================================================ FILE: scripts/train_res32.sh ================================================ pip install wandb tqdm pyyaml wandb login 69a3bddb4146cf76113885de5af84c7f4c165753 python3 train_pino.py --config configs/ngc/Re500-1_8-res32-PINO.yaml --log ================================================ FILE: solver/__init__.py ================================================ ================================================ FILE: solver/kolmogorov_flow.py ================================================ import torch import math class KolmogorovFlow2d(object): def __init__(self, w0, Re, n): # Grid size self.s = w0.size()[-1] assert self.s == w0.size()[-2], "Grid must be uniform in both directions." assert math.log2(self.s).is_integer(), "Grid size must be power of 2." assert n >= 0 and isinstance(n, int), "Forcing number must be non-negative integer." assert n < self.s // 2 - 1, "Forcing number too large for grid size." # Forcing number self.n = n assert Re > 0, "Reynolds number must be positive." 
# Reynolds number self.Re = Re # Device self.device = w0.device # Current time self.time = 0.0 # Current vorticity in Fourier space self.w_h = torch.fft.fft2(w0, norm="backward") # Wavenumbers in y and x directions self.k_y = torch.cat((torch.arange(start=0, end=self.s // 2, step=1, dtype=torch.float32, device=self.device), \ torch.arange(start=-self.s // 2, end=0, step=1, dtype=torch.float32, device=self.device)), 0).repeat(self.s, 1) self.k_x = self.k_y.clone().transpose(0, 1) # Negative inverse Laplacian in Fourier space self.inv_lap = (self.k_x ** 2 + self.k_y ** 2) self.inv_lap[0, 0] = 1.0 self.inv_lap = 1.0 / self.inv_lap # Negative scaled Laplacian self.G = (1.0 / self.Re) * (self.k_x ** 2 + self.k_y ** 2) # Dealiasing mask using 2/3 rule self.dealias = (self.k_x ** 2 + self.k_y ** 2 <= (self.s / 3.0) ** 2).float() # Ensure mean zero self.dealias[0, 0] = 0.0 # Get current vorticity from stream function (Fourier space) def vorticity(self, stream_f=None, real_space=True): if stream_f is not None: w_h = self.Re * self.G * stream_f else: w_h = self.w_h if real_space: return torch.fft.irfft2(w_h, s=(self.s, self.s), norm="backward") else: return w_h # Compute stream function from vorticity (Fourier space) def stream_function(self, w_h=None, real_space=False): if w_h is None: psi_h = self.w_h.clone() else: psi_h = w_h.clone() # Stream function in Fourier space: solve Poisson equation psi_h = self.inv_lap * psi_h if real_space: return torch.fft.irfft2(psi_h, s=(self.s, self.s), norm="backward") else: return psi_h # Compute velocity field from stream function (Fourier space) def velocity_field(self, stream_f=None, real_space=True): if stream_f is None: stream_f = self.stream_function(real_space=False) # Velocity field in x-direction = psi_y q_h = stream_f * 1j * self.k_y # Velocity field in y-direction = -psi_x v_h = stream_f * -1j * self.k_x if real_space: q = torch.fft.irfft2(q_h, s=(self.s, self.s), norm="backward") v = torch.fft.irfft2(v_h, s=(self.s, self.s), 
norm="backward") return q, v else: return q_h, v_h # Compute non-linear term + forcing from given vorticity (Fourier space) def nonlinear_term(self, w_h): # Physical space vorticity w = torch.fft.ifft2(w_h, s=(self.s, self.s), norm="backward") # Velocity field in physical space q, v = self.velocity_field(self.stream_function(w_h, real_space=False), real_space=True) # Compute non-linear term t1 = torch.fft.fft2(q * w, s=(self.s, self.s), norm="backward") t1 = self.k_x * t1 t2 = torch.fft.fft2(v * w, s=(self.s, self.s), norm="backward") t2 = self.k_y * t2 nonlin = -1j * (t1 + t2) # Apply forcing: -ncos(ny) if self.n > 0: nonlin[..., 0, self.n] -= (float(self.n) / 2.0) * (self.s ** 2) nonlin[..., 0, -self.n] -= (float(self.n) / 2.0) * (self.s ** 2) return nonlin def advance(self, t, delta_t=1e-3): # Final time T = self.time + t # Advance solution in Fourier space while self.time < T: if self.time + delta_t > T: current_delta_t = T - self.time else: current_delta_t = delta_t # Inner-step of Heun's method nonlin1 = self.nonlinear_term(self.w_h) w_h_tilde = (self.w_h + current_delta_t * (nonlin1 - 0.5 * self.G * self.w_h)) / ( 1.0 + 0.5 * current_delta_t * self.G) # Cranck-Nicholson + Heun update nonlin2 = self.nonlinear_term(w_h_tilde) self.w_h = (self.w_h + current_delta_t * (0.5 * (nonlin1 + nonlin2) - 0.5 * self.G * self.w_h)) / ( 1.0 + 0.5 * current_delta_t * self.G) # De-alias self.w_h *= self.dealias self.time += current_delta_t ================================================ FILE: solver/legacy_solver.py ================================================ import torch import math import scipy.io from timeit import default_timer from tqdm import tqdm class GaussianRF(object): def __init__(self, dim, size, alpha=2, tau=3, sigma=None, boundary="periodic", device=None): self.dim = dim self.device = device if sigma is None: sigma = tau**(0.5*(2*alpha - self.dim)) k_max = size//2 if dim == 1: k = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ 
torch.arange(start=-k_max, end=0, step=1, device=device)), 0) self.sqrt_eig = size*math.sqrt(2.0)*sigma*((4*(math.pi**2)*(k**2) + tau**2)**(-alpha/2.0)) self.sqrt_eig[0] = 0.0 elif dim == 2: wavenumers = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0).repeat(size,1) k_x = wavenumers.transpose(0,1) k_y = wavenumers self.sqrt_eig = (size**2)*math.sqrt(2.0)*sigma*((4*(math.pi**2)*(k_x**2 + k_y**2) + tau**2)**(-alpha/2.0)) self.sqrt_eig[0,0] = 0.0 elif dim == 3: wavenumers = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0).repeat(size,size,1) k_x = wavenumers.transpose(1,2) k_y = wavenumers k_z = wavenumers.transpose(0,2) self.sqrt_eig = (size**3)*math.sqrt(2.0)*sigma*((4*(math.pi**2)*(k_x**2 + k_y**2 + k_z**2) + tau**2)**(-alpha/2.0)) self.sqrt_eig[0,0,0] = 0.0 self.size = [] for j in range(self.dim): self.size.append(size) self.size = tuple(self.size) def sample(self, N): coeff = torch.randn(N, *self.size, 2, device=self.device) coeff[...,0] = self.sqrt_eig*coeff[...,0] coeff[...,1] = self.sqrt_eig*coeff[...,1] u = torch.ifft(coeff, self.dim, normalized=False) u = u[...,0] return u #w0: initial vorticity #f: forcing term #visc: viscosity (1/Re) #T: final time #delta_t: internal time-step for solve (descrease if blow-up) #record_steps: number of in-time snapshots to record def navier_stokes_2d(w0, f, visc, T, delta_t=1e-4, record_steps=1): #Grid size - must be power of 2 N = w0.size()[-1] #Maximum frequency k_max = math.floor(N/2.0) #Number of steps to final time steps = math.ceil(T/delta_t) #Initial vorticity to Fourier space w_h = torch.rfft(w0, 2, normalized=False, onesided=False) #Forcing to Fourier space f_h = torch.rfft(f, 2, normalized=False, onesided=False) #If same forcing for the whole batch if len(f_h.size()) < len(w_h.size()): f_h = torch.unsqueeze(f_h, 0) #Record solution every this number 
of steps record_time = math.floor(steps/record_steps) #Wavenumbers in y-direction k_y = torch.cat((torch.arange(start=0, end=k_max, step=1, device=w0.device), torch.arange(start=-k_max, end=0, step=1, device=w0.device)), 0).repeat(N,1) #Wavenumbers in x-direction k_x = k_y.transpose(0,1) #Negative Laplacian in Fourier space lap = 4*(math.pi**2)*(k_x**2 + k_y**2) lap[0,0] = 1.0 #Dealiasing mask dealias = torch.unsqueeze(torch.logical_and(torch.abs(k_y) <= (2.0/3.0)*k_max, torch.abs(k_x) <= (2.0/3.0)*k_max).float(), 0) #Saving solution and time sol = torch.zeros(*w0.size(), record_steps, device=w0.device) sol_t = torch.zeros(record_steps, device=w0.device) #Record counter c = 0 #Physical time t = 0.0 for j in tqdm(range(steps)): #Stream function in Fourier space: solve Poisson equation psi_h = w_h.clone() psi_h[...,0] = psi_h[...,0]/lap psi_h[...,1] = psi_h[...,1]/lap #Velocity field in x-direction = psi_y q = psi_h.clone() temp = q[...,0].clone() q[...,0] = -2*math.pi*k_y*q[...,1] q[...,1] = 2*math.pi*k_y*temp q = torch.irfft(q, 2, normalized=False, onesided=False, signal_sizes=(N,N)) #Velocity field in y-direction = -psi_x v = psi_h.clone() temp = v[...,0].clone() v[...,0] = 2*math.pi*k_x*v[...,1] v[...,1] = -2*math.pi*k_x*temp v = torch.irfft(v, 2, normalized=False, onesided=False, signal_sizes=(N,N)) #Partial x of vorticity w_x = w_h.clone() temp = w_x[...,0].clone() w_x[...,0] = -2*math.pi*k_x*w_x[...,1] w_x[...,1] = 2*math.pi*k_x*temp w_x = torch.irfft(w_x, 2, normalized=False, onesided=False, signal_sizes=(N,N)) #Partial y of vorticity w_y = w_h.clone() temp = w_y[...,0].clone() w_y[...,0] = -2*math.pi*k_y*w_y[...,1] w_y[...,1] = 2*math.pi*k_y*temp w_y = torch.irfft(w_y, 2, normalized=False, onesided=False, signal_sizes=(N,N)) #Non-linear term (u.grad(w)): compute in physical space then back to Fourier space F_h = torch.rfft(q*w_x + v*w_y, 2, normalized=False, onesided=False) #Dealias F_h[...,0] = dealias* F_h[...,0] F_h[...,1] = dealias* F_h[...,1] 
#Cranck-Nicholson update w_h[...,0] = (-delta_t*F_h[...,0] + delta_t*f_h[...,0] + (1.0 - 0.5*delta_t*visc*lap)*w_h[...,0])/(1.0 + 0.5*delta_t*visc*lap) w_h[...,1] = (-delta_t*F_h[...,1] + delta_t*f_h[...,1] + (1.0 - 0.5*delta_t*visc*lap)*w_h[...,1])/(1.0 + 0.5*delta_t*visc*lap) #Update real time (used only for recording) t += delta_t if (j+1) % record_time == 0: #Solution in physical space w = torch.irfft(w_h, 2, normalized=False, onesided=False, signal_sizes=(N,N)) #Record solution and time sol[...,c] = w sol_t[c] = t c += 1 return sol, sol_t if __name__ == '__main__': device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # Resolution # s = 2048 # sub = 1 # # # Number of solutions to generate # N = 10 # # # Set up 2d GRF with covariance parameters # GRF = GaussianRF(2, s, alpha=2.5, tau=7, device=device) # # # Forcing function: 0.1*(sin(2pi(x+y)) + cos(2pi(x+y))) # t = torch.linspace(0, 1, s + 1, device=device) # t = t[0:-1] # # X, Y = torch.meshgrid(t, t) # f = 0.1 * (torch.sin(2 * math.pi * (X + Y)) + torch.cos(2 * math.pi * (X + Y))) # # # Number of snapshots from solution # record_steps = 200 # # # Inputs # a = torch.zeros(N, s, s) # # Solutions # u = torch.zeros(N, s, s, record_steps) # # # Solve equations in batches (order of magnitude speed-up) # # # Batch size # bsize = 10 # # c = 0 # t0 = default_timer() # for j in range(N // bsize): # # Sample random feilds # w0 = GRF.sample(bsize) # # # Solve NS # sol, sol_t = navier_stokes_2d(w0, f, 1e-3, 50.0, 1e-4, record_steps) # # a[c:(c + bsize), ...] = w0 # u[c:(c + bsize), ...] 
= sol # # c += bsize # t1 = default_timer() # print(j, c, t1 - t0) # torch.save( # { # 'a': a.cpu(), # 'u': u.cpu(), # 't': sol_t.cpu() # }, # 'data/ns_data.pt' # ) # scipy.io.savemat('data/ns_data.mat', mdict={'a': a.cpu().numpy(), 'u': u.cpu().numpy(), 't': sol_t.cpu().numpy()}) ================================================ FILE: solver/periodic.py ================================================ import torch import torch.fft as fft import math #Setup for indexing in the 'ij' format #Solve: -Lap(u) = f class Poisson2d(object): def __init__(self, s1, s2, L1=2*math.pi, L2=2*math.pi, device=None, dtype=torch.float64): self.s1 = s1 self.s2 = s2 #Inverse negative Laplacian freq_list1 = torch.cat((torch.arange(start=0, end=s1//2, step=1),\ torch.arange(start=-s1//2, end=0, step=1)), 0) k1 = freq_list1.view(-1,1).repeat(1, s2//2 + 1).type(dtype).to(device) freq_list2 = torch.arange(start=0, end=s2//2 + 1, step=1) k2 = freq_list2.view(1,-1).repeat(s1, 1).type(dtype).to(device) self.inv_lap = ((4*math.pi**2)/(L1**2))*k1**2 + ((4*math.pi**2)/(L2**2))*k2**2 self.inv_lap[0,0] = 1.0 self.inv_lap = 1.0/self.inv_lap def solve(self, f): return fft.irfft2(fft.rfft2(f)*self.inv_lap, s=(self.s1, self.s2)) def __call__(self, f): return self.solve(f) #Solve: w_t = - u . 
grad(w) + (1/Re)*Lap(w) + f # u = (psi_y, -psi_x) # -Lap(psi) = w #Note: Adaptive time-step takes smallest step across the batch class NavierStokes2d(object): def __init__(self, s1, s2, L1=2*math.pi, L2=2*math.pi, device=None, dtype=torch.float64): self.s1 = s1 self.s2 = s2 self.L1 = L1 self.L2 = L2 self.h = 1.0/max(s1, s2) #Wavenumbers for first derivatives freq_list1 = torch.cat((torch.arange(start=0, end=s1//2, step=1),\ torch.zeros((1,)),\ torch.arange(start=-s1//2 + 1, end=0, step=1)), 0) self.k1 = freq_list1.view(-1,1).repeat(1, s2//2 + 1).type(dtype).to(device) freq_list2 = torch.cat((torch.arange(start=0, end=s2//2, step=1), torch.zeros((1,))), 0) self.k2 = freq_list2.view(1,-1).repeat(s1, 1).type(dtype).to(device) #Negative Laplacian freq_list1 = torch.cat((torch.arange(start=0, end=s1//2, step=1),\ torch.arange(start=-s1//2, end=0, step=1)), 0) k1 = freq_list1.view(-1,1).repeat(1, s2//2 + 1).type(dtype).to(device) freq_list2 = torch.arange(start=0, end=s2//2 + 1, step=1) k2 = freq_list2.view(1,-1).repeat(s1, 1).type(dtype).to(device) self.G = ((4*math.pi**2)/(L1**2))*k1**2 + ((4*math.pi**2)/(L2**2))*k2**2 #Inverse of negative Laplacian self.inv_lap = self.G.clone() self.inv_lap[0,0] = 1.0 self.inv_lap = 1.0/self.inv_lap #Dealiasing mask using 2/3 rule self.dealias = (self.k1**2 + self.k2**2 <= 0.6*(0.25*s1**2 + 0.25*s2**2)).type(dtype).to(device) #Ensure mean zero self.dealias[0,0] = 0.0 #Compute stream function from vorticity (Fourier space) def stream_function(self, w_h, real_space=False): #-Lap(psi) = w psi_h = self.inv_lap*w_h if real_space: return fft.irfft2(psi_h, s=(self.s1, self.s2)) else: return psi_h #Compute velocity field from stream function (Fourier space) def velocity_field(self, stream_f, real_space=True): #Velocity field in x-direction = psi_y q_h = (2*math.pi/self.L2)*1j*self.k2*stream_f #Velocity field in y-direction = -psi_x v_h = -(2*math.pi/self.L1)*1j*self.k1*stream_f if real_space: return fft.irfft2(q_h, s=(self.s1, self.s2)), 
fft.irfft2(v_h, s=(self.s1, self.s2)) else: return q_h, v_h #Compute non-linear term + forcing from given vorticity (Fourier space) def nonlinear_term(self, w_h, f_h=None): #Physical space vorticity w = fft.irfft2(w_h, s=(self.s1, self.s2)) #Physical space velocity q, v = self.velocity_field(self.stream_function(w_h, real_space=False), real_space=True) #Compute non-linear term in Fourier space nonlin = -1j*((2*math.pi/self.L1)*self.k1*fft.rfft2(q*w) + (2*math.pi/self.L1)*self.k2*fft.rfft2(v*w)) #Add forcing function if f_h is not None: nonlin += f_h return nonlin def time_step(self, q, v, f, Re): #Maxixum speed max_speed = torch.max(torch.sqrt(q**2 + v**2)).item() #Maximum force amplitude if f is not None: xi = torch.sqrt(torch.max(torch.abs(f))).item() else: xi = 1.0 #Viscosity mu = (1.0/Re)*xi*((self.L1/(2*math.pi))**(3.0/4.0))*(((self.L2/(2*math.pi))**(3.0/4.0))) if max_speed == 0: return 0.5*(self.h**2)/mu #Time step based on CFL condition return min(0.5*self.h/max_speed, 0.5*(self.h**2)/mu) def advance(self, w, f=None, T=1.0, Re=100, adaptive=True, delta_t=1e-3): #Rescale Laplacian by Reynolds number GG = (1.0/Re)*self.G #Move to Fourier space w_h = fft.rfft2(w) if f is not None: f_h = fft.rfft2(f) else: f_h = None if adaptive: q, v = self.velocity_field(self.stream_function(w_h, real_space=False), real_space=True) delta_t = self.time_step(q, v, f, Re) time = 0.0 #Advance solution in Fourier space while time < T: if time + delta_t > T: current_delta_t = T - time else: current_delta_t = delta_t #Inner-step of Heun's method nonlin1 = self.nonlinear_term(w_h, f_h) w_h_tilde = (w_h + current_delta_t*(nonlin1 - 0.5*GG*w_h))/(1.0 + 0.5*current_delta_t*GG) #Cranck-Nicholson + Heun update nonlin2 = self.nonlinear_term(w_h_tilde, f_h) w_h = (w_h + current_delta_t*(0.5*(nonlin1 + nonlin2) - 0.5*GG*w_h))/(1.0 + 0.5*current_delta_t*GG) #De-alias w_h *= self.dealias #Update time time += current_delta_t #New time step if adaptive: q, v = 
self.velocity_field(self.stream_function(w_h, real_space=False), real_space=True) delta_t = self.time_step(q, v, f, Re) return fft.irfft2(w_h, s=(self.s1, self.s2)) def __call__(self, w, f=None, T=1.0, Re=100, adaptive=True, delta_t=1e-3): return self.advance(w, f, T, Re, adaptive, delta_t) ================================================ FILE: solver/random_fields.py ================================================ import torch import math torch.manual_seed(0) class GaussianRF(object): def __init__(self, dim, size, length=1.0, alpha=2.0, tau=3.0, sigma=None, boundary="periodic", constant_eig=False, device=None): self.dim = dim self.device = device if sigma is None: sigma = tau**(0.5*(2*alpha - self.dim)) k_max = size//2 const = (4*(math.pi**2))/(length**2) if dim == 1: k = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0) self.sqrt_eig = size*math.sqrt(2.0)*sigma*((const*(k**2) + tau**2)**(-alpha/2.0)) if constant_eig: self.sqrt_eig[0] = size*sigma*(tau**(-alpha)) else: self.sqrt_eig[0] = 0.0 elif dim == 2: wavenumers = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0).repeat(size,1) k_x = wavenumers.transpose(0,1) k_y = wavenumers self.sqrt_eig = (size**2)*math.sqrt(2.0)*sigma*((const*(k_x**2 + k_y**2) + tau**2)**(-alpha/2.0)) if constant_eig: self.sqrt_eig[0,0] = (size**2)*sigma*(tau**(-alpha)) else: self.sqrt_eig[0,0] = 0.0 elif dim == 3: wavenumers = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0).repeat(size,size,1) k_x = wavenumers.transpose(1,2) k_y = wavenumers k_z = wavenumers.transpose(0,2) self.sqrt_eig = (size**3)*math.sqrt(2.0)*sigma*((const*(k_x**2 + k_y**2 + k_z**2) + tau**2)**(-alpha/2.0)) if constant_eig: self.sqrt_eig[0,0,0] = (size**3)*sigma*(tau**(-alpha)) else: self.sqrt_eig[0,0,0] = 0.0 self.size = 
[] for j in range(self.dim): self.size.append(size) self.size = tuple(self.size) def sample(self, N): coeff = torch.randn(N, *self.size, dtype=torch.cfloat, device=self.device) coeff = self.sqrt_eig*coeff u = torch.fft.irfftn(coeff, self.size, norm="backward") return u class GaussianRF2d(object): def __init__(self, s1, s2, L1=2*math.pi, L2=2*math.pi, alpha=2.0, tau=3.0, sigma=None, mean=None, boundary="periodic", device=None, dtype=torch.float64): self.s1 = s1 self.s2 = s2 self.mean = mean self.device = device self.dtype = dtype if sigma is None: self.sigma = tau**(0.5*(2*alpha - 2.0)) else: self.sigma = sigma const1 = (4*(math.pi**2))/(L1**2) const2 = (4*(math.pi**2))/(L2**2) freq_list1 = torch.cat((torch.arange(start=0, end=s1//2, step=1),\ torch.arange(start=-s1//2, end=0, step=1)), 0) k1 = freq_list1.view(-1,1).repeat(1, s2//2 + 1).type(dtype).to(device) freq_list2 = torch.arange(start=0, end=s2//2 + 1, step=1) k2 = freq_list2.view(1,-1).repeat(s1, 1).type(dtype).to(device) self.sqrt_eig = s1*s2*self.sigma*((const1*k1**2 + const2*k2**2 + tau**2)**(-alpha/2.0)) self.sqrt_eig[0,0] = 0.0 def sample(self, N, xi=None): if xi is None: xi = torch.randn(N, self.s1, self.s2//2 + 1, 2, dtype=self.dtype, device=self.device) xi[...,0] = self.sqrt_eig*xi [...,0] xi[...,1] = self.sqrt_eig*xi [...,1] u = torch.fft.irfft2(torch.view_as_complex(xi), s=(self.s1, self.s2)) if self.mean is not None: u += self.mean return u ================================================ FILE: solver/rfsampler.py ================================================ import torch import math class GaussianRF(object): def __init__(self, dim, size, alpha=2, tau=3, sigma=None, boundary="periodic", device=None): self.dim = dim self.device = device if sigma is None: sigma = tau ** (0.5 * (2 * alpha - self.dim)) k_max = size // 2 if dim == 1: k = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0) self.sqrt_eig = size * 
math.sqrt(2.0) * sigma * ( (4 * (math.pi ** 2) * (k ** 2) + tau ** 2) ** (-alpha / 2.0)) self.sqrt_eig[0] = 0.0 elif dim == 2: wavenumers = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0).repeat(size, 1) k_x = wavenumers.transpose(0, 1) k_y = wavenumers self.sqrt_eig = (size ** 2) * math.sqrt(2.0) * sigma * ( (4 * (math.pi ** 2) * (k_x ** 2 + k_y ** 2) + tau ** 2) ** (-alpha / 2.0)) self.sqrt_eig[0, 0] = 0.0 elif dim == 3: wavenumers = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), \ torch.arange(start=-k_max, end=0, step=1, device=device)), 0).repeat(size, size, 1) k_x = wavenumers.transpose(1, 2) k_y = wavenumers k_z = wavenumers.transpose(0, 2) self.sqrt_eig = (size ** 3) * math.sqrt(2.0) * sigma * ( (4 * (math.pi ** 2) * (k_x ** 2 + k_y ** 2 + k_z ** 2) + tau ** 2) ** (-alpha / 2.0)) self.sqrt_eig[0, 0, 0] = 0.0 self.size = [] for j in range(self.dim): self.size.append(size) self.size = tuple(self.size) def sample(self, N): coeff = torch.randn(N, *self.size, 2, device=self.device) coeff[..., 0] = self.sqrt_eig * coeff[..., 0] coeff[..., 1] = self.sqrt_eig * coeff[..., 1] u = torch.ifft(coeff, self.dim, normalized=False) u = u[..., 0] return u ================================================ FILE: solver/spectrum.py ================================================ import math from rfsampler import GaussianRF import torch from timeit import default_timer import scipy.io # w0: initial vorticity # f: forcing term # visc: viscosity (1/Re) # T: final time # delta_t: internal time-step for solve (descrease if blow-up) # record_steps: number of in-time snapshots to record def navier_stokes_2d(w0, f, visc, T, delta_t=1e-4, record_steps=1): ''' Args: w0: initial vorticity f: forcing visc: 1/Re T: final time delta_t: internal time-step for solve (decrease if blow-up) record_steps: number of in-time snapshots to save Returns: ''' # Grid size - must be power of 2 N = 
w0.size()[-1] # Maximum frequency k_max = math.floor(N / 2.0) # Number of steps to final time steps = math.ceil(T / delta_t) # Initial vorticity to Fourier space w_h = torch.rfft(w0, 2, normalized=False, onesided=False) # Forcing to Fourier space f_h = torch.rfft(f, 2, normalized=False, onesided=False) # If same forcing for the whole batch if len(f_h.size()) < len(w_h.size()): f_h = torch.unsqueeze(f_h, 0) # Record solution every this number of steps record_time = math.floor(steps / record_steps) # Wavenumbers in y-direction k_y = torch.cat((torch.arange(start=0, end=k_max, step=1, device=w0.device), torch.arange(start=-k_max, end=0, step=1, device=w0.device)), 0).repeat(N, 1) # Wavenumbers in x-direction k_x = k_y.transpose(0, 1) # Negative Laplacian in Fourier space lap = 4 * (math.pi ** 2) * (k_x ** 2 + k_y ** 2) lap[0, 0] = 1.0 # Dealiasing mask dealias = torch.unsqueeze( torch.logical_and(torch.abs(k_y) <= (2.0 / 3.0) * k_max, torch.abs(k_x) <= (2.0 / 3.0) * k_max).float(), 0) # Saving solution and time sol = torch.zeros(*w0.size(), record_steps, device=w0.device) sol_t = torch.zeros(record_steps, device=w0.device) # Record counter c = 0 # Physical time t = 0.0 for j in range(steps): # Stream function in Fourier space: solve Poisson equation psi_h = w_h.clone() psi_h[..., 0] = psi_h[..., 0] / lap psi_h[..., 1] = psi_h[..., 1] / lap # Velocity field in x-direction = psi_y q = psi_h.clone() temp = q[..., 0].clone() q[..., 0] = -2 * math.pi * k_y * q[..., 1] q[..., 1] = 2 * math.pi * k_y * temp q = torch.irfft(q, 2, normalized=False, onesided=False, signal_sizes=(N, N)) # Velocity field in y-direction = -psi_x v = psi_h.clone() temp = v[..., 0].clone() v[..., 0] = 2 * math.pi * k_x * v[..., 1] v[..., 1] = -2 * math.pi * k_x * temp v = torch.irfft(v, 2, normalized=False, onesided=False, signal_sizes=(N, N)) # Partial x of vorticity w_x = w_h.clone() temp = w_x[..., 0].clone() w_x[..., 0] = -2 * math.pi * k_x * w_x[..., 1] w_x[..., 1] = 2 * math.pi * k_x * temp w_x 
= torch.irfft(w_x, 2, normalized=False, onesided=False, signal_sizes=(N, N)) # Partial y of vorticity w_y = w_h.clone() temp = w_y[..., 0].clone() w_y[..., 0] = -2 * math.pi * k_y * w_y[..., 1] w_y[..., 1] = 2 * math.pi * k_y * temp w_y = torch.irfft(w_y, 2, normalized=False, onesided=False, signal_sizes=(N, N)) # Non-linear term (u.grad(w)): compute in physical space then back to Fourier space F_h = torch.rfft(q * w_x + v * w_y, 2, normalized=False, onesided=False) # Dealias F_h[..., 0] = dealias * F_h[..., 0] F_h[..., 1] = dealias * F_h[..., 1] # Cranck-Nicholson update w_h[..., 0] = (-delta_t * F_h[..., 0] + delta_t * f_h[..., 0] + (1.0 - 0.5 * delta_t * visc * lap) * w_h[ ..., 0]) / (1.0 + 0.5 * delta_t * visc * lap) w_h[..., 1] = (-delta_t * F_h[..., 1] + delta_t * f_h[..., 1] + (1.0 - 0.5 * delta_t * visc * lap) * w_h[ ..., 1]) / (1.0 + 0.5 * delta_t * visc * lap) # Update real time (used only for recording) t += delta_t if (j + 1) % record_time == 0: # Solution in physical space w = torch.irfft(w_h, 2, normalized=False, onesided=False, signal_sizes=(N, N)) # Record solution and time sol[..., c] = w sol_t[c] = t c += 1 return sol, sol_t if __name__ == '__main__': device = torch.device('cuda') # Resolution s = 256 sub = 1 # Number of solutions to generate N = 20 # Set up 2d GRF with covariance parameters GRF = GaussianRF(2, s, alpha=2.5, tau=7, device=device) # Forcing function: 0.1*(sin(2pi(x+y)) + cos(2pi(x+y))) t = torch.linspace(0, 1, s + 1, device=device) t = t[0:-1] X, Y = torch.meshgrid(t, t) f = 0.1 * (torch.sin(2 * math.pi * (X + Y)) + torch.cos(2 * math.pi * (X + Y))) # Number of snapshots from solution record_steps = 200 # Inputs a = torch.zeros(N, s, s) # Solutions u = torch.zeros(N, s, s, record_steps) # Solve equations in batches (order of magnitude speed-up) # Batch size bsize = 20 c = 0 t0 = default_timer() for j in range(N // bsize): # Sample random feilds w0 = GRF.sample(bsize) # Solve NS sol, sol_t = navier_stokes_2d(w0, f, 1e-3, 50.0, 1e-4, 
record_steps) a[c:(c + bsize), ...] = w0 u[c:(c + bsize), ...] = sol c += bsize t1 = default_timer() print(j, c, t1 - t0) scipy.io.savemat('ns_data.mat', mdict={'a': a.cpu().numpy(), 'u': u.cpu().numpy(), 't': sol_t.cpu().numpy()}) ================================================ FILE: train_PINO3d.py ================================================ import yaml from argparse import ArgumentParser import random import torch from torch.utils.data import DataLoader import torch.multiprocessing as mp from torch.nn.parallel import DistributedDataParallel as DDP from train_utils import Adam from train_utils.datasets import NSLoader from train_utils.data_utils import data_sampler from train_utils.losses import get_forcing from train_utils.train_3d import train from train_utils.distributed import setup, cleanup from train_utils.utils import requires_grad from models import FNO3d, FNO2d def subprocess_fn(rank, args): if args.distributed: setup(rank, args.num_gpus) print(f'Running on rank {rank}') config_file = args.config_path with open(config_file, 'r') as stream: config = yaml.load(stream, yaml.FullLoader) # construct dataloader data_config = config['data'] if 'datapath2' in data_config: loader = NSLoader(datapath1=data_config['datapath'], datapath2=data_config['datapath2'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], N=data_config['total_num'], t_interval=data_config['time_interval']) else: loader = NSLoader(datapath1=data_config['datapath'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], N=data_config['total_num'], t_interval=data_config['time_interval']) if args.start != -1: config['data']['offset'] = args.start trainset = loader.make_dataset(data_config['n_sample'], start=data_config['offset']) train_loader = DataLoader(trainset, batch_size=config['train']['batchsize'], sampler=data_sampler(trainset, shuffle=data_config['shuffle'], distributed=args.distributed), 
drop_last=True) # construct model model = FNO3d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], modes3=config['model']['modes3'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers']).to(rank) if 'ckpt' in config['train']: ckpt_path = config['train']['ckpt'] ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) if args.distributed: model = DDP(model, device_ids=[rank], broadcast_buffers=False) if 'twolayer' in config['train'] and config['train']['twolayer']: requires_grad(model, False) requires_grad(model.sp_convs[-1], True) requires_grad(model.ws[-1], True) requires_grad(model.fc1, True) requires_grad(model.fc2, True) params = [] for param in model.parameters(): if param.requires_grad == True: params.append(param) else: params = model.parameters() optimizer = Adam(params, betas=(0.9, 0.999), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) forcing = get_forcing(loader.S).to(rank) train(model, loader, train_loader, optimizer, scheduler, forcing, config, rank, log=args.log, project=config['log']['project'], group=config['log']['group']) if args.distributed: cleanup() print(f'Process {rank} done!...') if __name__ == '__main__': seed = random.randint(1, 10000) print(f'Random seed :{seed}') torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) torch.backends.cudnn.benchmark = True parser =ArgumentParser(description='Basic paser') parser.add_argument('--config_path', type=str, help='Path to the configuration file') parser.add_argument('--log', action='store_true', help='Turn on the wandb') parser.add_argument('--num_gpus', type=int, help='Number of GPUs', default=1) parser.add_argument('--start', type=int, default=-1, help='start index') args = parser.parse_args() args.distributed = args.num_gpus > 1 if args.distributed: mp.spawn(subprocess_fn, 
args=(args, ), nprocs=args.num_gpus) else: subprocess_fn(0, args) ================================================ FILE: train_burgers.py ================================================ from argparse import ArgumentParser import yaml import torch from models import FNO2d from train_utils import Adam from train_utils.datasets import BurgersLoader from train_utils.train_2d import train_2d_burger from train_utils.eval_2d import eval_burgers def run(args, config): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_config = config['data'] dataset = BurgersLoader(data_config['datapath'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], new=True) train_loader = dataset.make_loader(n_sample=data_config['n_sample'], batch_size=config['train']['batchsize'], start=data_config['offset']) model = FNO2d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers'], act=config['model']['act']).to(device) # Load from checkpoint if 'ckpt' in config['train']: ckpt_path = config['train']['ckpt'] ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) optimizer = Adam(model.parameters(), betas=(0.9, 0.999), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) train_2d_burger(model, train_loader, dataset.v, optimizer, scheduler, config, rank=0, log=args.log, project=config['log']['project'], group=config['log']['group']) def test(config): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_config = config['data'] dataset = BurgersLoader(data_config['datapath'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], new=True) dataloader = dataset.make_loader(n_sample=data_config['n_sample'], 
batch_size=config['test']['batchsize'], start=data_config['offset']) model = FNO2d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers'], act=config['model']['act']).to(device) # Load from checkpoint if 'ckpt' in config['test']: ckpt_path = config['test']['ckpt'] ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) eval_burgers(model, dataloader, dataset.v, config, device) if __name__ == '__main__': parser = ArgumentParser(description='Basic paser') parser.add_argument('--config_path', type=str, help='Path to the configuration file') parser.add_argument('--log', action='store_true', help='Turn on the wandb') parser.add_argument('--mode', type=str, help='train or test') args = parser.parse_args() config_file = args.config_path with open(config_file, 'r') as stream: config = yaml.load(stream, yaml.FullLoader) if args.mode == 'train': run(args, config) else: test(config) ================================================ FILE: train_darcy.py ================================================ import os import yaml import random from argparse import ArgumentParser from tqdm import tqdm import numpy as np import torch from torch.optim import Adam from torch.utils.data import DataLoader from models import FNO2d from train_utils.losses import LpLoss, darcy_loss from train_utils.datasets import DarcyFlow, DarcyIC, sample_data from train_utils.utils import save_ckpt, count_params, dict2str try: import wandb except ImportError: wandb = None def get_molifier(mesh, device): mollifier = 0.001 * torch.sin(np.pi * mesh[..., 0]) * torch.sin(np.pi * mesh[..., 1]) return mollifier.to(device) @torch.no_grad() def eval_darcy(model, val_loader, criterion, device='cpu'): mollifier = get_molifier(val_loader.dataset.mesh, device) model.eval() val_err = [] for a, u in val_loader: a, u = a.to(device), u.to(device) out = model(a).squeeze(dim=-1) out = out * 
mollifier val_loss = criterion(out, u) val_err.append(val_loss.item()) N = len(val_loader) avg_err = np.mean(val_err) std_err = np.std(val_err, ddof=1) / np.sqrt(N) return avg_err, std_err def train(model, train_u_loader, # training data ic_loader, # loader for initial conditions val_loader, # validation data optimizer, scheduler, device, config, args): save_step = config['train']['save_step'] eval_step = config['train']['eval_step'] f_weight = config['train']['f_loss'] xy_weight = config['train']['xy_loss'] # set up directory base_dir = os.path.join('exp', config['log']['logdir']) ckpt_dir = os.path.join(base_dir, 'ckpts') os.makedirs(ckpt_dir, exist_ok=True) # loss fn lploss = LpLoss(size_average=True) # mollifier u_mol = get_molifier(train_u_loader.dataset.mesh, device) ic_mol = get_molifier(ic_loader.dataset.mesh, device) # set up wandb if wandb and args.log: run = wandb.init(project=config['log']['project'], entity=config['log']['entity'], group=config['log']['group'], config=config, reinit=True, settings=wandb.Settings(start_method='fork')) pbar = range(config['train']['num_iter']) pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.2) u_loader = sample_data(train_u_loader) ic_loader = sample_data(ic_loader) for e in pbar: log_dict = {} optimizer.zero_grad() # data loss if xy_weight > 0: ic, u = next(u_loader) u = u.to(device) ic = ic.to(device) out = model(ic).squeeze(dim=-1) out = out * u_mol data_loss = lploss(out, u) else: data_loss = torch.zeros(1, device=device) if f_weight > 0: # pde loss ic = next(ic_loader) ic = ic.to(device) out = model(ic).squeeze(dim=-1) out = out * ic_mol u0 = ic[..., 0] f_loss = darcy_loss(out, u0) log_dict['PDE'] = f_loss.item() else: f_loss = 0.0 loss = data_loss * xy_weight + f_loss * f_weight loss.backward() optimizer.step() scheduler.step() log_dict['train loss'] = loss.item() log_dict['data'] = data_loss.item() if e % eval_step == 0: eval_err, std_err = eval_darcy(model, val_loader, lploss, device) log_dict['val error'] = 
eval_err logstr = dict2str(log_dict) pbar.set_description( ( logstr ) ) if wandb and args.log: wandb.log(log_dict) if e % save_step == 0 and e > 0: ckpt_path = os.path.join(ckpt_dir, f'model-{e}.pt') save_ckpt(ckpt_path, model, optimizer, scheduler) # clean up wandb if wandb and args.log: run.finish() def subprocess(args): with open(args.config, 'r') as f: config = yaml.load(f, yaml.FullLoader) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # set random seed config['seed'] = args.seed seed = args.seed torch.manual_seed(seed) random.seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) # create model model = FNO2d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers'], act=config['model']['act'], pad_ratio=config['model']['pad_ratio']).to(device) num_params = count_params(model) config['num_params'] = num_params print(f'Number of parameters: {num_params}') # Load from checkpoint if args.ckpt: ckpt_path = args.ckpt ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) if args.test: batchsize = config['test']['batchsize'] testset = DarcyFlow(datapath=config['test']['path'], nx=config['test']['nx'], sub=config['test']['sub'], offset=config['test']['offset'], num=config['test']['n_sample']) testloader = DataLoader(testset, batch_size=batchsize, num_workers=4) criterion = LpLoss() test_err, std_err = eval_darcy(model, testloader, criterion, device) print(f'Averaged test relative L2 error: {test_err}; Standard error: {std_err}') else: # training set batchsize = config['train']['batchsize'] u_set = DarcyFlow(datapath=config['data']['path'], nx=config['data']['nx'], sub=config['data']['sub'], offset=config['data']['offset'], num=config['data']['n_sample']) u_loader = DataLoader(u_set, batch_size=batchsize, num_workers=4, shuffle=True) ic_set = DarcyIC(datapath=config['data']['path'], 
nx=config['data']['nx'], sub=config['data']['pde_sub'], offset=config['data']['offset'], num=config['data']['n_sample']) ic_loader = DataLoader(ic_set, batch_size=batchsize, num_workers=4, shuffle=True) # val set valset = DarcyFlow(datapath=config['test']['path'], nx=config['test']['nx'], sub=config['test']['sub'], offset=config['test']['offset'], num=config['test']['n_sample']) val_loader = DataLoader(valset, batch_size=batchsize, num_workers=4) print(f'Train set: {len(u_set)}; test set: {len(valset)}.') optimizer = Adam(model.parameters(), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) if args.ckpt: ckpt = torch.load(ckpt_path) optimizer.load_state_dict(ckpt['optim']) scheduler.load_state_dict(ckpt['scheduler']) train(model, u_loader, ic_loader, val_loader, optimizer, scheduler, device, config, args) print('Done!') if __name__ == '__main__': torch.backends.cudnn.benchmark = True # parse options parser = ArgumentParser(description='Basic paser') parser.add_argument('--config', type=str, help='Path to the configuration file') parser.add_argument('--log', action='store_true', help='Turn on the wandb') parser.add_argument('--seed', type=int, default=None) parser.add_argument('--ckpt', type=str, default=None) parser.add_argument('--test', action='store_true', help='Test') args = parser.parse_args() if args.seed is None: args.seed = random.randint(0, 100000) subprocess(args) ================================================ FILE: train_no.py ================================================ import os import yaml import random from argparse import ArgumentParser import math from tqdm import tqdm import torch import torch.nn.functional as F from torch.utils.data import DataLoader, Subset from models import FNO3d from train_utils.adam import Adam from train_utils.losses import LpLoss, PINO_loss3d, get_forcing from train_utils.datasets import 
NS3DDataset, KFDataset from train_utils.utils import save_ckpt, count_params try: import wandb except ImportError: wandb = None def pad_input(x, num_pad): if num_pad >0: res = F.pad(x, (0, 0, 0, num_pad), 'constant', 0) else: res = x return res def train_ns(model, train_loader, val_loader, optimizer, scheduler, device, config, args): # parse configuration v = 1/ config['data']['Re'] t_duration = config['data']['t_duration'] num_pad = config['model']['num_pad'] save_step = config['train']['save_step'] ic_weight = config['train']['ic_loss'] f_weight = config['train']['f_loss'] xy_weight = config['train']['xy_loss'] # set up directory base_dir = os.path.join('exp', config['log']['logdir']) ckpt_dir = os.path.join(base_dir, 'ckpts') os.makedirs(ckpt_dir, exist_ok=True) # loss fn lploss = LpLoss(size_average=True) S = config['data']['pde_res'][0] data_s_step = train_loader.dataset.dataset.data_s_step data_t_step = train_loader.dataset.dataset.data_t_step forcing = get_forcing(S).to(device) # set up wandb if wandb and args.log: run = wandb.init(project=config['log']['project'], entity=config['log']['entity'], group=config['log']['group'], config=config, reinit=True, settings=wandb.Settings(start_method='fork')) pbar = range(config['train']['epochs']) pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.2) zero = torch.zeros(1).to(device) for e in pbar: loss_dict = { 'train_loss': 0.0, 'ic_loss': 0.0, 'pde_loss': 0.0 } # train model.train() for u, a in train_loader: u, a = u.to(device), a.to(device) optimizer.zero_grad() if ic_weight == 0.0 and f_weight == 0.0: # FNO a_in = a[:, ::data_s_step, ::data_s_step, ::data_t_step] out = model(a_in) loss_ic, loss_f = zero, zero loss = lploss(out, u) else: # PINO a_in = a out = model(a_in) # PDE loss u0 = a[:, :, :, 0, -1] loss_ic, loss_f = PINO_loss3d(out, u0, forcing, v, t_duration) # data loss # print(out.shape) # print(u.shape) data_loss = lploss(out[:, ::data_s_step, ::data_s_step, ::data_t_step], u) loss = data_loss * xy_weight 
+ loss_f * f_weight + loss_ic * ic_weight loss.backward() optimizer.step() loss_dict['train_loss'] += loss.item() loss_dict['ic_loss'] += loss_ic.item() loss_dict['pde_loss'] += loss_f.item() scheduler.step() loader_size = len(train_loader) train_loss = loss_dict['train_loss'] / loader_size ic_loss = loss_dict['ic_loss'] / loader_size pde_loss = loss_dict['pde_loss'] / loader_size # eval model.eval() with torch.no_grad(): val_error = 0.0 for u, a in val_loader: u, a = u.to(device), a.to(device) if ic_weight == 0.0 and f_weight == 0.0: # FNO a = a[:, ::data_s_step, ::data_s_step, ::data_t_step] a_in = a out = model(a_in) data_loss = lploss(out, u) else: # PINO a_in = a out = model(a_in) # data loss data_loss = lploss(out[:, ::data_s_step, ::data_s_step, ::data_t_step], u) val_error += data_loss.item() avg_val_error = val_error / len(val_loader) pbar.set_description( ( f'Train loss: {train_loss}. IC loss: {ic_loss}, PDE loss: {pde_loss}, val error: {avg_val_error}' ) ) log_dict = { 'Train loss': train_loss, 'IC loss': ic_loss, 'PDE loss': pde_loss, 'Val error': avg_val_error } if wandb and args.log: wandb.log(log_dict) if e % save_step == 0: ckpt_path = os.path.join(ckpt_dir, f'model-{e}.pt') save_ckpt(ckpt_path, model, optimizer) # clean up wandb if wandb and args.log: run.finish() def eval_ns(model, val_loader, device, config, args): # parse configuration v = 1/ config['data']['Re'] t_duration = config['data']['t_duration'] num_pad = config['model']['num_pad'] model.eval() # loss fn lploss = LpLoss(size_average=True) S = config['data']['pde_res'][0] data_s_step = val_loader.dataset.data_s_step data_t_step = val_loader.dataset.data_t_step with torch.no_grad(): val_error = 0.0 for u, a in tqdm(val_loader): u, a = u.to(device), a.to(device) # a = a[:, ::data_s_step, ::data_s_step, ::data_t_step] a_in = a out = model(a_in) out = out[:, ::data_s_step, ::data_s_step, ::data_t_step] data_loss = lploss(out, u) val_error += data_loss.item() avg_val_err = val_error / 
len(val_loader) print(f'Average relative L2 error {avg_val_err}') def subprocess(args): with open(args.config, 'r') as f: config = yaml.load(f, yaml.FullLoader) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # set random seed config['seed'] = args.seed seed = args.seed torch.manual_seed(seed) random.seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) # create model model = FNO3d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], modes3=config['model']['modes3'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers'], act=config['model']['act'], pad_ratio=config['model']['pad_ratio']).to(device) num_params = count_params(model) config['num_params'] = num_params print(f'Number of parameters: {num_params}') # Load from checkpoint if args.ckpt: ckpt_path = args.ckpt ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) datasets = { 'KF': KFDataset, 'NS': NS3DDataset } if 'name' in config['data']: dataname = config['data']['name'] else: dataname = 'NS' if args.test: batchsize = config['test']['batchsize'] testset = datasets[dataname](paths=config['data']['paths'], raw_res=config['data']['raw_res'], data_res=config['test']['data_res'], pde_res=config['data']['pde_res'], n_samples=config['data']['n_test_samples'], offset=config['data']['testoffset'], t_duration=config['data']['t_duration']) test_loader = DataLoader(testset, batch_size=batchsize, num_workers=4, shuffle=True) eval_ns(model, test_loader, device, config, args) else: # prepare datast batchsize = config['train']['batchsize'] dataset = datasets[dataname](paths=config['data']['paths'], raw_res=config['data']['raw_res'], data_res=config['data']['data_res'], pde_res=config['data']['pde_res'], n_samples=config['data']['n_samples'], offset=config['data']['offset'], t_duration=config['data']['t_duration']) idxs = torch.randperm(len(dataset)) # setup train and test num_test = 
config['data']['n_test_samples'] num_train = len(idxs) - num_test print(f'Number of training samples: {num_train};\nNumber of test samples: {num_test}.') train_idx = idxs[:num_train] test_idx = idxs[num_train:] trainset = Subset(dataset, indices=train_idx) valset = Subset(dataset, indices=test_idx) train_loader = DataLoader(trainset, batch_size=batchsize, num_workers=4, shuffle=True) val_loader = DataLoader(valset, batch_size=batchsize, num_workers=4) optimizer = Adam(model.parameters(), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) print(dataset.data.shape) train_ns(model, train_loader, val_loader, optimizer, scheduler, device, config, args) print('Done!') if __name__ == '__main__': torch.backends.cudnn.benchmark = True # parse options parser = ArgumentParser(description='Basic paser') parser.add_argument('--config', type=str, help='Path to the configuration file') parser.add_argument('--log', action='store_true', help='Turn on the wandb') parser.add_argument('--seed', type=int, default=None) parser.add_argument('--ckpt', type=str, default=None) parser.add_argument('--test', action='store_true', help='Test') args = parser.parse_args() if args.seed is None: args.seed = random.randint(0, 100000) subprocess(args) ================================================ FILE: train_operator.py ================================================ import yaml from argparse import ArgumentParser import math import torch from torch.utils.data import DataLoader from solver.random_fields import GaussianRF from train_utils import Adam from train_utils.datasets import NSLoader, online_loader, DarcyFlow, DarcyCombo from train_utils.train_3d import mixed_train from train_utils.train_2d import train_2d_operator from models import FNO3d, FNO2d def train_3d(args, config): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_config = 
config['data'] # prepare dataloader for training with data if 'datapath2' in data_config: loader = NSLoader(datapath1=data_config['datapath'], datapath2=data_config['datapath2'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], N=data_config['total_num'], t_interval=data_config['time_interval']) else: loader = NSLoader(datapath1=data_config['datapath'], nx=data_config['nx'], nt=data_config['nt'], sub=data_config['sub'], sub_t=data_config['sub_t'], N=data_config['total_num'], t_interval=data_config['time_interval']) train_loader = loader.make_loader(data_config['n_sample'], batch_size=config['train']['batchsize'], start=data_config['offset'], train=data_config['shuffle']) # prepare dataloader for training with only equations gr_sampler = GaussianRF(2, data_config['S2'], 2 * math.pi, alpha=2.5, tau=7, device=device) a_loader = online_loader(gr_sampler, S=data_config['S2'], T=data_config['T2'], time_scale=data_config['time_interval'], batchsize=config['train']['batchsize']) # create model print(device) model = FNO3d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], modes3=config['model']['modes3'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers'], act=config['model']['act']).to(device) # Load from checkpoint if 'ckpt' in config['train']: ckpt_path = config['train']['ckpt'] ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) # create optimizer and learning rate scheduler optimizer = Adam(model.parameters(), betas=(0.9, 0.999), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) mixed_train(model, train_loader, loader.S, loader.T, a_loader, data_config['S2'], data_config['T2'], optimizer, scheduler, config, device, log=args.log, project=config['log']['project'], group=config['log']['group']) def train_2d(args, 
config): device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') data_config = config['data'] # dataset = DarcyFlow(data_config['datapath'], # nx=data_config['nx'], sub=data_config['sub'], # offset=data_config['offset'], num=data_config['n_sample']) dataset = DarcyCombo(datapath=data_config['datapath'], nx=data_config['nx'], sub=data_config['sub'], pde_sub=data_config['pde_sub'], num=data_config['n_samples'], offset=data_config['offset']) train_loader = DataLoader(dataset, batch_size=config['train']['batchsize'], shuffle=True) model = FNO2d(modes1=config['model']['modes1'], modes2=config['model']['modes2'], fc_dim=config['model']['fc_dim'], layers=config['model']['layers'], act=config['model']['act'], pad_ratio=config['model']['pad_ratio']).to(device) # Load from checkpoint if 'ckpt' in config['train']: ckpt_path = config['train']['ckpt'] ckpt = torch.load(ckpt_path) model.load_state_dict(ckpt['model']) print('Weights loaded from %s' % ckpt_path) optimizer = Adam(model.parameters(), betas=(0.9, 0.999), lr=config['train']['base_lr']) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['milestones'], gamma=config['train']['scheduler_gamma']) train_2d_operator(model, train_loader, optimizer, scheduler, config, rank=0, log=args.log, project=config['log']['project'], group=config['log']['group']) if __name__ == '__main__': device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # parse options parser = ArgumentParser(description='Basic paser') parser.add_argument('--config_path', type=str, help='Path to the configuration file') parser.add_argument('--log', action='store_true', help='Turn on the wandb') args = parser.parse_args() config_file = args.config_path with open(config_file, 'r') as stream: config = yaml.load(stream, yaml.FullLoader) if 'name' in config['data'] and config['data']['name'] == 'Darcy': train_2d(args, config) else: train_3d(args, config) 
# ================================================ FILE: train_pino.py ================================================
from datetime import datetime
import os
import yaml
import random
from argparse import ArgumentParser
import math

from tqdm import tqdm
import numpy as np

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader

from models import FNO3d
from train_utils.losses import LpLoss, PINO_loss3d, get_forcing
from train_utils.datasets import KFDataset, KFaDataset, sample_data
from train_utils.utils import save_ckpt, count_params, dict2str

try:
    import wandb
except ImportError:
    wandb = None


@torch.no_grad()
def eval_ns(model, val_loader, criterion, device):
    """Return (mean, standard error) of per-batch validation losses."""
    model.eval()
    val_err = []
    for u, a in val_loader:
        u, a = u.to(device), a.to(device)
        out = model(a)
        val_loss = criterion(out, u)
        val_err.append(val_loss.item())
    N = len(val_loader)
    avg_err = np.mean(val_err)
    std_err = np.std(val_err, ddof=1) / np.sqrt(N)
    return avg_err, std_err


def train_ns(model,
             train_u_loader,        # training data
             train_a_loader,        # initial conditions
             val_loader,            # validation data
             optimizer, scheduler,
             device, config, args):
    """Iteration-based PINO training loop: data loss from labelled samples
    plus IC/PDE losses from equation-only initial conditions."""
    start_iter = config['train']['start_iter']
    v = 1 / config['data']['Re']
    t_duration = config['data']['t_duration']
    save_step = config['train']['save_step']
    eval_step = config['train']['eval_step']
    ic_weight = config['train']['ic_loss']
    f_weight = config['train']['f_loss']
    xy_weight = config['train']['xy_loss']
    # set up directory
    base_dir = os.path.join('exp', config['log']['logdir'])
    ckpt_dir = os.path.join(base_dir, 'ckpts')
    os.makedirs(ckpt_dir, exist_ok=True)
    # loss fn
    lploss = LpLoss(size_average=True)
    S = config['data']['pde_res'][0]
    forcing = get_forcing(S).to(device)
    # set up wandb
    if wandb and args.log:
        run = wandb.init(project=config['log']['project'],
                         entity=config['log']['entity'],
                         group=config['log']['group'],
                         config=config,
                         reinit=True,
                         settings=wandb.Settings(start_method='fork'))

    pbar = range(start_iter, config['train']['num_iter'])
    if args.tqdm:
        pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.2)
    # infinite iterators over the finite loaders
    u_loader = sample_data(train_u_loader)
    a_loader = sample_data(train_a_loader)

    for e in pbar:
        log_dict = {}

        optimizer.zero_grad()
        # data loss
        if xy_weight > 0:
            u, a_in = next(u_loader)
            u = u.to(device)
            a_in = a_in.to(device)
            out = model(a_in)
            data_loss = lploss(out, u)
        else:
            data_loss = torch.zeros(1, device=device)

        if f_weight != 0.0:
            # pde loss
            a = next(a_loader)
            a = a.to(device)
            out = model(a)
            u0 = a[:, :, :, 0, -1]
            loss_ic, loss_f = PINO_loss3d(out, u0, forcing, v, t_duration)
            log_dict['IC'] = loss_ic.item()
            log_dict['PDE'] = loss_f.item()
        else:
            loss_ic = loss_f = 0.0

        loss = data_loss * xy_weight + loss_f * f_weight + loss_ic * ic_weight
        loss.backward()
        optimizer.step()
        scheduler.step()

        log_dict['train loss'] = loss.item()
        log_dict['data'] = data_loss.item()
        if e % eval_step == 0:
            eval_err, std_err = eval_ns(model, val_loader, lploss, device)
            log_dict['val error'] = eval_err
        if args.tqdm:
            logstr = dict2str(log_dict)
            pbar.set_description(
                (
                    logstr
                )
            )
        if wandb and args.log:
            wandb.log(log_dict)
        if e % save_step == 0 and e > 0:
            ckpt_path = os.path.join(ckpt_dir, f'model-{e}.pt')
            save_ckpt(ckpt_path, model, optimizer, scheduler)

    # clean up wandb
    if wandb and args.log:
        run.finish()


def subprocess(args):
    """Entry point: build the model and either test or train per `args`."""
    with open(args.config, 'r') as f:
        config = yaml.load(f, yaml.FullLoader)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # set random seed
    config['seed'] = args.seed
    seed = args.seed
    torch.manual_seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # create model
    model = FNO3d(modes1=config['model']['modes1'],
                  modes2=config['model']['modes2'],
                  modes3=config['model']['modes3'],
                  fc_dim=config['model']['fc_dim'],
                  layers=config['model']['layers'],
                  act=config['model']['act'],
                  pad_ratio=config['model']['pad_ratio']).to(device)
    num_params = count_params(model)
    config['num_params'] = num_params
    print(f'Number of parameters: {num_params}')
    # Load from checkpoint
    if args.ckpt:
        ckpt_path = args.ckpt
        ckpt = torch.load(ckpt_path)
        model.load_state_dict(ckpt['model'])
        print('Weights loaded from %s' % ckpt_path)

    if args.test:
        batchsize = config['test']['batchsize']
        testset = KFDataset(paths=config['data']['paths'],
                            raw_res=config['data']['raw_res'],
                            data_res=config['test']['data_res'],
                            pde_res=config['test']['data_res'],
                            n_samples=config['data']['n_test_samples'],
                            offset=config['data']['testoffset'],
                            t_duration=config['data']['t_duration'])
        testloader = DataLoader(testset, batch_size=batchsize, num_workers=4)
        criterion = LpLoss()
        test_err, std_err = eval_ns(model, testloader, criterion, device)
        print(f'Averaged test relative L2 error: {test_err}; Standard error: {std_err}')
    else:
        # training set
        batchsize = config['train']['batchsize']
        u_set = KFDataset(paths=config['data']['paths'],
                          raw_res=config['data']['raw_res'],
                          data_res=config['data']['data_res'],
                          pde_res=config['data']['data_res'],
                          n_samples=config['data']['n_data_samples'],
                          offset=config['data']['offset'],
                          t_duration=config['data']['t_duration'])
        u_loader = DataLoader(u_set, batch_size=batchsize, num_workers=4, shuffle=True)
        a_set = KFaDataset(paths=config['data']['paths'],
                           raw_res=config['data']['raw_res'],
                           pde_res=config['data']['pde_res'],
                           n_samples=config['data']['n_a_samples'],
                           offset=config['data']['a_offset'],
                           t_duration=config['data']['t_duration'])
        a_loader = DataLoader(a_set, batch_size=batchsize, num_workers=4, shuffle=True)
        # val set
        valset = KFDataset(paths=config['data']['paths'],
                           raw_res=config['data']['raw_res'],
                           data_res=config['test']['data_res'],
                           pde_res=config['test']['data_res'],
                           n_samples=config['data']['n_test_samples'],
                           offset=config['data']['testoffset'],
                           t_duration=config['data']['t_duration'])
        val_loader = DataLoader(valset, batch_size=batchsize, num_workers=4)
        print(f'Train set: {len(u_set)}; Test set: {len(valset)}; IC set: {len(a_set)}')
        optimizer = Adam(model.parameters(), lr=config['train']['base_lr'])
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         milestones=config['train']['milestones'],
                                                         gamma=config['train']['scheduler_gamma'])
        if args.ckpt:
            # resume optimizer/scheduler state and continue from the saved step
            # NOTE(review): indentation of the start_iter line is ambiguous in
            # the collapsed source; placed inside the resume branch so a fresh
            # run keeps whatever 'start_iter' the config provides — confirm.
            ckpt = torch.load(ckpt_path)
            optimizer.load_state_dict(ckpt['optim'])
            scheduler.load_state_dict(ckpt['scheduler'])
            config['train']['start_iter'] = scheduler.last_epoch
        train_ns(model, u_loader, a_loader, val_loader,
                 optimizer, scheduler, device, config, args)
    print('Done!')


if __name__ == '__main__':
    torch.backends.cudnn.benchmark = True
    # parse options
    parser = ArgumentParser(description='Basic paser')
    parser.add_argument('--config', type=str, help='Path to the configuration file')
    parser.add_argument('--log', action='store_true', help='Turn on the wandb')
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--ckpt', type=str, default=None)
    parser.add_argument('--test', action='store_true', help='Test')
    parser.add_argument('--tqdm', action='store_true', help='Turn on the tqdm')
    args = parser.parse_args()
    if args.seed is None:
        args.seed = random.randint(0, 100000)
    subprocess(args)


# ================================================ FILE: train_unet.py ================================================
from datetime import datetime
import os
import yaml
import random
from argparse import ArgumentParser
import math

from tqdm import tqdm
import numpy as np

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader

from baselines.unet3d import UNet3D
from train_utils.losses import LpLoss
from train_utils.datasets import KFDataset, KFaDataset, sample_data
from train_utils.utils import save_ckpt, count_params, dict2str

try:
    import wandb
except ImportError:
    wandb = None


@torch.no_grad()
def eval_ns(model, val_loader, criterion, device):
    # UNet3D expects channels-first input; permute in and out accordingly.
    model.eval()
    val_err = []
    for u, a in val_loader:
        u, a = u.to(device), a.to(device)
        a = a.permute(0, 4, 3, 1, 2)
        out = model(a)
        out = out.squeeze(1).permute(0, 2, 3, 1)
        val_loss = criterion(out, u)
        val_err.append(val_loss.item())
    N = len(val_loader)
    # NOTE(review): the visible chunk ends mid-statement below; the remainder
    # of this function lies outside this range and is left untouched.
    avg_err
def train_ns(model,
             train_u_loader,  # training data
             val_loader,      # validation data
             optimizer, scheduler,
             device, config, args):
    """Train the UNet3D baseline on Kolmogorov-flow data with a pure data loss.

    NOTE: the original computed `v = 1/config['data']['Re']` and the spatial
    resolution `S` here, but this data-only loop never uses either; they are
    dropped so the config keys are no longer required.
    """
    start_iter = config['train']['start_iter']
    save_step = config['train']['save_step']
    eval_step = config['train']['eval_step']
    # set up directory
    base_dir = os.path.join('exp', config['log']['logdir'])
    ckpt_dir = os.path.join(base_dir, 'ckpts')
    os.makedirs(ckpt_dir, exist_ok=True)
    # loss fn
    lploss = LpLoss(size_average=True)
    # set up wandb
    if wandb and args.log:
        run = wandb.init(project=config['log']['project'],
                         entity=config['log']['entity'],
                         group=config['log']['group'],
                         config=config,
                         reinit=True,
                         settings=wandb.Settings(start_method='fork'))
    pbar = range(start_iter, config['train']['num_iter'])
    if args.tqdm:
        pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.2)
    u_loader = sample_data(train_u_loader)
    for e in pbar:
        log_dict = {}
        optimizer.zero_grad()
        # data loss
        u, a_in = next(u_loader)
        u = u.to(device)
        a_in = a_in.to(device).permute(0, 4, 3, 1, 2)  # B, C, T, X, Y
        out = model(a_in)
        out = out.squeeze(1).permute(0, 2, 3, 1)       # B, X, Y, T
        data_loss = lploss(out, u)
        loss = data_loss
        loss.backward()
        optimizer.step()
        scheduler.step()

        log_dict['train loss'] = loss.item()
        if e % eval_step == 0:
            eval_err, std_err = eval_ns(model, val_loader, lploss, device)
            log_dict['val error'] = eval_err
        if args.tqdm:
            logstr = dict2str(log_dict)
            pbar.set_description(logstr)
        if wandb and args.log:
            wandb.log(log_dict)
        if e % save_step == 0 and e > 0:
            ckpt_path = os.path.join(ckpt_dir, f'model-{e}.pt')
            save_ckpt(ckpt_path, model, optimizer, scheduler)
    # clean up wandb
    if wandb and args.log:
        run.finish()


def subprocess(args):
    """Entry point for the UNet3D baseline: build the model, then test or train."""
    with open(args.config, 'r') as f:
        config = yaml.load(f, yaml.FullLoader)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # set random seed
    config['seed'] = args.seed
    seed = args.seed
    torch.manual_seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # create model
    model = UNet3D(in_channels=4, out_channels=1, f_maps=64, final_sigmoid=False).to(device)
    num_params = count_params(model)
    config['num_params'] = num_params
    print(f'Number of parameters: {num_params}')

    # Load from checkpoint.
    # FIX: the checkpoint was previously loaded twice and without map_location,
    # which fails when a GPU-saved checkpoint is restored on a CPU-only machine.
    ckpt = None
    if args.ckpt:
        ckpt_path = args.ckpt
        ckpt = torch.load(ckpt_path, map_location=device)
        model.load_state_dict(ckpt['model'])
        print('Weights loaded from %s' % ckpt_path)

    if args.test:
        batchsize = config['test']['batchsize']
        testset = KFDataset(paths=config['data']['paths'],
                            raw_res=config['data']['raw_res'],
                            data_res=config['test']['data_res'],
                            pde_res=config['test']['data_res'],
                            n_samples=config['data']['n_test_samples'],
                            offset=config['data']['testoffset'],
                            t_duration=config['data']['t_duration'])
        testloader = DataLoader(testset, batch_size=batchsize, num_workers=4)
        criterion = LpLoss()
        test_err, std_err = eval_ns(model, testloader, criterion, device)
        print(f'Averaged test relative L2 error: {test_err}; Standard error: {std_err}')
    else:
        # training set
        batchsize = config['train']['batchsize']
        u_set = KFDataset(paths=config['data']['paths'],
                          raw_res=config['data']['raw_res'],
                          data_res=config['data']['data_res'],
                          pde_res=config['data']['data_res'],
                          n_samples=config['data']['n_data_samples'],
                          offset=config['data']['offset'],
                          t_duration=config['data']['t_duration'])
        u_loader = DataLoader(u_set, batch_size=batchsize, num_workers=4, shuffle=True)
        # val set
        valset = KFDataset(paths=config['data']['paths'],
                           raw_res=config['data']['raw_res'],
                           data_res=config['test']['data_res'],
                           pde_res=config['test']['data_res'],
                           n_samples=config['data']['n_test_samples'],
                           offset=config['data']['testoffset'],
                           t_duration=config['data']['t_duration'])
        val_loader = DataLoader(valset, batch_size=batchsize, num_workers=4)
        print(f'Train set: {len(u_set)}; Test set: {len(valset)}.')

        optimizer = Adam(model.parameters(), lr=config['train']['base_lr'])
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         milestones=config['train']['milestones'],
                                                         gamma=config['train']['scheduler_gamma'])
        if args.ckpt:
            # resume optimizer/scheduler state from the checkpoint loaded above
            optimizer.load_state_dict(ckpt['optim'])
            scheduler.load_state_dict(ckpt['scheduler'])
            config['train']['start_iter'] = scheduler.last_epoch
        train_ns(model, u_loader, val_loader,
                 optimizer, scheduler, device, config, args)
        print('Done!')
if __name__ == '__main__':
    torch.backends.cudnn.benchmark = True
    # parse options
    parser = ArgumentParser(description='Basic paser')
    parser.add_argument('--config', type=str, help='Path to the configuration file')
    parser.add_argument('--log', action='store_true', help='Turn on the wandb')
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--ckpt', type=str, default=None)
    parser.add_argument('--test', action='store_true', help='Test')
    parser.add_argument('--tqdm', action='store_true', help='Turn on the tqdm')
    args = parser.parse_args()
    if args.seed is None:
        args.seed = random.randint(0, 100000)
    subprocess(args)


# ================================================
# FILE: train_utils/__init__.py
# ================================================
# NOTE(review): the package re-exports Adam, NSLoader, DarcyFlow, get_forcing
# and LpLoss via relative imports; those lines are kept out of this
# standalone chunk.

# ================================================
# FILE: train_utils/adam.py
# ================================================
import math

import torch
from torch import Tensor
from typing import List, Optional
from torch.optim.optimizer import Optimizer


def adam(params: List[Tensor],
         grads: List[Tensor],
         exp_avgs: List[Tensor],
         exp_avg_sqs: List[Tensor],
         max_exp_avg_sqs: List[Tensor],
         state_steps: List[int],
         *,
         amsgrad: bool,
         beta1: float,
         beta2: float,
         lr: float,
         weight_decay: float,
         eps: float):
    r"""Functional API that performs Adam algorithm computation.

    All moment buffers are updated in place; see :class:`~torch.optim.Adam`
    for the algorithm details.
    """
    for idx, p in enumerate(params):
        g = grads[idx]
        m = exp_avgs[idx]       # first-moment running average
        s = exp_avg_sqs[idx]    # second-moment running average
        t = state_steps[idx]

        bc1 = 1 - beta1 ** t
        bc2 = 1 - beta2 ** t

        if weight_decay != 0:
            g = g.add(p, alpha=weight_decay)

        # Decay the first and second moment running average coefficient.
        m.mul_(beta1).add_(g, alpha=1 - beta1)
        s.mul_(beta2).addcmul_(g, g.conj(), value=1 - beta2)

        if amsgrad:
            # Keep the running maximum of all second moments and use it
            # for the denominator instead of the current estimate.
            torch.maximum(max_exp_avg_sqs[idx], s, out=max_exp_avg_sqs[idx])
            denom = (max_exp_avg_sqs[idx].sqrt() / math.sqrt(bc2)).add_(eps)
        else:
            denom = (s.sqrt() / math.sqrt(bc2)).add_(eps)

        # Bias-corrected step: param <- param - (lr / bc1) * m / denom
        p.addcdiv_(m, denom, value=-(lr / bc1))


class Adam(Optimizer):
    r"""Implements the Adam algorithm (https://arxiv.org/abs/1412.6980),
    with the L2 penalty handling of https://arxiv.org/abs/1711.05101 and the
    optional AMSGrad variant (https://openreview.net/forum?id=ryQu7f-RZ).

    Args:
        params (iterable): parameters to optimize or dicts of param groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients for the running
            averages of the gradient and its square (default: (0.9, 0.999))
        eps (float, optional): denominator term for numerical stability
            (default: 1e-8)
        weight_decay (float, optional): weight decay / L2 penalty (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant
            (default: False)
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, amsgrad=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay, amsgrad=amsgrad)
        super(Adam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(Adam, self).__setstate__(state)
        # Older pickles may predate the amsgrad option.
        for group in self.param_groups:
            group.setdefault('amsgrad', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): a closure that reevaluates the
                model and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            exp_avgs = []
            exp_avg_sqs = []
            max_exp_avg_sqs = []
            state_steps = []
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                if p.grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
                params_with_grad.append(p)
                grads.append(p.grad)

                state = self.state[p]
                if len(state) == 0:
                    # Lazy state initialization
                    state['step'] = 0
                    # Exponential moving averages of gradient and squared gradient
                    state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    if group['amsgrad']:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                exp_avgs.append(state['exp_avg'])
                exp_avg_sqs.append(state['exp_avg_sq'])
                if group['amsgrad']:
                    max_exp_avg_sqs.append(state['max_exp_avg_sq'])

                # update the step count, then record it
                state['step'] += 1
                state_steps.append(state['step'])

            adam(params_with_grad,
                 grads,
                 exp_avgs,
                 exp_avg_sqs,
                 max_exp_avg_sqs,
                 state_steps,
                 amsgrad=group['amsgrad'],
                 beta1=beta1,
                 beta2=beta2,
                 lr=group['lr'],
                 weight_decay=group['weight_decay'],
                 eps=group['eps'])
        return loss


# ================================================
# FILE: train_utils/data_utils.py
# ================================================
from torch.utils import data


def sample_data(loader):
    """Endlessly cycle over a DataLoader."""
    while True:
        for batch in loader:
            yield batch


def data_sampler(dataset, shuffle, distributed):
    """Pick the sampler matching the (shuffle, distributed) combination."""
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
    if shuffle:
        return data.RandomSampler(dataset)
    else:
        return data.SequentialSampler(dataset)


# ================================================
# FILE: train_utils/datasets.py
# ================================================
import scipy.io
import numpy as np
try:
    from pyDOE import lhs
    # Only needed for PINN's dataset
except ImportError:
    lhs = None
import torch
from torch.utils.data import Dataset
# NOTE(review): datasets.py also imports get_grid3d, convert_ic and
# torch2dgrid from .utils; the relative import is kept out of this
# standalone chunk.


def online_loader(sampler, S, T, time_scale, batchsize=1):
    """Yield freshly sampled initial conditions forever."""
    while True:
        u0 = sampler.sample(batchsize)
        a = convert_ic(u0, batchsize, S, T, time_scale=time_scale)
        yield a


def sample_data(loader):
    """Endlessly cycle over a DataLoader."""
    while True:
        for batch in loader:
            yield batch


class MatReader(object):
    """Thin wrapper around scipy.io.loadmat with optional float32/torch/cuda conversion."""

    def __init__(self, file_path, to_torch=True, to_cuda=False, to_float=True):
        super(MatReader, self).__init__()
        self.to_torch = to_torch
        self.to_cuda = to_cuda
        self.to_float = to_float
        self.file_path = file_path
        self.data = None
        self.old_mat = None
        self._load_file()

    def _load_file(self):
        self.data = scipy.io.loadmat(self.file_path)
        self.old_mat = True

    def load_file(self, file_path):
        self.file_path = file_path
        self._load_file()

    def read_field(self, field):
        x = self.data[field]
        if not self.old_mat:
            x = x[()]
            x = np.transpose(x, axes=range(len(x.shape) - 1, -1, -1))
        if self.to_float:
            x = x.astype(np.float32)
        if self.to_torch:
            x = torch.from_numpy(x)
            if self.to_cuda:
                x = x.cuda()
        return x

    def set_cuda(self, to_cuda):
        self.to_cuda = to_cuda

    def set_torch(self, to_torch):
        self.to_torch = to_torch

    def set_float(self, to_float):
        self.to_float = to_float
class BurgersLoader(object):
    """Burgers dataset: subsample a .mat file and build (input+grid, target) loaders."""

    def __init__(self, datapath, nx=2 ** 10, nt=100, sub=8, sub_t=1, new=False):
        dataloader = MatReader(datapath)
        self.sub = sub
        self.sub_t = sub_t
        self.s = nx // sub          # spatial resolution after subsampling
        self.T = nt // sub_t        # temporal resolution after subsampling
        self.new = new
        if new:
            self.T += 1
        self.x_data = dataloader.read_field('input')[:, ::sub]
        self.y_data = dataloader.read_field('output')[:, ::sub_t, ::sub]
        self.v = dataloader.read_field('visc').item()

    def make_loader(self, n_sample, batch_size, start=0, train=True):
        """Stack (u0, x-grid, t-grid) channels and wrap in a DataLoader."""
        Xs = self.x_data[start:start + n_sample]
        ys = self.y_data[start:start + n_sample]
        # 'new' layout: x in [0, 1) and t in [0, 1]; old layout: x in [0, 1], t in (0, 1]
        if self.new:
            gridx = torch.tensor(np.linspace(0, 1, self.s + 1)[:-1], dtype=torch.float)
            gridt = torch.tensor(np.linspace(0, 1, self.T), dtype=torch.float)
        else:
            gridx = torch.tensor(np.linspace(0, 1, self.s), dtype=torch.float)
            gridt = torch.tensor(np.linspace(0, 1, self.T + 1)[1:], dtype=torch.float)
        gridx = gridx.reshape(1, 1, self.s)
        gridt = gridt.reshape(1, self.T, 1)
        Xs = Xs.reshape(n_sample, 1, self.s).repeat([1, self.T, 1])
        Xs = torch.stack([Xs,
                          gridx.repeat([n_sample, self.T, 1]),
                          gridt.repeat([n_sample, 1, self.s])], dim=3)
        dataset = torch.utils.data.TensorDataset(Xs, ys)
        if train:
            loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
        else:
            loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
        return loader


class NSLoader(object):
    def __init__(self, datapath1, nx, nt, datapath2=None, sub=1, sub_t=1,
                 N=100, t_interval=1.0):
        '''
        Load data from npy and reshape to (N, X, Y, T)
        Args:
            datapath1: path to data
            nx: spatial resolution of the raw data
            nt: temporal resolution of the raw data
            datapath2: path to second part of data, default None
            sub: spatial subsampling rate
            sub_t: temporal subsampling rate
            N: number of samples (unused here; kept for interface compatibility)
            t_interval: fraction of the full time horizon per sample
        '''
        self.S = nx // sub
        self.T = int(nt * t_interval) // sub_t + 1
        self.time_scale = t_interval
        data1 = np.load(datapath1)
        data1 = torch.tensor(data1, dtype=torch.float)[..., ::sub_t, ::sub, ::sub]
        if datapath2 is not None:
            data2 = np.load(datapath2)
            data2 = torch.tensor(data2, dtype=torch.float)[..., ::sub_t, ::sub, ::sub]
        if t_interval == 0.5:
            # split trajectories into overlapping half-length windows
            data1 = self.extract(data1)
            if datapath2 is not None:
                data2 = self.extract(data2)
        part1 = data1.permute(0, 2, 3, 1)
        if datapath2 is not None:
            part2 = data2.permute(0, 2, 3, 1)
            self.data = torch.cat((part1, part2), dim=0)
        else:
            self.data = part1

    def make_loader(self, n_sample, batch_size, start=0, train=True):
        """Build a DataLoader of (grid+IC input, full trajectory) pairs."""
        if train:
            a_data = self.data[start:start + n_sample, :, :, 0].reshape(n_sample, self.S, self.S)
            u_data = self.data[start:start + n_sample].reshape(n_sample, self.S, self.S, self.T)
        else:
            a_data = self.data[-n_sample:, :, :, 0].reshape(n_sample, self.S, self.S)
            u_data = self.data[-n_sample:].reshape(n_sample, self.S, self.S, self.T)
        a_data = a_data.reshape(n_sample, self.S, self.S, 1, 1).repeat([1, 1, 1, self.T, 1])
        gridx, gridy, gridt = get_grid3d(self.S, self.T, time_scale=self.time_scale)
        a_data = torch.cat((gridx.repeat([n_sample, 1, 1, 1, 1]),
                            gridy.repeat([n_sample, 1, 1, 1, 1]),
                            gridt.repeat([n_sample, 1, 1, 1, 1]),
                            a_data), dim=-1)
        dataset = torch.utils.data.TensorDataset(a_data, u_data)
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=train)
        return loader

    def make_dataset(self, n_sample, start=0, train=True):
        """Same construction as make_loader but returns the TensorDataset."""
        if train:
            a_data = self.data[start:start + n_sample, :, :, 0].reshape(n_sample, self.S, self.S)
            u_data = self.data[start:start + n_sample].reshape(n_sample, self.S, self.S, self.T)
        else:
            a_data = self.data[-n_sample:, :, :, 0].reshape(n_sample, self.S, self.S)
            u_data = self.data[-n_sample:].reshape(n_sample, self.S, self.S, self.T)
        a_data = a_data.reshape(n_sample, self.S, self.S, 1, 1).repeat([1, 1, 1, self.T, 1])
        # FIX: make_loader builds the grid with time_scale=self.time_scale but
        # this method omitted it, so datasets built with t_interval != 1 got an
        # inconsistent time grid. Pass the same time_scale here.
        gridx, gridy, gridt = get_grid3d(self.S, self.T, time_scale=self.time_scale)
        a_data = torch.cat((
            gridx.repeat([n_sample, 1, 1, 1, 1]),
            gridy.repeat([n_sample, 1, 1, 1, 1]),
            gridt.repeat([n_sample, 1, 1, 1, 1]),
            a_data), dim=-1)
        dataset = torch.utils.data.TensorDataset(a_data, u_data)
        return dataset

    @staticmethod
    def extract(data):
        '''
        Extract data with time range 0-0.5, 0.25-0.75, 0.5-1.0, 0.75-1.25,...
        Args:
            data: tensor with size N x 129 x 128 x 128
        Returns:
            output: (4*N-1) x 65 x 128 x 128
        '''
        T = data.shape[1] // 2
        interval = data.shape[1] // 4
        N = data.shape[0]
        new_data = torch.zeros(4 * N - 1, T + 1, data.shape[2], data.shape[3])
        for i in range(N):
            for j in range(4):
                if i == N - 1 and j == 3:
                    # reach boundary
                    break
                if j != 3:
                    new_data[i * 4 + j] = data[i, interval * j:interval * j + T + 1]
                else:
                    # last window stitches the tail of sample i to the head of i+1
                    new_data[i * 4 + j, 0: interval] = data[i, interval * j:interval * j + interval]
                    new_data[i * 4 + j, interval: T + 1] = data[i + 1, 0:interval + 1]
        return new_data
class NS3DDataset(Dataset):
    """Navier-Stokes dataset over (x, y, t); serves full-resolution inputs and
    subsampled targets."""

    def __init__(self, paths, data_res, pde_res,
                 n_samples=None, offset=0, t_duration=1.0,
                 sub_x=1, sub_t=1, train=True):
        super().__init__()
        self.data_res = data_res
        self.pde_res = pde_res
        self.t_duration = t_duration
        self.paths = paths
        self.offset = offset
        self.n_samples = n_samples
        self.load(train=train, sub_x=sub_x, sub_t=sub_t)

    def load(self, train=True, sub_x=1, sub_t=1):
        """Read all npy files, subsample, optionally window, and cache tensors."""
        data_list = []
        for datapath in self.paths:
            batch = np.load(datapath, mmap_mode='r')
            batch = torch.from_numpy(batch[:, ::sub_t, ::sub_x, ::sub_x]).to(torch.float32)
            if self.t_duration == 0.5:
                batch = self.extract(batch)
            data_list.append(batch.permute(0, 2, 3, 1))
        data = torch.cat(data_list, dim=0)

        if self.n_samples:
            # train split takes [offset, offset+n); eval split takes the rest
            if train:
                data = data[self.offset: self.offset + self.n_samples]
            else:
                data = data[self.offset + self.n_samples:]

        N, S, T = data.shape[0], data.shape[1], data.shape[-1]

        # replicate the t=0 slice over time and attach the coordinate grids
        a_data = data[:, :, :, 0:1, None].repeat([1, 1, 1, T, 1])
        gridx, gridy, gridt = get_grid3d(S, T)
        a_data = torch.cat((
            gridx.repeat([N, 1, 1, 1, 1]),
            gridy.repeat([N, 1, 1, 1, 1]),
            gridt.repeat([N, 1, 1, 1, 1]),
            a_data), dim=-1)
        self.data = data        # N, S, S, T, 1
        self.a_data = a_data    # N, S, S, T, 4
        self.data_s_step = data.shape[1] // self.data_res[0]
        self.data_t_step = data.shape[3] // (self.data_res[2] - 1)

    def __getitem__(self, idx):
        target = self.data[idx, ::self.data_s_step, ::self.data_s_step, ::self.data_t_step]
        return target, self.a_data[idx]

    def __len__(self, ):
        return self.data.shape[0]

    @staticmethod
    def extract(data):
        '''
        Extract data with time range 0-0.5, 0.25-0.75, 0.5-1.0, 0.75-1.25,...
        Args:
            data: tensor with size N x 129 x 128 x 128
        Returns:
            output: (4*N-1) x 65 x 128 x 128
        '''
        T = data.shape[1] // 2
        interval = data.shape[1] // 4
        N = data.shape[0]
        new_data = torch.zeros(4 * N - 1, T + 1, data.shape[2], data.shape[3])
        for i in range(N):
            for j in range(4):
                if i == N - 1 and j == 3:
                    # reach boundary
                    break
                if j != 3:
                    new_data[i * 4 + j] = data[i, interval * j:interval * j + T + 1]
                else:
                    new_data[i * 4 + j, 0: interval] = data[i, interval * j:interval * j + interval]
                    new_data[i * 4 + j, interval: T + 1] = data[i + 1, 0:interval + 1]
        return new_data


class KFDataset(Dataset):
    """Kolmogorov-flow dataset: subsampled trajectories plus grid-augmented
    initial conditions at PDE resolution."""

    def __init__(self, paths, data_res, pde_res, raw_res,
                 n_samples=None, total_samples=None, idx=0, offset=0, t_duration=1.0):
        super().__init__()
        self.data_res = data_res    # data resolution
        self.pde_res = pde_res      # pde loss resolution
        self.raw_res = raw_res      # raw data resolution
        self.t_duration = t_duration
        self.paths = paths
        self.offset = offset
        self.n_samples = n_samples
        if t_duration == 1.0:
            self.T = self.pde_res[2]
        else:
            self.T = int(self.pde_res[2] * t_duration) + 1  # number of points in time dimension
        self.load()
        if total_samples is not None:
            print(f'Load {total_samples} samples starting from {idx}th sample')
            self.data = self.data[idx:idx + total_samples]
            self.a_data = self.a_data[idx:idx + total_samples]
        self.data_s_step = pde_res[0] // data_res[0]
        self.data_t_step = (pde_res[2] - 1) // (data_res[2] - 1)

    def load(self):
        datapath = self.paths[0]
        raw_data = np.load(datapath, mmap_mode='r')
        # subsample ratios between raw and target resolutions
        sub_x = self.raw_res[0] // self.data_res[0]
        sub_t = (self.raw_res[2] - 1) // (self.data_res[2] - 1)
        a_sub_x = self.raw_res[0] // self.pde_res[0]
        # load data
        data = raw_data[self.offset: self.offset + self.n_samples, ::sub_t, ::sub_x, ::sub_x]
        # divide data into K = 1/t_duration windows per trajectory
        if self.t_duration != 0.:
            end_t = self.raw_res[2] - 1
            K = int(1 / self.t_duration)
            step = end_t // K
            data = self.partition(data)
            a_data = raw_data[self.offset: self.offset + self.n_samples, 0:end_t:step, ::a_sub_x, ::a_sub_x]
            a_data = a_data.reshape(self.n_samples * K, 1, self.pde_res[0], self.pde_res[1])  # 2N x 1 x S x S
        else:
            a_data = raw_data[self.offset: self.offset + self.n_samples, 0:1, ::a_sub_x, ::a_sub_x]
        # convert into torch tensor
        data = torch.from_numpy(data).to(torch.float32)
        a_data = torch.from_numpy(a_data).to(torch.float32).permute(0, 2, 3, 1)
        self.data = data.permute(0, 2, 3, 1)
        S = self.pde_res[1]
        a_data = a_data[:, :, :, :, None]  # N x S x S x 1 x 1
        gridx, gridy, gridt = get_grid3d(S, self.T)
        self.grid = torch.cat((gridx[0], gridy[0], gridt[0]), dim=-1)  # S x S x T x 3
        self.a_data = a_data

    def partition(self, data):
        '''
        Args:
            data: tensor with size N x T x S x S
        Returns:
            output: int(1/t_duration)*N x (T//K + 1) x S x S
        '''
        N, T, S = data.shape[:3]
        K = int(1 / self.t_duration)
        new_data = np.zeros((K * N, T // K + 1, S, S))
        step = T // K
        for i in range(N):
            for j in range(K):
                # consecutive windows share their boundary frame
                new_data[i * K + j] = data[i, j * step: (j + 1) * step + 1]
        return new_data

    def __getitem__(self, idx):
        a_data = torch.cat((
            self.grid,
            self.a_data[idx].repeat(1, 1, self.T, 1)
        ), dim=-1)
        return self.data[idx], a_data

    def __len__(self, ):
        return self.data.shape[0]
class BurgerData(Dataset):
    '''
    Burgers dataset loaded from a .mat file.

    members:
        - t, x, Exact: raw data
        - X, T: meshgrid
        - X_star, u_star: flattened (x, t), u array
        - lb, ub: lower bound and upper bound vector
        - X_u, u: boundary condition data (x, t), u
    '''

    def __init__(self, datapath):
        data = scipy.io.loadmat(datapath)
        # raw 2D data
        self.t = data['t'].flatten()[:, None]   # (100, 1)
        self.x = data['x'].flatten()[:, None]   # (256, 1)
        self.Exact = np.real(data['usol']).T    # (100, 256)
        # Flattened sequence
        self.get_flatten_data()
        self.get_boundary_data()

    def __len__(self):
        return self.Exact.shape[0]

    def __getitem__(self, idx):
        return self.X_star[idx], self.u_star[idx]

    def get_flatten_data(self):
        """Build the flattened (x, t) coordinate list and matching solution values."""
        grid_x, grid_t = np.meshgrid(self.x, self.t)
        self.X, self.T = grid_x, grid_t
        self.X_star = np.hstack((grid_x.flatten()[:, None], grid_t.flatten()[:, None]))
        self.u_star = self.Exact.flatten()[:, None]
        # lower / upper bound of (x, t): 2-dimensional vectors
        self.lb = self.X_star.min(0)
        self.ub = self.X_star.max(0)

    def get_boundary_data(self):
        """Collect initial-condition and both spatial-boundary samples."""
        xx1 = np.hstack((self.X[0:1, :].T, self.T[0:1, :].T))   # t = t0 slice
        uu1 = self.Exact[0:1, :].T
        xx2 = np.hstack((self.X[:, 0:1], self.T[:, 0:1]))       # left boundary
        uu2 = self.Exact[:, 0:1]
        xx3 = np.hstack((self.X[:, -1:], self.T[:, -1:]))       # right boundary
        uu3 = self.Exact[:, -1:]
        self.X_u = np.vstack([xx1, xx2, xx3])
        self.u = np.vstack([uu1, uu2, uu3])

    def sample_xt(self, N=10000):
        '''
        Sample (x, t) pairs within the boundary
        Return:
            - X_f: (N, 2) array
        '''
        # Latin hypercube sampling over the domain, plus the boundary points
        X_f = self.lb + (self.ub - self.lb) * lhs(2, N)
        X_f = np.vstack((X_f, self.X_u))
        return X_f

    def sample_xu(self, N=100):
        '''
        Sample N points from boundary data
        Return:
            - X_u: (N, 2) array
            - u: (N, 1) array
        '''
        idx = np.random.choice(self.X_u.shape[0], N, replace=False)
        return self.X_u[idx, :], self.u[idx, :]


class DarcyFlow(Dataset):
    """Darcy-flow dataset: (coefficient + grid, solution) pairs from a .mat file."""

    def __init__(self, datapath, nx, sub, offset=0, num=1):
        self.S = int(nx // sub) + 1 if sub > 1 else nx
        data = scipy.io.loadmat(datapath)
        a = data['coeff']
        u = data['sol']
        self.a = torch.tensor(a[offset: offset + num, ::sub, ::sub], dtype=torch.float)
        self.u = torch.tensor(u[offset: offset + num, ::sub, ::sub], dtype=torch.float)
        self.mesh = torch2dgrid(self.S, self.S)

    def __len__(self):
        return self.a.shape[0]

    def __getitem__(self, item):
        fa = self.a[item]
        return torch.cat([fa.unsqueeze(2), self.mesh], dim=2), self.u[item]
item): fa = self.a[item] return torch.cat([fa.unsqueeze(2), self.mesh], dim=2), self.u[item] class DarcyIC(Dataset): def __init__(self, datapath, nx, sub, offset=0, num=1): self.S = int(nx // sub) + 1 if sub > 1 else nx data = scipy.io.loadmat(datapath) a = data['coeff'] self.a = torch.tensor(a[offset: offset + num, ::sub, ::sub], dtype=torch.float) self.mesh = torch2dgrid(self.S, self.S) data = scipy.io.loadmat(datapath) a = data['coeff'] u = data['sol'] self.a = torch.tensor(a[offset: offset + num, ::sub, ::sub], dtype=torch.float) self.u = torch.tensor(u[offset: offset + num, ::sub, ::sub], dtype=torch.float) self.mesh = torch2dgrid(self.S, self.S) def __len__(self): return self.a.shape[0] def __getitem__(self, item): fa = self.a[item] return torch.cat([fa.unsqueeze(2), self.mesh], dim=2) class DarcyCombo(Dataset): def __init__(self, datapath, nx, sub, pde_sub, num=1000, offset=0) -> None: super().__init__() self.S = int(nx // sub) + 1 if sub > 1 else nx self.pde_S = int(nx // pde_sub) + 1 if sub > 1 else nx data = scipy.io.loadmat(datapath) a = data['coeff'] u = data['sol'] self.a = torch.tensor(a[offset: offset + num, ::sub, ::sub], dtype=torch.float) self.u = torch.tensor(u[offset: offset + num, ::sub, ::sub], dtype=torch.float) self.mesh = torch2dgrid(self.S, self.S) self.pde_a = torch.tensor(a[offset: offset + num, ::pde_sub, ::pde_sub], dtype=torch.float) self.pde_mesh = torch2dgrid(self.pde_S, self.pde_S) def __len__(self): return self.a.shape[0] def __getitem__(self, item): fa = self.a[item] pde_a = self.pde_a[item] data_ic = torch.cat([fa.unsqueeze(2), self.mesh], dim=2) pde_ic = torch.cat([pde_a.unsqueeze(2), self.pde_mesh], dim=2) return data_ic, self.u[item], pde_ic ''' dataset class for loading initial conditions for Komogrov flow ''' class KFaDataset(Dataset): def __init__(self, paths, pde_res, raw_res, n_samples=None, offset=0, t_duration=1.0): super().__init__() self.pde_res = pde_res # pde loss resolution self.raw_res = raw_res # raw data 
resolution self.t_duration = t_duration self.paths = paths self.offset = offset self.n_samples = n_samples if t_duration == 1.0: self.T = self.pde_res[2] else: self.T = int(self.pde_res[2] * t_duration) + 1 # number of points in time dimension self.load() def load(self): datapath = self.paths[0] raw_data = np.load(datapath, mmap_mode='r') # subsample ratio a_sub_x = self.raw_res[0] // self.pde_res[0] # load data if self.t_duration != 0.: end_t = self.raw_res[2] - 1 K = int(1/self.t_duration) step = end_t // K a_data = raw_data[self.offset: self.offset + self.n_samples, 0:end_t:step, ::a_sub_x, ::a_sub_x] a_data = a_data.reshape(self.n_samples * K, 1, self.pde_res[0], self.pde_res[1]) # 2N x 1 x S x S else: a_data = raw_data[self.offset: self.offset + self.n_samples, 0:1, ::a_sub_x, ::a_sub_x] # convert into torch tensor a_data = torch.from_numpy(a_data).to(torch.float32).permute(0, 2, 3, 1) S = self.pde_res[1] a_data = a_data[:, :, :, :, None] # N x S x S x 1 x 1 gridx, gridy, gridt = get_grid3d(S, self.T) self.grid = torch.cat((gridx[0], gridy[0], gridt[0]), dim=-1) # S x S x T x 3 self.a_data = a_data def __getitem__(self, idx): a_data = torch.cat(( self.grid, self.a_data[idx].repeat(1, 1, self.T, 1) ), dim=-1) return a_data def __len__(self, ): return self.a_data.shape[0] ================================================ FILE: train_utils/distributed.py ================================================ import os import torch import torch.distributed as dist def setup(rank, world_size): os.environ['MASTER_ADDR'] = 'localhost' os.environ['MASTER_PORT'] = '7777' dist.init_process_group("nccl", rank=rank, world_size=world_size) def cleanup(): dist.destroy_process_group() def get_world_size(): if not dist.is_available() or not dist.is_initialized(): return 1 return dist.get_world_size() def all_reduce_mean(tensor): ''' Reduce the tensor across all machines, the operation is in-place. 
def reduce_sum(tensor):
    '''
    Reduce the tensor across all machines. Only process with rank 0
    will receive the final result.
    Args:
        tensor: input and ouput of the collective. The function operates in-place
    Returns:
        final result
    '''
    if not dist.is_available() or not dist.is_initialized():
        return tensor
    dist.reduce(tensor, dst=0, op=dist.ReduceOp.SUM)
    return tensor


def reduce_loss_dict(loss_dict):
    """Average a dict of scalar loss tensors across ranks (result valid on rank 0).

    FIX: the early-exit guard previously read
    `if not dist.is_available() or dist.is_initialized(): return loss_dict`,
    i.e. it returned the un-reduced dict exactly when distributed training WAS
    initialized, making the reduction below unreachable. The test must be
    `not dist.is_initialized()`.
    """
    if not dist.is_available() or not dist.is_initialized():
        return loss_dict
    world_size = get_world_size()
    if world_size < 2:
        return loss_dict
    with torch.no_grad():
        keys = []
        losses = []
        # iterate in sorted key order so every rank stacks identically
        for k in sorted(loss_dict.keys()):
            keys.append(k)
            losses.append(loss_dict[k])
        losses = torch.stack(losses, 0)
        dist.reduce(losses, dst=0)
        if dist.get_rank() == 0:
            losses /= world_size
        reduced_losses = {k: v for k, v in zip(keys, losses)}
    return reduced_losses


# ================================================
# FILE: train_utils/eval_2d.py
# ================================================
# NOTE(review): eval_2d.py imports tqdm, numpy, torch, wandb (optional) and
# LpLoss/darcy_loss/PINO_loss from .losses; those lines are kept out of this
# standalone chunk.


def eval_darcy(model, dataloader, config, device, use_tqdm=True):
    """Evaluate a Darcy-flow model; prints mean and standard error of the
    relative L2 data loss and the equation residual."""
    model.eval()
    myloss = LpLoss(size_average=True)
    if use_tqdm:
        pbar = tqdm(dataloader, dynamic_ncols=True, smoothing=0.05)
    else:
        pbar = dataloader
    mesh = dataloader.dataset.mesh
    # boundary mollifier: forces the prediction to vanish on the domain edge
    mollifier = torch.sin(np.pi * mesh[..., 0]) * torch.sin(np.pi * mesh[..., 1]) * 0.001
    mollifier = mollifier.to(device)
    f_val = []
    test_err = []
    with torch.no_grad():
        for x, y in pbar:
            x, y = x.to(device), y.to(device)
            pred = model(x).reshape(y.shape)
            pred = pred * mollifier
            data_loss = myloss(pred, y)
            a = x[..., 0]
            f_loss = darcy_loss(pred, a)
            test_err.append(data_loss.item())
            f_val.append(f_loss.item())
            if use_tqdm:
                pbar.set_description(
                    (
                        f'Equation error: {f_loss.item():.5f}, test l2 error: {data_loss.item()}'
                    )
                )
    mean_f_err = np.mean(f_val)
    std_f_err = np.std(f_val, ddof=1) / np.sqrt(len(f_val))
    mean_err = np.mean(test_err)
    std_err = np.std(test_err, ddof=1) / np.sqrt(len(test_err))
    print(f'==Averaged relative L2 error mean: {mean_err}, std error: {std_err}==\n'
          f'==Averaged equation error mean: {mean_f_err}, std error: {std_f_err}==')
f_val.append(f_loss.item()) if use_tqdm: pbar.set_description( ( f'Equation error: {f_loss.item():.5f}, test l2 error: {data_loss.item()}' ) ) mean_f_err = np.mean(f_val) std_f_err = np.std(f_val, ddof=1) / np.sqrt(len(f_val)) mean_err = np.mean(test_err) std_err = np.std(test_err, ddof=1) / np.sqrt(len(test_err)) print(f'==Averaged relative L2 error mean: {mean_err}, std error: {std_err}==\n' f'==Averaged equation error mean: {mean_f_err}, std error: {std_f_err}==') def eval_burgers(model, dataloader, v, config, device, use_tqdm=True): model.eval() myloss = LpLoss(size_average=True) if use_tqdm: pbar = tqdm(dataloader, dynamic_ncols=True, smoothing=0.05) else: pbar = dataloader test_err = [] f_err = [] for x, y in pbar: x, y = x.to(device), y.to(device) out = model(x).reshape(y.shape) data_loss = myloss(out, y) loss_u, f_loss = PINO_loss(out, x[:, 0, :, 0], v) test_err.append(data_loss.item()) f_err.append(f_loss.item()) mean_f_err = np.mean(f_err) std_f_err = np.std(f_err, ddof=1) / np.sqrt(len(f_err)) mean_err = np.mean(test_err) std_err = np.std(test_err, ddof=1) / np.sqrt(len(test_err)) print(f'==Averaged relative L2 error mean: {mean_err}, std error: {std_err}==\n' f'==Averaged equation error mean: {mean_f_err}, std error: {std_f_err}==') ================================================ FILE: train_utils/eval_3d.py ================================================ import torch import torch.nn.functional as F from tqdm import tqdm from timeit import default_timer from .losses import LpLoss, PINO_loss3d try: import wandb except ImportError: wandb = None def eval_ns(model, # model loader, # dataset instance dataloader, # dataloader forcing, # forcing config, # configuration dict device, # device id log=False, project='PINO-default', group='FDM', tags=['Nan'], use_tqdm=True): ''' Evaluate the model for Navier Stokes equation ''' if wandb and log: run = wandb.init(project=project, entity=config['log']['entity'], group=group, config=config, tags=tags, reinit=True, 
settings=wandb.Settings(start_method="fork")) # data parameters v = 1 / config['data']['Re'] S, T = loader.S, loader.T t_interval = config['data']['time_interval'] # eval settings batch_size = config['test']['batchsize'] model.eval() myloss = LpLoss(size_average=True) if use_tqdm: pbar = tqdm(dataloader, dynamic_ncols=True, smoothing=0.05) else: pbar = dataloader loss_dict = {'f_error': 0.0, 'test_l2': 0.0} start_time = default_timer() with torch.no_grad(): for x, y in pbar: x, y = x.to(device), y.to(device) x_in = F.pad(x, (0, 0, 0, 5), "constant", 0) out = model(x_in).reshape(batch_size, S, S, T + 5) out = out[..., :-5] x = x[:, :, :, 0, -1] loss_l2 = myloss(out.view(batch_size, S, S, T), y.view(batch_size, S, S, T)) loss_ic, loss_f = PINO_loss3d(out.view(batch_size, S, S, T), x, forcing, v, t_interval) loss_dict['f_error'] += loss_f loss_dict['test_l2'] += loss_l2 if device == 0 and use_tqdm: pbar.set_description( ( f'Train f error: {loss_f.item():.5f}; Test l2 error: {loss_l2.item():.5f}' ) ) end_time = default_timer() test_l2 = loss_dict['test_l2'].item() / len(dataloader) loss_f = loss_dict['f_error'].item() / len(dataloader) print(f'==Averaged relative L2 error is: {test_l2}==\n' f'==Averaged equation error is: {loss_f}==') print(f'Time cost: {end_time - start_time} s') if device == 0: if wandb and log: wandb.log( { 'Train f error': loss_f, 'Test L2 error': test_l2, } ) run.finish() ================================================ FILE: train_utils/losses.py ================================================ import numpy as np import torch import torch.nn.functional as F def FDM_Darcy(u, a, D=1): batchsize = u.size(0) size = u.size(1) u = u.reshape(batchsize, size, size) a = a.reshape(batchsize, size, size) dx = D / (size - 1) dy = dx # ux: (batch, size-2, size-2) ux = (u[:, 2:, 1:-1] - u[:, :-2, 1:-1]) / (2 * dx) uy = (u[:, 1:-1, 2:] - u[:, 1:-1, :-2]) / (2 * dy) # ax = (a[:, 2:, 1:-1] - a[:, :-2, 1:-1]) / (2 * dx) # ay = (a[:, 1:-1, 2:] - a[:, 1:-1, :-2]) / 
(2 * dy) # uxx = (u[:, 2:, 1:-1] -2*u[:,1:-1,1:-1] +u[:, :-2, 1:-1]) / (dx**2) # uyy = (u[:, 1:-1, 2:] -2*u[:,1:-1,1:-1] +u[:, 1:-1, :-2]) / (dy**2) a = a[:, 1:-1, 1:-1] # u = u[:, 1:-1, 1:-1] # Du = -(ax*ux + ay*uy + a*uxx + a*uyy) # inner1 = torch.mean(a*(ux**2 + uy**2), dim=[1,2]) # inner2 = torch.mean(f*u, dim=[1,2]) # return 0.5*inner1 - inner2 aux = a * ux auy = a * uy auxx = (aux[:, 2:, 1:-1] - aux[:, :-2, 1:-1]) / (2 * dx) auyy = (auy[:, 1:-1, 2:] - auy[:, 1:-1, :-2]) / (2 * dy) Du = - (auxx + auyy) return Du def darcy_loss(u, a): batchsize = u.size(0) size = u.size(1) u = u.reshape(batchsize, size, size) a = a.reshape(batchsize, size, size) lploss = LpLoss(size_average=True) # index_x = torch.cat([torch.tensor(range(0, size)), (size - 1) * torch.ones(size), torch.tensor(range(size-1, 1, -1)), # torch.zeros(size)], dim=0).long() # index_y = torch.cat([(size - 1) * torch.ones(size), torch.tensor(range(size-1, 1, -1)), torch.zeros(size), # torch.tensor(range(0, size))], dim=0).long() # boundary_u = u[:, index_x, index_y] # truth_u = torch.zeros(boundary_u.shape, device=u.device) # loss_u = lploss.abs(boundary_u, truth_u) Du = FDM_Darcy(u, a) f = torch.ones(Du.shape, device=u.device) loss_f = lploss.rel(Du, f) # im = (Du-f)[0].detach().cpu().numpy() # plt.imshow(im) # plt.show() # loss_f = FDM_Darcy(u, a) # loss_f = torch.mean(loss_f) return loss_f def FDM_NS_vorticity(w, v=1/40, t_interval=1.0): batchsize = w.size(0) nx = w.size(1) ny = w.size(2) nt = w.size(3) device = w.device w = w.reshape(batchsize, nx, ny, nt) w_h = torch.fft.fft2(w, dim=[1, 2]) # Wavenumbers in y-direction k_max = nx//2 N = nx k_x = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), torch.arange(start=-k_max, end=0, step=1, device=device)), 0).reshape(N, 1).repeat(1, N).reshape(1,N,N,1) k_y = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), torch.arange(start=-k_max, end=0, step=1, device=device)), 0).reshape(1, N).repeat(N, 1).reshape(1,N,N,1) # 
Negative Laplacian in Fourier space lap = (k_x ** 2 + k_y ** 2) lap[0, 0, 0, 0] = 1.0 f_h = w_h / lap ux_h = 1j * k_y * f_h uy_h = -1j * k_x * f_h wx_h = 1j * k_x * w_h wy_h = 1j * k_y * w_h wlap_h = -lap * w_h ux = torch.fft.irfft2(ux_h[:, :, :k_max + 1], dim=[1, 2]) uy = torch.fft.irfft2(uy_h[:, :, :k_max + 1], dim=[1, 2]) wx = torch.fft.irfft2(wx_h[:, :, :k_max+1], dim=[1,2]) wy = torch.fft.irfft2(wy_h[:, :, :k_max+1], dim=[1,2]) wlap = torch.fft.irfft2(wlap_h[:, :, :k_max+1], dim=[1,2]) dt = t_interval / (nt-1) wt = (w[:, :, :, 2:] - w[:, :, :, :-2]) / (2 * dt) Du1 = wt + (ux*wx + uy*wy - v*wlap)[...,1:-1] #- forcing return Du1 def Autograd_Burgers(u, grid, v=1/100): from torch.autograd import grad gridt, gridx = grid ut = grad(u.sum(), gridt, create_graph=True)[0] ux = grad(u.sum(), gridx, create_graph=True)[0] uxx = grad(ux.sum(), gridx, create_graph=True)[0] Du = ut + ux*u - v*uxx return Du, ux, uxx, ut def AD_loss(u, u0, grid, index_ic=None, p=None, q=None): batchsize = u.size(0) # lploss = LpLoss(size_average=True) Du, ux, uxx, ut = Autograd_Burgers(u, grid) if index_ic is None: # u in on a uniform grid nt = u.size(1) nx = u.size(2) u = u.reshape(batchsize, nt, nx) index_t = torch.zeros(nx,).long() index_x = torch.tensor(range(nx)).long() boundary_u = u[:, index_t, index_x] # loss_bc0 = F.mse_loss(u[:, :, 0], u[:, :, -1]) # loss_bc1 = F.mse_loss(ux[:, :, 0], ux[:, :, -1]) else: # u is randomly sampled, 0:p are BC, p:2p are ic, 2p:2p+q are interior boundary_u = u[:, :p] batch_index = torch.tensor(range(batchsize)).reshape(batchsize, 1).repeat(1, p) u0 = u0[batch_index, index_ic] # loss_bc0 = F.mse_loss(u[:, p:p+p//2], u[:, p+p//2:2*p]) # loss_bc1 = F.mse_loss(ux[:, p:p+p//2], ux[:, p+p//2:2*p]) loss_ic = F.mse_loss(boundary_u, u0) f = torch.zeros(Du.shape, device=u.device) loss_f = F.mse_loss(Du, f) return loss_ic, loss_f class LpLoss(object): ''' loss function with rel/abs Lp loss ''' def __init__(self, d=2, p=2, size_average=True, reduction=True): 
super(LpLoss, self).__init__() #Dimension and Lp-norm type are postive assert d > 0 and p > 0 self.d = d self.p = p self.reduction = reduction self.size_average = size_average def abs(self, x, y): num_examples = x.size()[0] #Assume uniform mesh h = 1.0 / (x.size()[1] - 1.0) all_norms = (h**(self.d/self.p))*torch.norm(x.view(num_examples,-1) - y.view(num_examples,-1), self.p, 1) if self.reduction: if self.size_average: return torch.mean(all_norms) else: return torch.sum(all_norms) return all_norms def rel(self, x, y): num_examples = x.size()[0] diff_norms = torch.norm(x.reshape(num_examples,-1) - y.reshape(num_examples,-1), self.p, 1) y_norms = torch.norm(y.reshape(num_examples,-1), self.p, 1) if self.reduction: if self.size_average: return torch.mean(diff_norms/y_norms) else: return torch.sum(diff_norms/y_norms) return diff_norms/y_norms def __call__(self, x, y): return self.rel(x, y) def FDM_Burgers(u, v, D=1): batchsize = u.size(0) nt = u.size(1) nx = u.size(2) u = u.reshape(batchsize, nt, nx) dt = D / (nt-1) dx = D / (nx) u_h = torch.fft.fft(u, dim=2) # Wavenumbers in y-direction k_max = nx//2 k_x = torch.cat((torch.arange(start=0, end=k_max, step=1, device=u.device), torch.arange(start=-k_max, end=0, step=1, device=u.device)), 0).reshape(1,1,nx) ux_h = 2j *np.pi*k_x*u_h uxx_h = 2j *np.pi*k_x*ux_h ux = torch.fft.irfft(ux_h[:, :, :k_max+1], dim=2, n=nx) uxx = torch.fft.irfft(uxx_h[:, :, :k_max+1], dim=2, n=nx) ut = (u[:, 2:, :] - u[:, :-2, :]) / (2 * dt) Du = ut + (ux*u - v*uxx)[:,1:-1,:] return Du def PINO_loss(u, u0, v): batchsize = u.size(0) nt = u.size(1) nx = u.size(2) u = u.reshape(batchsize, nt, nx) # lploss = LpLoss(size_average=True) index_t = torch.zeros(nx,).long() index_x = torch.tensor(range(nx)).long() boundary_u = u[:, index_t, index_x] loss_u = F.mse_loss(boundary_u, u0) Du = FDM_Burgers(u, v)[:, :, :] f = torch.zeros(Du.shape, device=u.device) loss_f = F.mse_loss(Du, f) # loss_bc0 = F.mse_loss(u[:, :, 0], u[:, :, -1]) # loss_bc1 = 
F.mse_loss((u[:, :, 1] - u[:, :, -1]) / # (2/(nx)), (u[:, :, 0] - u[:, :, -2])/(2/(nx))) return loss_u, loss_f def PINO_loss3d(u, u0, forcing, v=1/40, t_interval=1.0): batchsize = u.size(0) nx = u.size(1) ny = u.size(2) nt = u.size(3) u = u.reshape(batchsize, nx, ny, nt) lploss = LpLoss(size_average=True) u_in = u[:, :, :, 0] loss_ic = lploss(u_in, u0) Du = FDM_NS_vorticity(u, v, t_interval) f = forcing.repeat(batchsize, 1, 1, nt-2) loss_f = lploss(Du, f) return loss_ic, loss_f def PDELoss(model, x, t, nu): ''' Compute the residual of PDE: residual = u_t + u * u_x - nu * u_{xx} : (N,1) Params: - model - x, t: (x, t) pairs, (N, 2) tensor - nu: constant of PDE Return: - mean of residual : scalar ''' u = model(torch.cat([x, t], dim=1)) # First backward to compute u_x (shape: N x 1), u_t (shape: N x 1) grad_x, grad_t = torch.autograd.grad(outputs=[u.sum()], inputs=[x, t], create_graph=True) # Second backward to compute u_{xx} (shape N x 1) gradgrad_x, = torch.autograd.grad(outputs=[grad_x.sum()], inputs=[x], create_graph=True) residual = grad_t + u * grad_x - nu * gradgrad_x return residual def get_forcing(S): x1 = torch.tensor(np.linspace(0, 2*np.pi, S, endpoint=False), dtype=torch.float).reshape(S, 1).repeat(1, S) x2 = torch.tensor(np.linspace(0, 2*np.pi, S, endpoint=False), dtype=torch.float).reshape(1, S).repeat(S, 1) return -4 * (torch.cos(4*(x2))).reshape(1,S,S,1) ================================================ FILE: train_utils/negadam.py ================================================ import math import torch from torch import Tensor from typing import List, Optional from torch.optim.optimizer import Optimizer def adam(params: List[Tensor], grads: List[Tensor], exp_avgs: List[Tensor], exp_avg_sqs: List[Tensor], max_exp_avg_sqs: List[Tensor], state_steps: List[int], *, amsgrad: bool, beta1: float, beta2: float, lr: float, weight_decay: float, eps: float): r"""Functional API that performs Adam algorithm computation. See :class:`~torch.optim.Adam` for details. 
""" for i, param in enumerate(params): grad = grads[i] exp_avg = exp_avgs[i] exp_avg_sq = exp_avg_sqs[i] step = state_steps[i] bias_correction1 = 1 - beta1 ** step bias_correction2 = 1 - beta2 ** step if weight_decay != 0: grad = grad.add(param, alpha=weight_decay) # Decay the first and second moment running average coefficient exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) exp_avg_sq.mul_(beta2).addcmul_(grad, grad.conj(), value=1 - beta2) if amsgrad: # Maintains the maximum of all 2nd moment running avg. till now torch.maximum(max_exp_avg_sqs[i], exp_avg_sq, out=max_exp_avg_sqs[i]) # Use the max. for normalizing running avg. of gradient denom = (max_exp_avg_sqs[i].sqrt() / math.sqrt(bias_correction2)).add_(eps) else: denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(eps) step_size = lr / bias_correction1 param.addcdiv_(exp_avg, denom, value=step_size) class NAdam(Optimizer): r"""Implements Adam algorithm. It has been proposed in `Adam: A Method for Stochastic Optimization`_. The implementation of the L2 penalty follows changes proposed in `Decoupled Weight Decay Regularization`_. Args: params (iterable): iterable of parameters to optimize or dicts defining parameter groups lr (float, optional): learning rate (default: 1e-3) betas (Tuple[float, float], optional): coefficients used for computing running averages of gradient and its square (default: (0.9, 0.999)) eps (float, optional): term added to the denominator to improve numerical stability (default: 1e-8) weight_decay (float, optional): weight decay (L2 penalty) (default: 0) amsgrad (boolean, optional): whether to use the AMSGrad variant of this algorithm from the paper `On the Convergence of Adam and Beyond`_ (default: False) .. _Adam\: A Method for Stochastic Optimization: https://arxiv.org/abs/1412.6980 .. _Decoupled Weight Decay Regularization: https://arxiv.org/abs/1711.05101 .. 
_On the Convergence of Adam and Beyond: https://openreview.net/forum?id=ryQu7f-RZ """ def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False): if not 0.0 <= lr: raise ValueError("Invalid learning rate: {}".format(lr)) if not 0.0 <= eps: raise ValueError("Invalid epsilon value: {}".format(eps)) if not 0.0 <= betas[0] < 1.0: raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) if not 0.0 <= betas[1] < 1.0: raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) if not 0.0 <= weight_decay: raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad) super(NAdam, self).__init__(params, defaults) def __setstate__(self, state): super(NAdam, self).__setstate__(state) for group in self.param_groups: group.setdefault('amsgrad', False) @torch.no_grad() def step(self, closure=None): """Performs a single optimization step. Args: closure (callable, optional): A closure that reevaluates the model and returns the loss. """ loss = None if closure is not None: with torch.enable_grad(): loss = closure() for group in self.param_groups: params_with_grad = [] grads = [] exp_avgs = [] exp_avg_sqs = [] max_exp_avg_sqs = [] state_steps = [] beta1, beta2 = group['betas'] for p in group['params']: if p.grad is not None: params_with_grad.append(p) if p.grad.is_sparse: raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') grads.append(p.grad) state = self.state[p] # Lazy state initialization if len(state) == 0: state['step'] = 0 # Exponential moving average of gradient values state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) # Exponential moving average of squared gradient values state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) if group['amsgrad']: # Maintains max of all exp. moving avg. of sq. grad. 
values state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) exp_avgs.append(state['exp_avg']) exp_avg_sqs.append(state['exp_avg_sq']) if group['amsgrad']: max_exp_avg_sqs.append(state['max_exp_avg_sq']) # update the steps for each param group update state['step'] += 1 # record the step after step update state_steps.append(state['step']) adam(params_with_grad, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, amsgrad=group['amsgrad'], beta1=beta1, beta2=beta2, lr=group['lr'], weight_decay=group['weight_decay'], eps=group['eps']) return loss ================================================ FILE: train_utils/train_2d.py ================================================ import numpy as np import torch from tqdm import tqdm from .utils import save_checkpoint from .losses import LpLoss, darcy_loss, PINO_loss try: import wandb except ImportError: wandb = None def train_2d_operator(model, train_loader, optimizer, scheduler, config, rank=0, log=False, project='PINO-2d-default', group='default', tags=['default'], use_tqdm=True, profile=False): ''' train PINO on Darcy Flow Args: model: train_loader: optimizer: scheduler: config: rank: log: project: group: tags: use_tqdm: profile: Returns: ''' if rank == 0 and wandb and log: run = wandb.init(project=project, entity=config['log']['entity'], group=group, config=config, tags=tags, reinit=True, settings=wandb.Settings(start_method="fork")) data_weight = config['train']['xy_loss'] f_weight = config['train']['f_loss'] model.train() myloss = LpLoss(size_average=True) pbar = range(config['train']['epochs']) if use_tqdm: pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.1) mesh = train_loader.dataset.mesh mollifier = torch.sin(np.pi * mesh[..., 0]) * torch.sin(np.pi * mesh[..., 1]) * 0.001 mollifier = mollifier.to(rank) pde_mesh = train_loader.dataset.pde_mesh pde_mol = torch.sin(np.pi * pde_mesh[..., 0]) * torch.sin(np.pi * pde_mesh[..., 1]) * 0.001 pde_mol = pde_mol.to(rank) for e in pbar: 
loss_dict = {'train_loss': 0.0, 'data_loss': 0.0, 'f_loss': 0.0, 'test_error': 0.0} for data_ic, u, pde_ic in train_loader: data_ic, u, pde_ic = data_ic.to(rank), u.to(rank), pde_ic.to(rank) optimizer.zero_grad() # data loss if data_weight > 0: pred = model(data_ic).squeeze(dim=-1) pred = pred * mollifier data_loss = myloss(pred, y) a = x[..., 0] f_loss = darcy_loss(pred, a) loss = data_weight * data_loss + f_weight * f_loss loss.backward() optimizer.step() loss_dict['train_loss'] += loss.item() * y.shape[0] loss_dict['f_loss'] += f_loss.item() * y.shape[0] loss_dict['data_loss'] += data_loss.item() * y.shape[0] scheduler.step() train_loss_val = loss_dict['train_loss'] / len(train_loader.dataset) f_loss_val = loss_dict['f_loss'] / len(train_loader.dataset) data_loss_val = loss_dict['data_loss'] / len(train_loader.dataset) if use_tqdm: pbar.set_description( ( f'Epoch: {e}, train loss: {train_loss_val:.5f}, ' f'f_loss: {f_loss_val:.5f}, ' f'data loss: {data_loss_val:.5f}' ) ) if wandb and log: wandb.log( { 'train loss': train_loss_val, 'f loss': f_loss_val, 'data loss': data_loss_val } ) save_checkpoint(config['train']['save_dir'], config['train']['save_name'], model, optimizer) if wandb and log: run.finish() print('Done!') def train_2d_burger(model, train_loader, v, optimizer, scheduler, config, rank=0, log=False, project='PINO-2d-default', group='default', tags=['default'], use_tqdm=True): if rank == 0 and wandb and log: run = wandb.init(project=project, entity=config['log']['entity'], group=group, config=config, tags=tags, reinit=True, settings=wandb.Settings(start_method="fork")) data_weight = config['train']['xy_loss'] f_weight = config['train']['f_loss'] ic_weight = config['train']['ic_loss'] model.train() myloss = LpLoss(size_average=True) pbar = range(config['train']['epochs']) if use_tqdm: pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.1) for e in pbar: model.train() train_pino = 0.0 data_l2 = 0.0 train_loss = 0.0 for x, y in train_loader: x, y = 
x.to(rank), y.to(rank) out = model(x).reshape(y.shape) data_loss = myloss(out, y) loss_u, loss_f = PINO_loss(out, x[:, 0, :, 0], v) total_loss = loss_u * ic_weight + loss_f * f_weight + data_loss * data_weight optimizer.zero_grad() total_loss.backward() optimizer.step() data_l2 += data_loss.item() train_pino += loss_f.item() train_loss += total_loss.item() scheduler.step() data_l2 /= len(train_loader) train_pino /= len(train_loader) train_loss /= len(train_loader) if use_tqdm: pbar.set_description( ( f'Epoch {e}, train loss: {train_loss:.5f} ' f'train f error: {train_pino:.5f}; ' f'data l2 error: {data_l2:.5f}' ) ) if wandb and log: wandb.log( { 'Train f error': train_pino, 'Train L2 error': data_l2, 'Train loss': train_loss, } ) if e % 100 == 0: save_checkpoint(config['train']['save_dir'], config['train']['save_name'].replace('.pt', f'_{e}.pt'), model, optimizer) save_checkpoint(config['train']['save_dir'], config['train']['save_name'], model, optimizer) print('Done!') ================================================ FILE: train_utils/train_3d.py ================================================ import torch from tqdm import tqdm from timeit import default_timer import torch.nn.functional as F from .utils import save_checkpoint from .losses import LpLoss, PINO_loss3d, get_forcing from .distributed import reduce_loss_dict from .data_utils import sample_data try: import wandb except ImportError: wandb = None def train(model, loader, train_loader, optimizer, scheduler, forcing, config, rank=0, log=False, project='PINO-default', group='FDM', tags=['Nan'], use_tqdm=True, profile=False): if rank == 0 and wandb and log: run = wandb.init(project=project, entity=config['log']['entity'], group=group, config=config, tags=tags, reinit=True, settings=wandb.Settings(start_method="fork")) # data parameters v = 1 / config['data']['Re'] S, T = loader.S, loader.T t_interval = config['data']['time_interval'] # training settings batch_size = config['train']['batchsize'] ic_weight = 
config['train']['ic_loss'] f_weight = config['train']['f_loss'] xy_weight = config['train']['xy_loss'] model.train() myloss = LpLoss(size_average=True) pbar = range(config['train']['epochs']) if use_tqdm: pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.05) zero = torch.zeros(1).to(rank) for ep in pbar: loss_dict = {'train_loss': 0.0, 'train_ic': 0.0, 'train_f': 0.0, 'test_l2': 0.0} log_dict = {} if rank == 0 and profile: torch.cuda.synchronize() t1 = default_timer() # start solving for x, y in train_loader: x, y = x.to(rank), y.to(rank) optimizer.zero_grad() x_in = F.pad(x, (0, 0, 0, 5), "constant", 0) out = model(x_in).reshape(batch_size, S, S, T + 5) out = out[..., :-5] x = x[:, :, :, 0, -1] loss_l2 = myloss(out.view(batch_size, S, S, T), y.view(batch_size, S, S, T)) if ic_weight != 0 or f_weight != 0: loss_ic, loss_f = PINO_loss3d(out.view(batch_size, S, S, T), x, forcing, v, t_interval) else: loss_ic, loss_f = zero, zero total_loss = loss_l2 * xy_weight + loss_f * f_weight + loss_ic * ic_weight total_loss.backward() optimizer.step() loss_dict['train_ic'] += loss_ic loss_dict['test_l2'] += loss_l2 loss_dict['train_loss'] += total_loss loss_dict['train_f'] += loss_f if rank == 0 and profile: torch.cuda.synchronize() t2 = default_timer() log_dict['Time cost'] = t2 - t1 scheduler.step() loss_reduced = reduce_loss_dict(loss_dict) train_ic = loss_reduced['train_ic'].item() / len(train_loader) train_f = loss_reduced['train_f'].item() / len(train_loader) train_loss = loss_reduced['train_loss'].item() / len(train_loader) test_l2 = loss_reduced['test_l2'].item() / len(train_loader) log_dict = { 'Train f error': train_f, 'Train L2 error': train_ic, 'Train loss': train_loss, 'Test L2 error': test_l2 } if rank == 0: if use_tqdm: pbar.set_description( ( f'Train f error: {train_f:.5f}; Train ic l2 error: {train_ic:.5f}. 
' f'Train loss: {train_loss:.5f}; Test l2 error: {test_l2:.5f}' ) ) if wandb and log: wandb.log(log_dict) if rank == 0: save_checkpoint(config['train']['save_dir'], config['train']['save_name'], model, optimizer) if wandb and log: run.finish() def mixed_train(model, # model of neural operator train_loader, # dataloader for training with data S1, T1, # spacial and time dimension for training with data a_loader, # generator for ICs S2, T2, # spacial and time dimension for training with equation only optimizer, # optimizer scheduler, # learning rate scheduler config, # configuration dict device=torch.device('cpu'), log=False, # turn on the wandb project='PINO-default', # project name group='FDM', # group name tags=['Nan'], # tags use_tqdm=True): # turn on tqdm if wandb and log: run = wandb.init(project=project, entity=config['log']['entity'], group=group, config=config, tags=tags, reinit=True, settings=wandb.Settings(start_method="fork")) # data parameters v = 1 / config['data']['Re'] t_interval = config['data']['time_interval'] forcing_1 = get_forcing(S1).to(device) forcing_2 = get_forcing(S2).to(device) # training settings batch_size = config['train']['batchsize'] ic_weight = config['train']['ic_loss'] f_weight = config['train']['f_loss'] xy_weight = config['train']['xy_loss'] num_data_iter = config['train']['data_iter'] num_eqn_iter = config['train']['eqn_iter'] model.train() myloss = LpLoss(size_average=True) pbar = range(config['train']['epochs']) if use_tqdm: pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.05) zero = torch.zeros(1).to(device) train_loader = sample_data(train_loader) for ep in pbar: model.train() t1 = default_timer() train_loss = 0.0 train_ic = 0.0 train_f = 0.0 test_l2 = 0.0 err_eqn = 0.0 # train with data for _ in range(num_data_iter): x, y = next(train_loader) x, y = x.to(device), y.to(device) optimizer.zero_grad() x_in = F.pad(x, (0, 0, 0, 5), "constant", 0) out = model(x_in).reshape(batch_size, S1, S1, T1 + 5) out = out[..., :-5] x = x[:, 
:, :, 0, -1] loss_l2 = myloss(out.view(batch_size, S1, S1, T1), y.view(batch_size, S1, S1, T1)) if ic_weight != 0 or f_weight != 0: loss_ic, loss_f = PINO_loss3d(out.view(batch_size, S1, S1, T1), x, forcing_1, v, t_interval) else: loss_ic, loss_f = zero, zero total_loss = loss_l2 * xy_weight + loss_f * f_weight + loss_ic * ic_weight total_loss.backward() optimizer.step() train_ic = loss_ic.item() test_l2 += loss_l2.item() train_loss += total_loss.item() train_f += loss_f.item() if num_data_iter != 0: train_ic /= num_data_iter train_f /= num_data_iter train_loss /= num_data_iter test_l2 /= num_data_iter # train with random ICs for _ in range(num_eqn_iter): new_a = next(a_loader) new_a = new_a.to(device) optimizer.zero_grad() x_in = F.pad(new_a, (0, 0, 0, 5), "constant", 0) out = model(x_in).reshape(batch_size, S2, S2, T2 + 5) out = out[..., :-5] new_a = new_a[:, :, :, 0, -1] loss_ic, loss_f = PINO_loss3d(out.view(batch_size, S2, S2, T2), new_a, forcing_2, v, t_interval) eqn_loss = loss_f * f_weight + loss_ic * ic_weight eqn_loss.backward() optimizer.step() err_eqn += eqn_loss.item() scheduler.step() t2 = default_timer() if num_eqn_iter != 0: err_eqn /= num_eqn_iter if use_tqdm: pbar.set_description( ( f'Data f error: {train_f:.5f}; Data ic l2 error: {train_ic:.5f}. 
' f'Data train loss: {train_loss:.5f}; Data l2 error: {test_l2:.5f}' f'Eqn loss: {err_eqn:.5f}' ) ) if wandb and log: wandb.log( { 'Data f error': train_f, 'Data IC L2 error': train_ic, 'Data train loss': train_loss, 'Data L2 error': test_l2, 'Random IC Train equation loss': err_eqn, 'Time cost': t2 - t1 } ) save_checkpoint(config['train']['save_dir'], config['train']['save_name'], model, optimizer) if wandb and log: run.finish() def progressive_train(model, loader, train_loader, optimizer, scheduler, milestones, config, device=torch.device('cpu'), log=False, project='PINO-default', group='FDM', tags=['Nan'], use_tqdm=True): if wandb and log: run = wandb.init(project=project, entity=config['log']['entity'], group=group, config=config, tags=tags, reinit=True, settings=wandb.Settings(start_method="fork")) # data parameters v = 1 / config['data']['Re'] T = loader.T t_interval = config['data']['time_interval'] # training settings batch_size = config['train']['batchsize'] ic_weight = config['train']['ic_loss'] f_weight = config['train']['f_loss'] xy_weight = config['train']['xy_loss'] model.train() myloss = LpLoss(size_average=True) zero = torch.zeros(1).to(device) for milestone, epochs in zip(milestones, config['train']['epochs']): pbar = range(epochs) if use_tqdm: pbar = tqdm(pbar, dynamic_ncols=True, smoothing=0.05) S = loader.S // milestone print(f'Resolution :{S}') forcing = get_forcing(S).to(device) for ep in pbar: model.train() t1 = default_timer() train_loss = 0.0 train_ic = 0.0 train_f = 0.0 test_l2 = 0.0 for x, y in train_loader: x, y = x.to(device), y.to(device) x = x[:, ::milestone, ::milestone, :, :] y = y[:, ::milestone, ::milestone, :] optimizer.zero_grad() x_in = F.pad(x, (0, 0, 0, 5), "constant", 0) out = model(x_in).reshape(batch_size, S, S, T + 5) out = out[..., :-5] x = x[:, :, :, 0, -1] loss_l2 = myloss(out.view(batch_size, S, S, T), y.view(batch_size, S, S, T)) if ic_weight != 0 or f_weight != 0: loss_ic, loss_f = PINO_loss3d(out.view(batch_size, 
S, S, T), x, forcing, v, t_interval) else: loss_ic, loss_f = zero, zero total_loss = loss_l2 * xy_weight + loss_f * f_weight + loss_ic * ic_weight total_loss.backward() optimizer.step() train_ic = loss_ic.item() test_l2 += loss_l2.item() train_loss += total_loss.item() train_f += loss_f.item() scheduler.step() train_ic /= len(train_loader) train_f /= len(train_loader) train_loss /= len(train_loader) test_l2 /= len(train_loader) t2 = default_timer() if use_tqdm: pbar.set_description( ( f'Train f error: {train_f:.5f}; Train ic l2 error: {train_ic:.5f}. ' f'Train loss: {train_loss:.5f}; Test l2 error: {test_l2:.5f}' ) ) if wandb and log: wandb.log( { 'Train f error': train_f, 'Train L2 error': train_ic, 'Train loss': train_loss, 'Test L2 error': test_l2, 'Time cost': t2 - t1 } ) save_checkpoint(config['train']['save_dir'], config['train']['save_name'], model, optimizer) if wandb and log: run.finish() ================================================ FILE: train_utils/utils.py ================================================ import os import numpy as np import torch def vor2vel(w, L=2 * np.pi): ''' Convert vorticity into velocity Args: w: vorticity with shape (batchsize, num_x, num_y, num_t) Returns: ux, uy with the same shape ''' batchsize = w.size(0) nx = w.size(1) ny = w.size(2) nt = w.size(3) device = w.device w = w.reshape(batchsize, nx, ny, nt) w_h = torch.fft.fft2(w, dim=[1, 2]) # Wavenumbers in y-direction k_max = nx // 2 N = nx k_x = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), torch.arange(start=-k_max, end=0, step=1, device=device)), 0) \ .reshape(N, 1).repeat(1, N).reshape(1, N, N, 1) k_y = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device), torch.arange(start=-k_max, end=0, step=1, device=device)), 0) \ .reshape(1, N).repeat(N, 1).reshape(1, N, N, 1) # Negative Laplacian in Fourier space lap = (k_x ** 2 + k_y ** 2) lap[0, 0, 0, 0] = 1.0 f_h = w_h / lap ux_h = 2 * np.pi / L * 1j * k_y * f_h uy_h = -2 * np.pi / L * 1j * 
def vor2vel(w, L=2 * np.pi):
    '''
    Convert vorticity into velocity via the streamfunction in Fourier space.

    Solves -lap(psi) = w (integer wavenumbers), then u = d(psi)/dy,
    v = -d(psi)/dx, rescaled by 2*pi/L for a domain of side length L.

    Args:
        w: vorticity with shape (batchsize, num_x, num_y, num_t);
           num_x must equal num_y (square grid)
        L: physical side length of the periodic domain (default 2*pi)

    Returns:
        ux, uy: velocity components with the same shape as w
    '''
    batchsize = w.size(0)
    nx = w.size(1)
    ny = w.size(2)
    nt = w.size(3)
    device = w.device
    w = w.reshape(batchsize, nx, ny, nt)

    w_h = torch.fft.fft2(w, dim=[1, 2])
    # Wavenumbers in standard FFT ordering: 0..k_max-1, -k_max..-1.
    k_max = nx // 2
    N = nx
    k_x = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device),
                     torch.arange(start=-k_max, end=0, step=1, device=device)), 0) \
        .reshape(N, 1).repeat(1, N).reshape(1, N, N, 1)
    k_y = torch.cat((torch.arange(start=0, end=k_max, step=1, device=device),
                     torch.arange(start=-k_max, end=0, step=1, device=device)), 0) \
        .reshape(1, N).repeat(N, 1).reshape(1, N, N, 1)
    # Negative Laplacian in Fourier space; the (0,0) mode is set to 1 to
    # avoid division by zero (the mean of the streamfunction is arbitrary).
    lap = (k_x ** 2 + k_y ** 2)
    lap[0, 0, 0, 0] = 1.0
    f_h = w_h / lap
    ux_h = 2 * np.pi / L * 1j * k_y * f_h
    uy_h = -2 * np.pi / L * 1j * k_x * f_h
    # Keep only the non-negative frequencies along dim 2 for irfft2.
    ux = torch.fft.irfft2(ux_h[:, :, :k_max + 1], dim=[1, 2])
    uy = torch.fft.irfft2(uy_h[:, :, :k_max + 1], dim=[1, 2])
    return ux, uy


def get_sample(N, T, s, p, q, device='cuda'):
    '''
    Sample collocation points for a 1D time-dependent PDE on (t, x) in [0,1]^2.

    Draws p points on the initial condition (t=0), p boundary points
    (p//2 each on x=0 and x=1, sharing the same random times), and q
    interior points whose time coordinates are biased toward t=0.

    Args:
        N: number of independent sample sets (batch size)
        T: unused; kept for interface compatibility with get_grid
        s: spatial resolution used to index the initial condition
        p: number of IC points and of BC points (p should be even so the
           concatenated total matches the (p+p+q) reshape)
        q: number of interior points
        device: target device; default 'cuda' preserves the original
            hard-coded behavior

    Returns:
        sample: (N, p+p+q, 2) stacked (t, x) coordinates
        sample_t, sample_x: coordinate tensors with requires_grad=True
            for autograd-based PDE residuals
        index_ic: (N, p) long indices of the sampled IC grid nodes
    '''
    # Initial condition: random grid nodes at t = 0.
    index_ic = torch.randint(s, size=(N, p))
    sample_ic_t = torch.zeros(N, p)
    sample_ic_x = index_ic / s

    # Boundary condition: identical random times on both walls x=0 and x=1.
    sample_bc = torch.rand(size=(N, p // 2))
    sample_bc_t = torch.cat([sample_bc, sample_bc], dim=1)
    sample_bc_x = torch.cat([torch.zeros(N, p // 2), torch.ones(N, p // 2)], dim=1)

    # Interior: uniform in x; time warped by 1 - cos(u * pi/2) so that more
    # points land near t = 0 (alternative uniform / squared samplers were
    # tried previously).
    sample_i_t = -torch.cos(torch.rand(size=(N, q)) * np.pi / 2) + 1
    sample_i_x = torch.rand(size=(N, q))

    sample_t = torch.cat([sample_ic_t, sample_bc_t, sample_i_t], dim=1).to(device)
    sample_t.requires_grad = True
    sample_x = torch.cat([sample_ic_x, sample_bc_x, sample_i_x], dim=1).to(device)
    sample_x.requires_grad = True
    sample = torch.stack([sample_t, sample_x], dim=-1).reshape(N, (p + p + q), 2)
    return sample, sample_t, sample_x, index_ic.long()


def get_grid(N, T, s, device='cuda'):
    '''
    Build a full (t, x) space-time grid with t in [0,1] (T nodes, inclusive)
    and x in [0,1) (s nodes, periodic-style, endpoint excluded).

    Args:
        N: batch size
        T: number of time steps
        s: spatial resolution
        device: target device; default 'cuda' preserves the original
            hard-coded behavior

    Returns:
        grid: (N, T*s, 2) flattened (t, x) coordinates
        gridt, gridx: (N, T, s) coordinate tensors with requires_grad=True
    '''
    gridt = torch.tensor(np.linspace(0, 1, T), dtype=torch.float, device=device) \
        .reshape(1, T, 1).repeat(N, 1, s)
    gridt.requires_grad = True
    gridx = torch.tensor(np.linspace(0, 1, s + 1)[:-1], dtype=torch.float, device=device) \
        .reshape(1, 1, s).repeat(N, T, 1)
    gridx.requires_grad = True
    grid = torch.stack([gridt, gridx], dim=-1).reshape(N, T * s, 2)
    return grid, gridt, gridx


def get_2dgrid(S):
    '''
    get array of points on 2d grid in (0,1)^2
    Args:
        S: resolution
    Returns:
        points: flattened grid, ndarray (N, 2)
    '''
    xarr = np.linspace(0, 1, S)
    yarr = np.linspace(0, 1, S)
    # 'ij' indexing: the x coordinate varies slowest in the flattened output.
    xx, yy = np.meshgrid(xarr, yarr, indexing='ij')
    points = np.stack([xx.ravel(), yy.ravel()], axis=0).T
    return points


def torch2dgrid(num_x, num_y, bot=(0, 0), top=(1, 1)):
    '''
    Build a 2D mesh of (x, y) coordinates on [bot, top].

    Args:
        num_x, num_y: number of nodes along each axis (endpoints included)
        bot: (x, y) lower corner, default (0, 0)
        top: (x, y) upper corner, default (1, 1)

    Returns:
        mesh: (num_x, num_y, 2) tensor of coordinates
    '''
    x_bot, y_bot = bot
    x_top, y_top = top
    x_arr = torch.linspace(x_bot, x_top, steps=num_x)
    y_arr = torch.linspace(y_bot, y_top, steps=num_y)
    xx, yy = torch.meshgrid(x_arr, y_arr, indexing='ij')
    mesh = torch.stack([xx, yy], dim=2)
    return mesh


def get_grid3d(S, T, time_scale=1.0, device='cpu'):
    '''
    Build broadcastable (x, y, t) coordinate channels for a 3D space-time grid.

    Space uses S nodes in [0,1) (endpoint excluded); time uses T nodes in
    [0, time_scale] (inclusive).

    Args:
        S: spatial resolution (both axes)
        T: number of time steps
        time_scale: final time (default 1.0)
        device: target device (default 'cpu')

    Returns:
        gridx, gridy, gridt: each of shape (1, S, S, T, 1)
    '''
    gridx = torch.tensor(np.linspace(0, 1, S + 1)[:-1], dtype=torch.float, device=device)
    gridx = gridx.reshape(1, S, 1, 1, 1).repeat([1, 1, S, T, 1])
    gridy = torch.tensor(np.linspace(0, 1, S + 1)[:-1], dtype=torch.float, device=device)
    gridy = gridy.reshape(1, 1, S, 1, 1).repeat([1, S, 1, T, 1])
    gridt = torch.tensor(np.linspace(0, 1 * time_scale, T), dtype=torch.float, device=device)
    gridt = gridt.reshape(1, 1, 1, T, 1).repeat([1, S, S, 1, 1])
    return gridx, gridy, gridt


def convert_ic(u0, N, S, T, time_scale=1.0):
    '''
    Tile an initial condition over time and append grid coordinate channels.

    Args:
        u0: initial condition, reshapeable to (N, S, S)
        N: batch size
        S: spatial resolution
        T: number of time steps
        time_scale: final time passed to get_grid3d

    Returns:
        a_data: (N, S, S, T, 4) tensor with channels (x, y, t, u0)
    '''
    u0 = u0.reshape(N, S, S, 1, 1).repeat([1, 1, 1, T, 1])
    gridx, gridy, gridt = get_grid3d(S, T, time_scale=time_scale, device=u0.device)
    a_data = torch.cat((gridx.repeat([N, 1, 1, 1, 1]),
                        gridy.repeat([N, 1, 1, 1, 1]),
                        gridt.repeat([N, 1, 1, 1, 1]),
                        u0), dim=-1)
    return a_data


def requires_grad(model, flag=True):
    '''
    Set requires_grad on every parameter of a module (freeze/unfreeze).
    '''
    for p in model.parameters():
        p.requires_grad = flag


def set_grad(tensors, flag=True):
    '''
    Set requires_grad on every tensor in an iterable.
    '''
    for p in tensors:
        p.requires_grad = flag


def zero_grad(params):
    '''
    set grad field to 0

    Accepts either a single tensor or an iterable of tensors; tensors with
    no .grad yet are left untouched.
    '''
    if isinstance(params, torch.Tensor):
        if params.grad is not None:
            params.grad.zero_()
    else:
        for p in params:
            if p.grad is not None:
                p.grad.zero_()


def count_params(net):
    '''
    Return the total number of elements across all parameters of net.
    '''
    return sum(p.numel() for p in net.parameters())


def save_checkpoint(path, name, model, optimizer=None):
    '''
    Save model (and optionally optimizer) state under checkpoints/<path>/<name>.

    Args:
        path: subdirectory created under 'checkpoints/'
        name: checkpoint file name
        model: module to save; (Distributed)DataParallel wrappers are
            unwrapped so the checkpoint loads into a plain module
        optimizer: optional optimizer whose state_dict is saved alongside
    '''
    ckpt_dir = 'checkpoints/%s/' % path
    # exist_ok avoids the race between an existence check and creation.
    os.makedirs(ckpt_dir, exist_ok=True)
    try:
        model_state_dict = model.module.state_dict()
    except AttributeError:
        model_state_dict = model.state_dict()
    if optimizer is not None:
        optim_dict = optimizer.state_dict()
    else:
        # NOTE(review): 0.0 is the historical placeholder written when no
        # optimizer is supplied; kept for checkpoint-format compatibility
        # (save_ckpt uses None instead).
        optim_dict = 0.0
    torch.save({
        'model': model_state_dict,
        'optim': optim_dict
    }, ckpt_dir + name)
    # Parenthesized: '%' binds tighter than '+', the original relied on
    # that precedence; the printed text is unchanged.
    print('Checkpoint is saved at %s' % (ckpt_dir + name))


def save_ckpt(path, model, optimizer=None, scheduler=None):
    '''
    Save model, optimizer and scheduler state to an explicit path.

    Args:
        path: full target file path
        model: module whose state_dict is saved
        optimizer: optional optimizer; None is stored when absent
        scheduler: optional LR scheduler; None is stored when absent
    '''
    model_state = model.state_dict()
    if optimizer:
        optim_state = optimizer.state_dict()
    else:
        optim_state = None
    if scheduler:
        scheduler_state = scheduler.state_dict()
    else:
        scheduler_state = None
    torch.save({
        'model': model_state,
        'optim': optim_state,
        'scheduler': scheduler_state
    }, path)
    print(f'Checkpoint is saved to {path}')


def dict2str(log_dict):
    '''
    Format a dict as 'key: value|key: value|...' for compact logging.
    '''
    # join avoids the quadratic repeated string concatenation of the
    # original += loop; output is byte-identical.
    return ''.join(f'{key}: {value}|' for key, value in log_dict.items())