Repository: PredictiveIntelligenceLab/MultiscalePINNs
Branch: main
Commit: ba7d6bb8af6c
Files: 19
Total size: 175.5 KB

Directory structure:
gitextract_k7rpt5pc/

├── GrayScott2D/
│   ├── Gray_Scott.py
│   ├── Gray_Scott_FF.py
│   ├── Gray_Scott_mFF.py
│   ├── data/
│   │   ├── GrayScott.m
│   │   ├── parse_data.py
│   │   └── readme
│   └── models_tf.py
├── Poisson1D/
│   ├── Compute_Jacobian.py
│   ├── Poisson_1D.py
│   └── models_tf.py
├── README.md
├── Regression/
│   ├── Compute_Jacobian.py
│   ├── models_tf.py
│   └── regression.py
├── heat1D/
│   ├── heat1D.py
│   └── models_tf.py
└── wave1D/
    ├── Compute_Jacobian.py
    ├── wave1D.py
    └── wave_models_tf.py

================================================
FILE CONTENTS
================================================

================================================
FILE: GrayScott2D/Gray_Scott.py
================================================
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.interpolate import griddata
from models_tf import Sampler, ResidualSampler, DataSampler, Gray_Scott2D

if __name__ == '__main__':
    # Train the baseline (plain fully-connected) Gray-Scott PINN on data.npy,
    # save the checkpoint and the inferred-diffusion histories, then compare
    # one snapshot of the prediction against the reference solution in sol.mat.
    import os  # local import: only used to create the output directories

    # Create the output directories before the long training run. Both the
    # checkpoint writer and np.savetxt fail when the parent directory is
    # missing, which would otherwise only surface after training has finished.
    os.makedirs('SavedModels', exist_ok=True)
    os.makedirs('SavedResults', exist_ok=True)

    # Reload  data
    datafile = 'data.npy'
    # The file stores a pickled dict, hence allow_pickle=True and .item().
    data = np.load(datafile, allow_pickle=True).item()

    X = data['X']
    U = data['U']

    # Time intervals
    tspan = data['tspan']
    T1 = data['T1']
    T2 = data['T2']

    # Parameters
    # ep1 / ep2 are loaded but not passed to the model: the model learns
    # its own epsilon1 / epsilon2 variables.
    epsilon1 = data['ep1']
    epsilon2 = data['ep2']
    b = data['b']
    d = data['d']

    # Define data sampler and residual sampler
    # Rows are the lower / upper corner of the (t, x, y) box.
    dom_coords = np.array([[T1, -1.0, -1.0],
                           [T2, 1.0, 1.0]])
    res_sampler = Sampler(3, dom_coords, lambda x: np.zeros_like(x), name='Forcing')

    data_sampler = DataSampler(X, U)

    # Create model
    layers = [3, 100, 100, 100, 100, 100, 100, 100, 2]
    model = Gray_Scott2D(data_sampler, res_sampler, layers, b, d)

    # Train model
    model.train(nIter=120000, batch_size=1000)

    # Save results
    model.saver.save(model.sess, 'SavedModels/GS_param_7x100_it120000.ckpt')

    # The model stores the diffusion coefficients in log-space, hence np.exp.
    ep1 = model.sess.run(model.epsilon1)
    ep2 = model.sess.run(model.epsilon2)

    print('ep1: {:.3e}, ep2: {:.3e}'.format(np.exp(ep1), np.exp(ep2)))

    ep1_log = model.ep1_log
    ep2_log = model.ep2_log

    np.savetxt('SavedResults/ep1_log_original', ep1_log, delimiter=',')
    np.savetxt('SavedResults/ep2_log_original', ep2_log, delimiter=',')

    # Prediction
    raw_data = sio.loadmat('sol.mat')

    X = raw_data['X']
    Y = raw_data['Y']
    tspan = raw_data['tspan'].flatten()
    usol = raw_data['usol']
    vsol = raw_data['vsol']

    # Negative index: the 50th snapshot counted from the end of tspan.
    step = -50
    x = X.flatten()[:, None]
    y = Y.flatten()[:, None]
    t = tspan[step] * np.ones_like(x)

    X_star = np.concatenate([t, x, y], axis=1)

    u_pred, v_pred = model.predict(X_star)
    u_star = usol[:, :, step].flatten()[:, None]
    v_star = vsol[:, :, step].flatten()[:, None]

    error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
    error_v = np.linalg.norm(v_star - v_pred, 2) / np.linalg.norm(v_star, 2)

    print('Relative L2 error_u: %e' % (error_u))
    print('Relative L2 error_v: %e' % (error_v))

    ep1 = model.sess.run(model.epsilon1)
    ep2 = model.sess.run(model.epsilon2)

    print('ep1: {:.3e}, ep2: {:.3e}'.format(np.exp(ep1), np.exp(ep2)))

    # Plot
    # Interpolate the flat predictions back onto the (X, Y) mesh for pcolor.
    U_star = griddata(np.concatenate([x, y], axis=1), u_pred.flatten(), (X, Y), method='cubic')
    V_star = griddata(np.concatenate([x, y], axis=1), v_pred.flatten(), (X, Y), method='cubic')

    # u: prediction, reference, point-wise difference
    plt.figure(figsize=(18, 5))
    plt.subplot(1, 3, 1)
    plt.pcolor(X, Y, U_star)
    plt.colorbar()
    plt.subplot(1, 3, 2)
    plt.pcolor(X, Y, usol[:, :, step])
    plt.colorbar()

    plt.subplot(1, 3, 3)
    plt.pcolor(X, Y, U_star - usol[:, :, step])
    plt.colorbar()
    plt.show()

    # v: prediction, reference, point-wise difference
    plt.figure(figsize=(18, 5))
    plt.subplot(1, 3, 1)
    plt.pcolor(X, Y, V_star)
    plt.colorbar()
    plt.subplot(1, 3, 2)
    plt.pcolor(X, Y, vsol[:, :, step])
    plt.colorbar()

    plt.subplot(1, 3, 3)
    plt.pcolor(X, Y, V_star - vsol[:, :, step])
    plt.colorbar()
    plt.show()


================================================
FILE: GrayScott2D/Gray_Scott_FF.py
================================================
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.interpolate import griddata
from models_tf import Sampler, ResidualSampler, DataSampler, Gray_Scott2D_FF

if __name__ == '__main__':
    # Train the Fourier-feature Gray-Scott PINN on data.npy, save the
    # checkpoint plus the parameter / loss histories, then compare one
    # snapshot of the prediction against the reference solution in sol.mat.
    import os  # local import: only used to create the output directories

    # Create the output directories before the long training run. Both the
    # checkpoint writer and np.savetxt fail when the parent directory is
    # missing, which would otherwise only surface after training has finished.
    os.makedirs('SavedModels', exist_ok=True)
    os.makedirs('SavedResults', exist_ok=True)

    # Reload  data
    datafile = 'data.npy'
    # The file stores a pickled dict, hence allow_pickle=True and .item().
    data = np.load(datafile, allow_pickle=True).item()

    X = data['X']
    U = data['U']

    # Time intervals
    tspan = data['tspan']
    T1 = data['T1']
    T2 = data['T2']

    # Parameters
    # ep1 / ep2 are loaded but not passed to the model: the model learns
    # its own epsilon1 / epsilon2 variables.
    epsilon1 = data['ep1']
    epsilon2 = data['ep2']
    b = data['b']
    d = data['d']

    # Define data sampler and residual sampler
    # Rows are the lower / upper corner of the (t, x, y) box.
    dom_coords = np.array([[T1, -1.0, -1.0],
                           [T2, 1.0, 1.0]])

    res_sampler = Sampler(3, dom_coords, lambda x: np.zeros_like(x), name='Forcing')

    data_sampler = DataSampler(X, U)

    # Create model
    # No raw-input width here: layers[0] is the Fourier embedding width.
    layers = [100, 100, 100, 100, 100, 100, 100, 2]
    model = Gray_Scott2D_FF(data_sampler, res_sampler, layers, b, d)

    model.train(nIter=120000, batch_size=1000)

    model.saver.save(model.sess, 'SavedModels/GS_param_FF_7x100_it120000.ckpt')

    # The model stores the diffusion coefficients in log-space, hence np.exp.
    ep1 = model.sess.run(model.epsilon1)
    ep2 = model.sess.run(model.epsilon2)

    print('ep1: {:.3e}, ep2: {:.3e}'.format(np.exp(ep1), np.exp(ep2)))

    ep1_log = model.ep1_log
    ep2_log = model.ep2_log

    loss_data = model.loss_u_log
    loss_res = model.loss_r_log

    np.savetxt('SavedResults/ep1_log', ep1_log, delimiter=',')
    np.savetxt('SavedResults/ep2_log', ep2_log, delimiter=',')

    np.savetxt('SavedResults/loss_data', loss_data, delimiter=',')
    np.savetxt('SavedResults/loss_res', loss_res, delimiter=',')

    # Prediction
    raw_data = sio.loadmat('sol.mat')

    X = raw_data['X']
    Y = raw_data['Y']
    tspan = raw_data['tspan'].flatten()
    usol = raw_data['usol']
    vsol = raw_data['vsol']

    # Negative index: the 50th snapshot counted from the end of tspan.
    step = -50
    x = X.flatten()[:, None]
    y = Y.flatten()[:, None]
    t = tspan[step] * np.ones_like(x)

    X_star = np.concatenate([t, x, y], axis=1)

    u_pred, v_pred = model.predict(X_star)
    u_star = usol[:, :, step].flatten()[:, None]
    v_star = vsol[:, :, step].flatten()[:, None]

    error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
    error_v = np.linalg.norm(v_star - v_pred, 2) / np.linalg.norm(v_star, 2)

    print('Relative L2 error_u: %e' % (error_u))
    print('Relative L2 error_v: %e' % (error_v))

    ep1 = model.sess.run(model.epsilon1)
    ep2 = model.sess.run(model.epsilon2)

    print('ep1: {:.3e}, ep2: {:.3e}'.format(np.exp(ep1), np.exp(ep2)))

    # Plot
    # Interpolate the flat predictions back onto the (X, Y) mesh for pcolor.
    U_star = griddata(np.concatenate([x, y], axis=1), u_pred.flatten(), (X, Y), method='cubic')
    V_star = griddata(np.concatenate([x, y], axis=1), v_pred.flatten(), (X, Y), method='cubic')

    plt.figure(figsize=(18, 5))
    plt.subplot(1, 3, 1)
    plt.pcolor(X, Y, usol[:, :, step])
    plt.colorbar()
    plt.title('Reference')

    plt.subplot(1, 3, 2)
    plt.pcolor(X, Y, U_star)
    plt.colorbar()
    plt.title('Predicted')

    plt.subplot(1, 3, 3)
    plt.pcolor(X, Y, U_star - usol[:, :, step])
    plt.colorbar()
    plt.title('Point-wise error')
    plt.show()

    plt.figure(figsize=(18, 5))

    plt.subplot(1, 3, 1)
    plt.pcolor(X, Y, vsol[:, :, step])
    plt.colorbar()
    plt.title('Reference')

    plt.subplot(1, 3, 2)
    plt.pcolor(X, Y, V_star)
    plt.colorbar()
    plt.title('Predicted')

    plt.subplot(1, 3, 3)
    plt.pcolor(X, Y, V_star - vsol[:, :, step])
    plt.colorbar()
    plt.title('Point-wise error')
    plt.show()

    # History of the inferred diffusion coefficients on a log axis.
    plt.plot(ep1_log, label='ep1')
    plt.plot(ep2_log, label='ep2')
    plt.legend()
    plt.yscale('log')
    # Without an explicit show() this last figure is never displayed when
    # the file is run as a plain script.
    plt.show()


================================================
FILE: GrayScott2D/Gray_Scott_mFF.py
================================================
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy.interpolate import griddata
from models_tf import Sampler, ResidualSampler, DataSampler, Gray_Scott2D_ST_mFF, Gray_Scott2D_ST_mFF_adaptive

if __name__ == '__main__':
    # Train the Gray_Scott2D_ST_mFF model on data.npy, save the checkpoint
    # plus the parameter / loss histories, then compare one snapshot of the
    # prediction against the reference solution in sol.mat.
    import os  # local import: only used to create the output directories

    # Create the output directories before the long training run. Both the
    # checkpoint writer and np.savetxt fail when the parent directory is
    # missing, which would otherwise only surface after training has finished.
    os.makedirs('SavedModels', exist_ok=True)
    os.makedirs('SavedResults', exist_ok=True)

    # Reload  data
    datafile = 'data.npy'
    # The file stores a pickled dict, hence allow_pickle=True and .item().
    data = np.load(datafile, allow_pickle=True).item()

    X = data['X']
    U = data['U']

    # Time intervals
    tspan = data['tspan']
    T1 = data['T1']
    T2 = data['T2']

    # Parameters
    # ep1 / ep2 are loaded but not passed to the model below.
    epsilon1 = data['ep1']
    epsilon2 = data['ep2']
    b = data['b']
    d = data['d']

    # Define data sampler and residual sampler
    # Rows are the lower / upper corner of the (t, x, y) box.
    dom_coords = np.array([[T1, -1.0, -1.0],
                           [T2, 1.0, 1.0]])
    res_sampler = Sampler(3, dom_coords, lambda x: np.zeros_like(x), name='Forcing')

    data_sampler = DataSampler(X, U)

    # Create model
    layers = [100, 100, 100, 100, 100, 100, 100, 2]
    model = Gray_Scott2D_ST_mFF(data_sampler, res_sampler, layers, b, d)

    # Train model
    model.train(nIter=120000, batch_size=1000)

    # Save results
    model.saver.save(model.sess, 'SavedModels/GS_param_ST_mFF_7x100_it120000.ckpt')

    ep1 = model.sess.run(model.epsilon1)
    ep2 = model.sess.run(model.epsilon2)

    # np.exp mirrors the other Gray-Scott scripts, which treat epsilon1/2 as
    # log-space values -- presumably the same for this model; verify against
    # the full class definition.
    print('ep1: {:.3e}, ep2: {:.3e}'.format(np.exp(ep1), np.exp(ep2)))

    ep1_log = model.ep1_log
    ep2_log = model.ep2_log

    loss_data = model.loss_u_log
    loss_res = model.loss_r_log

    np.savetxt('SavedResults/ep1_log', ep1_log, delimiter=',')
    np.savetxt('SavedResults/ep2_log', ep2_log, delimiter=',')

    np.savetxt('SavedResults/loss_data', loss_data, delimiter=',')
    np.savetxt('SavedResults/loss_res', loss_res, delimiter=',')

    # Prediction
    raw_data = sio.loadmat('sol.mat')

    X = raw_data['X']
    Y = raw_data['Y']
    tspan = raw_data['tspan'].flatten()
    usol = raw_data['usol']
    vsol = raw_data['vsol']

    # Negative index: the 50th snapshot counted from the end of tspan.
    step = -50
    x = X.flatten()[:, None]
    y = Y.flatten()[:, None]
    t = tspan[step] * np.ones_like(x)

    X_star = np.concatenate([t, x, y], axis=1)

    u_pred, v_pred = model.predict(X_star)
    u_star = usol[:, :, step].flatten()[:, None]
    v_star = vsol[:, :, step].flatten()[:, None]

    error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
    error_v = np.linalg.norm(v_star - v_pred, 2) / np.linalg.norm(v_star, 2)

    print('Relative L2 error_u: %e' % (error_u))
    print('Relative L2 error_v: %e' % (error_v))

    # Plot
    # Interpolate the flat predictions back onto the (X, Y) mesh for pcolor.
    U_star = griddata(np.concatenate([x, y], axis=1), u_pred.flatten(), (X, Y), method='cubic')
    V_star = griddata(np.concatenate([x, y], axis=1), v_pred.flatten(), (X, Y), method='cubic')

    # u: prediction, reference, point-wise difference
    plt.figure(figsize=(18, 5))
    plt.subplot(1, 3, 1)
    plt.pcolor(X, Y, U_star)
    plt.colorbar()
    plt.subplot(1, 3, 2)
    plt.pcolor(X, Y, usol[:, :, step])
    plt.colorbar()

    plt.subplot(1, 3, 3)
    plt.pcolor(X, Y, U_star - usol[:, :, step])
    plt.colorbar()
    plt.show()

    # v: prediction, reference, point-wise difference
    plt.figure(figsize=(18, 5))
    plt.subplot(1, 3, 1)
    plt.pcolor(X, Y, V_star)
    plt.colorbar()
    plt.subplot(1, 3, 2)
    plt.pcolor(X, Y, vsol[:, :, step])
    plt.colorbar()

    plt.subplot(1, 3, 3)
    plt.pcolor(X, Y, V_star - vsol[:, :, step])
    plt.colorbar()
    plt.show()


================================================
FILE: GrayScott2D/data/GrayScott.m
================================================
%% Gray-Scott equations in 2D
% Nick Trefethen, April 2016

%%
% (Chebfun Example pde/GrayScott.m)
% [Tags: #Gray-Scott, #spin2]
%
% This script calls chebfun, chebfun2v, spinop2 and spin2, so it needs
% Chebfun on the MATLAB path. It integrates the Gray-Scott system on
% [-1,1]^2 and samples both fields on a uniform N-by-N grid at every
% time in tspan, producing the arrays usol and vsol.

%% 1. Rolls
% The Gray-Scott equations are a pair of coupled reaction-diffusion
% equations that lead to interesting patterns [1,2,3].
% Let us look at two examples in 2D.

%%
% The equations are
% $$ u_t = \varepsilon_1\Delta u + b(1-u) - uv^2, \quad
% v_t = \varepsilon_2\Delta v - dv + uv^2, $$
% where $\Delta$ is the Laplacian and $\varepsilon_1,
% \varepsilon_2,b,d$ are parameters.
% To begin with we choose these values.
ep1 = 0.00001; ep2 = 0.000005;
b = 0.04; d = 0.1;
%%
% We now solve up to $t=5000$ with `spin2` (the end of tspan below),
% storing steps+1 = 501 snapshots. Plotting is switched off in the
% spin2 call; the solution is only sampled and stored.
% nn and dt are the second and third arguments of spin2 -- presumably the
% spatial resolution and the time step; confirm against the Chebfun docs.
nn = 400;
steps = 500;
dt = 0.5;

% Note: the chebfun x defined on this line is not used again in this script.
dom = [-1 1 -1 1]; x = chebfun('x',dom(1:2)); tspan = linspace(0,5000, steps+1);
S = spinop2(dom,tspan);
% Linear part: diffusion terms. Nonlinear part: reaction terms.
S.lin = @(u,v) [ep1*lap(u); ep2*lap(v)];
S.nonlin = @(u,v) [b*(1-u)-u.*v.^2;-d*v+u.*v.^2];
% Initial condition: two Gaussian bumps offset from the origin in opposite
% directions (u has a dip, v has a peak).
S.init = chebfun2v(@(x,y) 1-exp(-80*((x+.05).^2+(y+.02).^2)), ...
                   @(x,y) exp(-80*((x-.05).^2+(y-.02).^2)),dom);
tic, u = spin2(S, nn, dt,'plot','off');

% plot(u{1, 4}), view(0,90), axis equal, axis off
% No trailing semicolon: the elapsed wall-clock time is echoed.
time_in_seconds = toc

% Uniform N-by-N output grid on [-1,1]^2 (independent of nn above).
N = 200;
[X,Y] = meshgrid(linspace(-1,1, N), linspace(-1,1, N));

% Evaluate the first solution component (u) on the grid at every snapshot.
usol = zeros(N, N, steps+1);
for i = 1:steps+1
    usol(:,:,i) = u{1, i}(X,Y);
end

% Evaluate the second solution component (v) on the grid at every snapshot.
vsol = zeros(N,N, steps+1);
for i = 1:steps+1
    vsol(:,:,i) = u{2, i}(X,Y);
end

% The save below is disabled. Uncomment it to write sol.mat, the file that
% parse_data.py and the Gray_Scott*.py scripts load.
% save('sol.mat', 'b', 'd', 'ep1', 'ep2', 'tspan', 'usol', 'vsol', 'X', 'Y')


================================================
FILE: GrayScott2D/data/parse_data.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 25 13:10:47 2020

@author: Wsf12
"""

import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio

# Convert a window of snapshots from sol.mat into the flat training arrays
# stored in data.npy: X has columns (t, x, y) and U has columns (u, v).
sol = sio.loadmat('sol.mat')

tspan = sol['tspan'].flatten()
usol, vsol = sol['usol'], sol['vsol']

# Model parameters are copied through to the output dictionary unchanged.
epsilon1, epsilon2 = sol['ep1'], sol['ep2']
b, d = sol['b'], sol['d']

# Flattened spatial grid as column vectors, shared by every snapshot.
x = sol['X'].flatten()[:, None]
y = sol['Y'].flatten()[:, None]

steps = len(tspan)  # number of stored snapshots (informational; not used below)

# T1 and T2 are snapshot indices into tspan (not physical times); the window
# is inclusive at both ends.
T1 = 350
T2 = 400
snapshot_ids = range(T1, T2 + 1)

# One (t, x, y) block per snapshot, stacked row-wise.
X = np.vstack([
    np.concatenate([tspan[k] * np.ones_like(x), x, y], axis=1)
    for k in snapshot_ids
])

# Matching (u, v) block per snapshot, flattened the same way as the grid.
U = np.vstack([
    np.concatenate([usol[:, :, k].flatten()[:, None],
                    vsol[:, :, k].flatten()[:, None]], axis=1)
    for k in snapshot_ids
])

# 'T1' / 'T2' in the saved dictionary are the physical times of the window
# bounds, i.e. tspan evaluated at the indices above.
data_dict = {'X': X, 'U': U,
             'tspan': tspan, 'T1': tspan[T1], 'T2': tspan[T2],
             'ep1': epsilon1, 'ep2': epsilon2, 'b': b, 'd': d}

np.save('data.npy', data_dict)


##  data down sampling
#X_reduced = data['X_reduced']
#Y_reduced = data['Y_reduced']
#
#tspan = data['tspan'].flatten()
#
#usol =  data['usol_reduced']
#vsol =  data['vsol_reduced']
#
#epsilon1 = data['ep1']
#epsilon2 = data['ep2']
#b = data['b']
#d = data['d']
#
#X_list = []
#U_list = []
#
#steps = len(tspan) # 500 snap shots
#
#x = X_reduced.flatten()[:,None]
#y = Y_reduced.flatten()[:,None]
#
#for k in range(T1, T2):
#    t = tspan[k] * np.ones_like(x) 
#    
#    u = usol[:,:,k].flatten()[:, None]
#    v = vsol[:,:,k].flatten()[:, None]
#    
#    X_list.append(np.concatenate([t, x, y], axis = 1))
#    U_list.append(np.concatenate([u, v], axis = 1))
#
#X_reduced = np.vstack(X_list)
#U_reduced = np.vstack(U_list)
#
#data_dict = {'X_reduced': X_reduced, 'U_reduced': U_reduced, 
#             'tspan': tspan, 'T1': tspan[T1], 'T2': tspan[T2], 
#             'ep1':epsilon1, 'ep2':epsilon2, 'b':b, 'd':d}
#
#np.save('data_reduced.npy', data_dict)


================================================
FILE: GrayScott2D/data/readme
================================================


================================================
FILE: GrayScott2D/models_tf.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 25 14:22:32 2020

@author: Wsf12
"""

import tensorflow as tf
import numpy as np
import time


class Sampler:
    """Draw uniform random points from an axis-aligned box and evaluate a function on them.

    ``coords`` is indexed as a two-row array: row 0 is the lower corner of
    the box and row 1 the upper corner. ``func`` is applied to the whole
    batch of sampled points.
    """

    def __init__(self, dim, coords, func, name=None):
        self.dim, self.coords = dim, coords
        self.func, self.name = func, name

    def sample(self, N):
        """Return ``(points, func(points))`` for ``N`` uniformly drawn points."""
        lower = self.coords[0:1, :]
        upper = self.coords[1:2, :]
        # Scale unit-cube samples onto the box [lower, upper].
        points = lower + (upper - lower) * np.random.rand(N, self.dim)
        values = self.func(points)
        return points, values


class ResidualSampler:
    """Draw random row subsets from a fixed array of points.

    ``name`` is accepted for signature parity with the other samplers but
    is not stored.
    """

    def __init__(self, X, name=None):
        self.X = X
        # Number of available rows; upper bound on the batch size.
        self.N = X.shape[0]

    def sample(self, batch_size):
        """Return ``batch_size`` distinct rows of ``X`` chosen at random."""
        rows = np.random.choice(self.N, batch_size, replace=False)
        return self.X[rows, :]


class DataSampler:
    """Draw random, row-aligned mini-batches from paired arrays ``X`` and ``Y``.

    ``name`` is accepted for signature parity with the other samplers but
    is not stored.
    """

    def __init__(self, X, Y, name=None):
        self.X, self.Y = X, Y
        # Number of available rows; upper bound on the batch size.
        self.N = X.shape[0]

    def sample(self, batch_size):
        """Return ``(X_batch, Y_batch)`` for ``batch_size`` distinct random rows."""
        rows = np.random.choice(self.N, batch_size, replace=False)
        # The same row indices are used for both arrays so pairs stay aligned.
        return self.X[rows, :], self.Y[rows, :]


class Gray_Scott2D:
    """Physics-informed network for the 2D Gray-Scott system (TensorFlow 1.x graph mode).

    A fully-connected tanh network maps normalized ``(t, x, y)`` to ``(u, v)``.
    The loss is the sum of a data misfit and the mean squared PDE residuals

        u_t - exp(epsilon1) * (u_xx + u_yy) - b * (1 - u) + u * v**2
        v_t - exp(epsilon2) * (v_xx + v_yy) + d * v - u * v**2

    ``epsilon1`` and ``epsilon2`` are trainable scalars that enter the
    residual through ``exp`` (i.e. they hold log-diffusion coefficients);
    both are initialized to -10.
    """

    # Initialize the class
    def __init__(self, data_sampler, residual_sampler, layers, b, d):
        """Build the graph, the optimizer and the session.

        Args:
            data_sampler: object exposing ``N`` and ``sample(n) -> (X, U)``.
                Three columns of ``X`` are used as (t, x, y) and two columns
                of ``U`` as (u, v).
            residual_sampler: object whose ``sample(n)`` returns a pair
                ``(X, Y)``; only the points ``X`` are used.
            layers: layer widths of the network, input width first.
            b, d: reaction coefficients used in the PDE residual.
        """
        # Draw the whole data set once to compute normalization statistics.
        N = data_sampler.N
        X, U = data_sampler.sample(N)

        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]
        self.mu_y, self.sigma_y = self.mu_X[2], self.sigma_X[2]

        self.mu_U, self.sigma_U = U.mean(0), U.std(0)
        self.mu_u, self.sigma_u = self.mu_U[0], self.sigma_U[0]
        self.mu_v, self.sigma_v = self.mu_U[1], self.sigma_U[1]

        # Samplers
        self.data_sampler = data_sampler
        self.residual_sampler = residual_sampler

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Parameters
        # Trainable log-diffusion coefficients (exp() is applied in net_r).
        self.epsilon1 = tf.Variable(-10.0, dtype=tf.float32)
        self.epsilon2 = tf.Variable(-10.0, dtype=tf.float32)

        self.b = b
        self.d = d

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.v_tf = tf.placeholder(tf.float32, shape=(None, 1))
        # w_tf is created but never fed or used elsewhere in this class.
        self.w_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Inputs for the data-fit term
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.y_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Inputs for the residual (collocation) term
        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.y_r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.u_pred, self.v_pred = self.net_u(self.t_u_tf,
                                              self.x_u_tf,
                                              self.y_u_tf)

        self.u_res_pred, self.v_res_pred = self.net_r(self.t_r_tf,
                                                      self.x_r_tf,
                                                      self.y_r_tf)

        # Data loss
        self.loss_u_data = tf.reduce_mean(tf.square(self.u_tf - self.u_pred))
        self.loss_v_data = tf.reduce_mean(tf.square(self.v_tf - self.v_pred))
        self.loss_data = self.loss_u_data + self.loss_v_data

        # Residual loss
        self.loss_res_u = tf.reduce_mean(tf.square(self.u_res_pred))
        self.loss_res_v = tf.reduce_mean(tf.square(self.v_res_pred))

        self.loss_res = self.loss_res_u + self.loss_res_v

        # Total loss
        self.loss = self.loss_data + self.loss_res

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                        self.global_step,
                                                        1000, 0.9,
                                                        staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss,
                                                                            global_step=self.global_step)

        # Logger
        self.loss_u_log = []
        self.loss_r_log = []

        self.ep1_log = []
        self.ep2_log = []

        self.saver = tf.train.Saver()

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def initialize_NN(self, layers):
        """Create one Xavier-initialized weight and one zero bias per layer transition."""
        weights = []
        biases = []
        num_layers = len(layers)
        for l in range(0, num_layers - 1):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.zeros([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)
        return weights, biases

    def xavier_init(self, size):
        """Return a weight variable drawn from a truncated normal with Xavier std."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = np.sqrt(2 / (in_dim + out_dim))
        return tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)

    def neural_net(self, H):
        """Apply the network: tanh on every hidden layer, linear output layer."""
        num_layers = len(self.layers)
        for l in range(0, num_layers - 2):
            W = self.weights[l]
            b = self.biases[l]
            H = tf.tanh(tf.add(tf.matmul(H, W), b))
        W = self.weights[-1]
        b = self.biases[-1]
        H = tf.add(tf.matmul(H, W), b)
        return H

    def net_u(self, t, x, y):
        """Return ``(u, v)`` in physical units for normalized inputs ``(t, x, y)``."""
        # Compute scalar potentials
        out = self.neural_net(tf.concat([t, x, y], 1))
        u = out[:, 0:1]
        v = out[:, 1:2]

        # De-normalize
        u = u * self.sigma_u + self.mu_u
        v = v * self.sigma_v + self.mu_v

        return u, v

    def net_r(self, t, x, y):
        """Return the two PDE residuals at normalized inputs ``(t, x, y)``."""
        u, v = self.net_u(t, x, y)

        # Inputs are normalized, so each derivative is divided by the
        # corresponding sigma to convert it to physical coordinates.
        u_t = tf.gradients(u, t)[0] / self.sigma_t
        u_x = tf.gradients(u, x)[0] / self.sigma_x
        u_y = tf.gradients(u, y)[0] / self.sigma_y

        v_t = tf.gradients(v, t)[0] / self.sigma_t
        v_x = tf.gradients(v, x)[0] / self.sigma_x
        v_y = tf.gradients(v, y)[0] / self.sigma_y

        u_xx = tf.gradients(u_x, x)[0] / self.sigma_x
        u_yy = tf.gradients(u_y, y)[0] / self.sigma_y

        v_xx = tf.gradients(v_x, x)[0] / self.sigma_x
        v_yy = tf.gradients(v_y, y)[0] / self.sigma_y

        u_res = u_t - tf.exp(self.epsilon1) * (u_xx + u_yy) - self.b * (1 - u) + u * tf.square(v)
        v_res = v_t - tf.exp(self.epsilon2) * (v_xx + v_yy) + self.d * v - u * tf.square(v)

        return u_res, v_res

    def fetch_minibatch_data(self, N):
        """Return ``N`` data points as ``(normalized X, raw Y)``."""
        X, Y = self.data_sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def fetch_minibatch_residual(self, N):
        """Return ``N`` collocation points as ``(normalized X, raw Y)``.

        The sampler returns a pair, so it is unpacked before normalizing.
        Normalizing the pair itself only worked when both arrays happened to
        share a shape, and silently normalized ``Y`` as well.
        """
        X, Y = self.residual_sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def train(self, nIter=10000, batch_size=128):
        """Run ``nIter`` Adam steps; log losses and parameters every 100 iterations."""
        start_time = time.time()
        for it in range(nIter):
            X_u_batch, U_batch = self.fetch_minibatch_data(batch_size)
            X_r_batch, _ = self.fetch_minibatch_residual(batch_size)

            tf_dict = {self.t_u_tf: X_u_batch[:, 0:1], self.x_u_tf: X_u_batch[:, 1:2], self.y_u_tf: X_u_batch[:, 2:3],
                       self.t_r_tf: X_r_batch[:, 0:1], self.x_r_tf: X_r_batch[:, 1:2], self.y_r_tf: X_r_batch[:, 2:3],
                       self.u_tf: U_batch[:, 0:1], self.v_tf: U_batch[:, 1:2]}

            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = time.time() - start_time
                # Fetch every logged quantity in a single session call.
                loss_u_value, loss_r_value, ep1_value, ep2_value = self.sess.run(
                    [self.loss_data, self.loss_res, self.epsilon1, self.epsilon2], tf_dict)

                self.loss_u_log.append(loss_u_value)
                self.loss_r_log.append(loss_r_value)

                # Logged in physical units, not log-space.
                self.ep1_log.append(np.exp(ep1_value))
                self.ep2_log.append(np.exp(ep2_value))

                print('It: %d, Data: %.3e, Residual: %.3e, Time: %.2f' %
                      (it, loss_u_value, loss_r_value, elapsed))

                print('ep1: {:.3e}, ep2: {:.3e}'.format(np.exp(ep1_value), np.exp(ep2_value)))

                # Restart the timer so "Time" covers one logging interval.
                start_time = time.time()

    def predict(self, X_star):
        """Return ``(u_pred, v_pred)`` at physical points ``X_star`` with columns (t, x, y)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1],
                   self.x_u_tf: X_star[:, 1:2],
                   self.y_u_tf: X_star[:, 2:3]}
        # One forward pass yields both outputs.
        u_pred, v_pred = self.sess.run([self.u_pred, self.v_pred], tf_dict)
        return u_pred, v_pred


class Gray_Scott2D_FF:
    """Gray-Scott physics-informed network with random Fourier feature inputs (TensorFlow 1.x).

    Time and space are embedded separately with fixed (non-trainable) random
    projections followed by sin/cos. Both embeddings pass through the same
    hidden layers, are multiplied point-wise, and a final linear layer
    produces ``(u, v)``. The loss is the sum of a data misfit and the mean
    squared PDE residuals

        u_t - exp(epsilon1) * (u_xx + u_yy) - b * (1 - u) + u * v**2
        v_t - exp(epsilon2) * (v_xx + v_yy) + d * v - u * v**2

    ``epsilon1`` and ``epsilon2`` are trainable scalars that enter the
    residual through ``exp``; both are initialized to -10.
    """

    # Initialize the class
    def __init__(self, data_sampler, residual_sampler, layers, b, d):
        """Build the graph, the optimizer and the session.

        Args:
            data_sampler: object exposing ``N`` and ``sample(n) -> (X, U)``.
                Three columns of ``X`` are used as (t, x, y) and two columns
                of ``U`` as (u, v).
            residual_sampler: object whose ``sample(n)`` returns a pair
                ``(X, Y)``; only the points ``X`` are used.
            layers: layer widths; ``layers[0]`` is the width of each Fourier
                embedding (sin and cos halves of ``layers[0] // 2`` each).
            b, d: reaction coefficients used in the PDE residual.
        """
        # Draw the whole data set once to compute normalization statistics.
        N = data_sampler.N
        X, U = data_sampler.sample(N)

        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]
        self.mu_y, self.sigma_y = self.mu_X[2], self.sigma_X[2]

        self.mu_U, self.sigma_U = U.mean(0), U.std(0)
        self.mu_u, self.sigma_u = self.mu_U[0], self.sigma_U[0]
        self.mu_v, self.sigma_v = self.mu_U[1], self.sigma_U[1]

        # Samplers
        self.data_sampler = data_sampler
        self.residual_sampler = residual_sampler

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Fixed random projection for time (standard normal, scale 1).
        self.W_t = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * 1, dtype=tf.float32,
                               trainable=False)

        # Fixed random projection for (x, y) (standard normal, scale 30).
        self.W_x = tf.Variable(tf.random_normal([2, layers[0] // 2], dtype=tf.float32) * 30, dtype=tf.float32,
                               trainable=False)

        # Parameters
        # Trainable log-diffusion coefficients (exp() is applied in net_r).
        self.epsilon1 = tf.Variable(-10.0, dtype=tf.float32)
        self.epsilon2 = tf.Variable(-10.0, dtype=tf.float32)

        self.b = b
        self.d = d

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.v_tf = tf.placeholder(tf.float32, shape=(None, 1))
        # w_tf is created but never fed or used elsewhere in this class.
        self.w_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Inputs for the data-fit term
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.y_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Inputs for the residual (collocation) term
        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.y_r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.u_pred, self.v_pred = self.net_u(self.t_u_tf,
                                              self.x_u_tf,
                                              self.y_u_tf)

        self.u_res_pred, self.v_res_pred = self.net_r(self.t_r_tf,
                                                      self.x_r_tf,
                                                      self.y_r_tf)

        # Data loss
        self.loss_u_data = tf.reduce_mean(tf.square(self.u_tf - self.u_pred))
        self.loss_v_data = tf.reduce_mean(tf.square(self.v_tf - self.v_pred))
        self.loss_data = self.loss_u_data + self.loss_v_data

        # Residual loss
        self.loss_res_u = tf.reduce_mean(tf.square(self.u_res_pred))
        self.loss_res_v = tf.reduce_mean(tf.square(self.v_res_pred))

        self.loss_res = self.loss_res_u + self.loss_res_v

        # Total loss
        self.loss = self.loss_data + self.loss_res

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                        self.global_step,
                                                        1000, 0.9,
                                                        staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss,
                                                                            global_step=self.global_step)

        # Logger
        self.loss_u_log = []
        self.loss_r_log = []

        self.ep1_log = []
        self.ep2_log = []

        self.saver = tf.train.Saver()

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def xavier_init(self, size):
        """Return a weight variable drawn from a normal with Xavier std."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = np.sqrt(2 / (in_dim + out_dim))
        return tf.Variable(tf.random_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)

    def initialize_NN(self, layers):
        """Create one Xavier weight and one random-normal bias per layer transition."""
        weights = []
        biases = []

        num_layers = len(layers)
        # Hidden layers
        for l in range(0, num_layers - 2):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.random_normal([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)

        # Output layer
        W = self.xavier_init(size=[layers[-2], layers[-1]])
        b = tf.Variable(tf.random_normal([1, layers[-1]], dtype=tf.float32), dtype=tf.float32)
        weights.append(W)
        biases.append(b)

        return weights, biases

    def neural_net(self, H):
        """Apply the Fourier-feature network to inputs with columns (t, x, y)."""
        num_layers = len(self.layers)
        t = H[:, 0:1]
        x = H[:, 1:3]  # both spatial columns

        # sin/cos embeddings, each of width 2 * (layers[0] // 2)
        H_t = tf.concat([tf.sin(tf.matmul(t, self.W_t)),
                         tf.cos(tf.matmul(t, self.W_t))], 1)

        H_x = tf.concat([tf.sin(tf.matmul(x, self.W_x)),
                         tf.cos(tf.matmul(x, self.W_x))], 1)

        # The temporal and spatial branches share the same hidden weights.
        for l in range(0, num_layers - 2):
            W = self.weights[l]
            b = self.biases[l]

            H_t = tf.tanh(tf.add(tf.matmul(H_t, W), b))
            H_x = tf.tanh(tf.add(tf.matmul(H_x, W), b))

        # Merge the branches by point-wise multiplication.
        H = tf.multiply(H_t, H_x)

        W = self.weights[-1]
        b = self.biases[-1]
        H = tf.add(tf.matmul(H, W), b)
        return H

    def net_u(self, t, x, y):
        """Return ``(u, v)`` in physical units for normalized inputs ``(t, x, y)``."""
        # Compute scalar potentials
        out = self.neural_net(tf.concat([t, x, y], 1))
        u = out[:, 0:1]
        v = out[:, 1:2]

        # De-normalize
        u = u * self.sigma_u + self.mu_u
        v = v * self.sigma_v + self.mu_v

        return u, v

    def net_r(self, t, x, y):
        """Return the two PDE residuals at normalized inputs ``(t, x, y)``."""
        u, v = self.net_u(t, x, y)

        # Inputs are normalized, so each derivative is divided by the
        # corresponding sigma to convert it to physical coordinates.
        u_t = tf.gradients(u, t)[0] / self.sigma_t
        u_x = tf.gradients(u, x)[0] / self.sigma_x
        u_y = tf.gradients(u, y)[0] / self.sigma_y

        v_t = tf.gradients(v, t)[0] / self.sigma_t
        v_x = tf.gradients(v, x)[0] / self.sigma_x
        v_y = tf.gradients(v, y)[0] / self.sigma_y

        u_xx = tf.gradients(u_x, x)[0] / self.sigma_x
        u_yy = tf.gradients(u_y, y)[0] / self.sigma_y

        v_xx = tf.gradients(v_x, x)[0] / self.sigma_x
        v_yy = tf.gradients(v_y, y)[0] / self.sigma_y

        u_res = u_t - tf.exp(self.epsilon1) * (u_xx + u_yy) - self.b * (1 - u) + u * tf.square(v)
        v_res = v_t - tf.exp(self.epsilon2) * (v_xx + v_yy) + self.d * v - u * tf.square(v)

        return u_res, v_res

    def fetch_minibatch_data(self, N):
        """Return ``N`` data points as ``(normalized X, raw Y)``."""
        X, Y = self.data_sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def fetch_minibatch_residual(self, N):
        """Return ``N`` collocation points as ``(normalized X, raw Y)``."""
        X, Y = self.residual_sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def train(self, nIter=10000, batch_size=128):
        """Run ``nIter`` Adam steps; log losses and parameters every 10 iterations."""
        start_time = time.time()
        for it in range(nIter):
            X_u_batch, U_batch = self.fetch_minibatch_data(batch_size)
            X_r_batch, _ = self.fetch_minibatch_residual(batch_size)

            tf_dict = {self.t_u_tf: X_u_batch[:, 0:1], self.x_u_tf: X_u_batch[:, 1:2], self.y_u_tf: X_u_batch[:, 2:3],
                       self.t_r_tf: X_r_batch[:, 0:1], self.x_r_tf: X_r_batch[:, 1:2], self.y_r_tf: X_r_batch[:, 2:3],
                       self.u_tf: U_batch[:, 0:1], self.v_tf: U_batch[:, 1:2]}

            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 10 == 0:
                elapsed = time.time() - start_time
                # Fetch every logged quantity in a single session call.
                loss_u_value, loss_r_value, ep1_value, ep2_value = self.sess.run(
                    [self.loss_data, self.loss_res, self.epsilon1, self.epsilon2], tf_dict)

                self.loss_u_log.append(loss_u_value)
                self.loss_r_log.append(loss_r_value)

                # Logged in physical units, not log-space.
                self.ep1_log.append(np.exp(ep1_value))
                self.ep2_log.append(np.exp(ep2_value))

                print('It: %d, Data: %.3e, Residual: %.3e, Time: %.2f' %
                      (it, loss_u_value, loss_r_value, elapsed))

                print('ep1: {:.3e}, ep2: {:.3e}'.format(np.exp(ep1_value), np.exp(ep2_value)))

                # Restart the timer so "Time" covers one logging interval.
                start_time = time.time()

    def predict(self, X_star):
        """Return ``(u_pred, v_pred)`` at physical points ``X_star`` with columns (t, x, y)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1],
                   self.x_u_tf: X_star[:, 1:2],
                   self.y_u_tf: X_star[:, 2:3]}
        # One forward pass yields both outputs.
        u_pred, v_pred = self.sess.run([self.u_pred, self.v_pred], tf_dict)
        return u_pred, v_pred


class Gray_Scott2D_ST_mFF:
    """Physics-informed network for the 2D Gray-Scott system with separate
    temporal and multi-scale spatial Fourier features.

    The time coordinate is embedded with one fixed random projection and the
    two spatial coordinates with three fixed random projections of scale
    1, 10 and 50.  All four embeddings go through the same hidden layers;
    the time branch is multiplied point-wise with each spatial branch and the
    three products are concatenated before the final linear layer.

    The diffusion coefficients are learned: `epsilon1` / `epsilon2` are
    trainable scalars that enter the residual through `tf.exp`.
    """

    # Initialize the class
    def __init__(self, data_sampler, residual_sampler, layers, b, d):
        """Build the graph, the optimizer and the session.

        Args:
            data_sampler: Object exposing `N` and `sample(n) -> (X, U)`.
                Columns 0/1/2 of X are used as t/x/y and columns 0/1 of U
                as u/v.
            residual_sampler: Object exposing `sample(n) -> (X, _)` that
                provides the points where the PDE residual is penalised.
            layers: Layer widths.  `layers[0]` is the width of each Fourier
                embedding (sin and cos of `layers[0] // 2` projections) and
                `layers[-1]` the number of outputs.
            b: Coefficient of the `(1 - u)` term in the u-residual.
            d: Coefficient of the `v` term in the v-residual.
        """
        # Normalisation statistics come from a sample of size data_sampler.N.
        N = data_sampler.N
        X, U = data_sampler.sample(N)

        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]
        self.mu_y, self.sigma_y = self.mu_X[2], self.sigma_X[2]

        self.mu_U, self.sigma_U = U.mean(0), U.std(0)
        self.mu_u, self.sigma_u = self.mu_U[0], self.sigma_U[0]
        self.mu_v, self.sigma_v = self.mu_U[1], self.sigma_U[1]

        # Samplers
        self.data_sampler = data_sampler
        self.residual_sampler = residual_sampler

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Fixed (non-trainable) random projection for the time coordinate.
        self.W_t = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * 1, dtype=tf.float32,
                               trainable=False)

        # Fixed random projections for (x, y) at three scales: 1, 10, 50.
        self.W1_x = tf.Variable(tf.random_normal([2, layers[0] // 2], dtype=tf.float32) * 1, dtype=tf.float32,
                                trainable=False)
        self.W2_x = tf.Variable(tf.random_normal([2, layers[0] // 2], dtype=tf.float32) * 10, dtype=tf.float32,
                                trainable=False)
        self.W3_x = tf.Variable(tf.random_normal([2, layers[0] // 2], dtype=tf.float32) * 50, dtype=tf.float32,
                                trainable=False)

        # Parameters
        # The diffusion coefficients are trainable and used as
        # exp(epsilon) in net_r, so the effective value stays positive.
        self.epsilon1 = tf.Variable(-10.0, dtype=tf.float32)
        self.epsilon2 = tf.Variable(-10.0, dtype=tf.float32)
        self.b = b
        self.d = d

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.v_tf = tf.placeholder(tf.float32, shape=(None, 1))
        # NOTE: w_tf is not referenced anywhere else in this class.
        self.w_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.y_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.y_r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.u_pred, self.v_pred = self.net_u(self.t_u_tf,
                                              self.x_u_tf,
                                              self.y_u_tf)

        self.u_res_pred, self.v_res_pred = self.net_r(self.t_r_tf,
                                                      self.x_r_tf,
                                                      self.y_r_tf)

        # Data loss
        self.loss_u_data = tf.reduce_mean(tf.square(self.u_tf - self.u_pred))
        self.loss_v_data = tf.reduce_mean(tf.square(self.v_tf - self.v_pred))
        self.loss_data = self.loss_u_data + self.loss_v_data

        # Residual loss
        self.loss_res_u = tf.reduce_mean(tf.square(self.u_res_pred))
        self.loss_res_v = tf.reduce_mean(tf.square(self.v_res_pred))

        self.loss_res = self.loss_res_u + self.loss_res_v

        # Total loss
        self.loss = self.loss_data + self.loss_res

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                        self.global_step,
                                                        5000, 0.9,
                                                        staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss,
                                                                            global_step=self.global_step)

        # Logger
        self.loss_u_log = []
        self.loss_r_log = []

        self.ep1_log = []
        self.ep2_log = []

        self.saver = tf.train.Saver()

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def xavier_init(self, size):
        """Return a weight variable of shape `size` drawn from a normal
        distribution with standard deviation sqrt(2 / (in_dim + out_dim))."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = np.sqrt(2 / (in_dim + out_dim))
        return tf.Variable(tf.random_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)

    def initialize_NN(self, layers):
        """Create the hidden-layer and output-layer parameters.

        Returns:
            Tuple `(weights, biases)` of lists of variables.  The last
            weight has `3 * layers[-2]` rows because the output layer
            consumes the concatenation of the three scale branches.
        """
        weights = []
        biases = []

        num_layers = len(layers)
        for l in range(0, num_layers - 2):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.random_normal([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)

        W = self.xavier_init(size=[3 * layers[-2], layers[-1]])
        b = tf.Variable(tf.random_normal([1, layers[-1]], dtype=tf.float32), dtype=tf.float32)
        weights.append(W)
        biases.append(b)

        return weights, biases

    def neural_net(self, H):
        """Forward pass.

        Args:
            H: Tensor whose column 0 is the (normalised) time and whose
                columns 1:3 are the (normalised) spatial coordinates.

        Returns:
            Tensor with `layers[-1]` columns (raw, still normalised outputs).
        """
        num_layers = len(self.layers)
        t = H[:, 0:1]
        x = H[:, 1:3]

        # Fourier embeddings: [sin(z W), cos(z W)], each of width layers[0]
        # when layers[0] is even.
        H_t = tf.concat([tf.sin(tf.matmul(t, self.W_t)),
                         tf.cos(tf.matmul(t, self.W_t))], 1)

        H1_x = tf.concat([tf.sin(tf.matmul(x, self.W1_x)),
                          tf.cos(tf.matmul(x, self.W1_x))], 1)

        H2_x = tf.concat([tf.sin(tf.matmul(x, self.W2_x)),
                          tf.cos(tf.matmul(x, self.W2_x))], 1)

        H3_x = tf.concat([tf.sin(tf.matmul(x, self.W3_x)),
                          tf.cos(tf.matmul(x, self.W3_x))], 1)

        # The same hidden weights are applied to all four branches.
        for l in range(0, num_layers - 2):
            W = self.weights[l]
            b = self.biases[l]

            H_t = tf.tanh(tf.add(tf.matmul(H_t, W), b))

            H1_x = tf.tanh(tf.add(tf.matmul(H1_x, W), b))
            H2_x = tf.tanh(tf.add(tf.matmul(H2_x, W), b))
            H3_x = tf.tanh(tf.add(tf.matmul(H3_x, W), b))

        # Merge the time branch with every spatial branch point-wise.
        H1 = tf.multiply(H_t, H1_x)
        H2 = tf.multiply(H_t, H2_x)
        H3 = tf.multiply(H_t, H3_x)

        H = tf.concat([H1, H2, H3], 1)

        # Final linear layer (no activation).
        W = self.weights[-1]
        b = self.biases[-1]
        H = tf.add(tf.matmul(H, W), b)
        return H

    def net_u(self, t, x, y):
        """Return the de-normalised predictions `(u, v)` at `(t, x, y)`."""
        out = self.neural_net(tf.concat([t, x, y], 1))
        u = out[:, 0:1]
        v = out[:, 1:2]

        # De-normalize
        u = u * self.sigma_u + self.mu_u
        v = v * self.sigma_v + self.mu_v

        return u, v

    def net_r(self, t, x, y):
        """Return the PDE residuals `(u_res, v_res)` at `(t, x, y)`.

        The inputs are normalised coordinates, so every derivative is
        divided by the corresponding input standard deviation (chain rule)
        to express it in the original coordinates.
        """
        u, v = self.net_u(t, x, y)

        u_t = tf.gradients(u, t)[0] / self.sigma_t
        u_x = tf.gradients(u, x)[0] / self.sigma_x
        u_y = tf.gradients(u, y)[0] / self.sigma_y

        v_t = tf.gradients(v, t)[0] / self.sigma_t
        v_x = tf.gradients(v, x)[0] / self.sigma_x
        v_y = tf.gradients(v, y)[0] / self.sigma_y

        u_xx = tf.gradients(u_x, x)[0] / self.sigma_x
        u_yy = tf.gradients(u_y, y)[0] / self.sigma_y

        v_xx = tf.gradients(v_x, x)[0] / self.sigma_x
        v_yy = tf.gradients(v_y, y)[0] / self.sigma_y

        u_res = u_t - tf.exp(self.epsilon1) * (u_xx + u_yy) - self.b * (1 - u) + u * tf.square(v)
        v_res = v_t - tf.exp(self.epsilon2) * (v_xx + v_yy) + self.d * v - u * tf.square(v)

        return u_res, v_res

    def fetch_minibatch_data(self, N):
        """Sample N data points and return `(normalised X, Y)`."""
        X, Y = self.data_sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def fetch_minibatch_residual(self, N):
        """Sample N residual points and return `(normalised X, Y)`."""
        X, Y = self.residual_sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def train(self, nIter=10000, batch_size=128):
        """Run `nIter` Adam steps on freshly sampled mini-batches.

        Every 100th iteration the data loss, the residual loss and
        exp(epsilon1), exp(epsilon2) are appended to the `*_log` lists and
        printed together with the time since the previous report.

        Args:
            nIter: Number of optimisation steps.
            batch_size: Number of points requested from each sampler per step.
        """
        start_time = time.time()
        for it in range(nIter):
            X_u_batch, U_batch = self.fetch_minibatch_data(batch_size)
            X_r_batch, _ = self.fetch_minibatch_residual(batch_size)

            tf_dict = {self.t_u_tf: X_u_batch[:, 0:1], self.x_u_tf: X_u_batch[:, 1:2], self.y_u_tf: X_u_batch[:, 2:3],
                       self.t_r_tf: X_r_batch[:, 0:1], self.x_r_tf: X_r_batch[:, 1:2], self.y_r_tf: X_r_batch[:, 2:3],
                       self.u_tf: U_batch[:, 0:1], self.v_tf: U_batch[:, 1:2]}

            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = time.time() - start_time

                # Fetch all reported quantities in one session call so the
                # graph is evaluated once instead of four times.
                loss_u_value, loss_r_value, ep1_value, ep2_value = self.sess.run(
                    [self.loss_data, self.loss_res, self.epsilon1, self.epsilon2],
                    tf_dict)

                ep1 = np.exp(ep1_value)
                ep2 = np.exp(ep2_value)

                self.loss_u_log.append(loss_u_value)
                self.loss_r_log.append(loss_r_value)

                self.ep1_log.append(ep1)
                self.ep2_log.append(ep2)

                print('It: %d, Data: %.3e, Residual: %.3e, Time: %.2f' %
                      (it, loss_u_value, loss_r_value, elapsed))

                print('ep1: {:.3e}, ep2: {:.3e}'.format(ep1, ep2))

                # Restart the timer so `elapsed` covers one reporting window.
                start_time = time.time()

    def predict(self, X_star):
        """Return `(u_pred, v_pred)` at the un-normalised points `X_star`
        (columns 0/1/2 are used as t/x/y)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1],
                   self.x_u_tf: X_star[:, 1:2],
                   self.y_u_tf: X_star[:, 2:3]}
        # Both outputs share one forward pass; fetch them together.
        u_pred, v_pred = self.sess.run([self.u_pred, self.v_pred], tf_dict)
        return u_pred, v_pred


================================================
FILE: Poisson1D/Compute_Jacobian.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 11 17:45:07 2020

@author: sifan
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import gradients_impl as gradient_ops
from tensorflow.python.ops.parallel_for import control_flow_ops
from tensorflow.python.util import nest

def jacobian(output, inputs, use_pfor=True, parallel_iterations=None):
  """Computes jacobian of `output` w.r.t. `inputs`.

  Unlike a plain `tf.gradients` call, inputs that `output` does not depend on
  yield zero tensors (the gradients are requested with
  `unconnected_gradients=ZERO`).

  Args:
    output: A tensor.
    inputs: A tensor or a nested structure of tensor objects.
    use_pfor: If true, uses pfor for computing the jacobian. Else uses
      `control_flow_ops.for_loop`.
    parallel_iterations: A knob to control how many iterations are dispatched
      in parallel. This knob can be used to control the total memory usage.
  Returns:
    A tensor or a nested structure of tensors with the same structure as
    `inputs`. Each entry is the jacobian of `output` w.r.t. the corresponding
    value in `inputs`. If output has shape [y_1, ..., y_n] and inputs_i has
    shape [x_1, ..., x_m], the corresponding jacobian has shape
    [y_1, ..., y_n, x_1, ..., x_m]. Note that in cases where the gradient is
    sparse (IndexedSlices), jacobian function currently makes it dense and
    returns a Tensor instead. This may change in the future.
  """
  flat_inputs = nest.flatten(inputs)
  # Remember both the static and the dynamic shape before flattening so the
  # result can be reshaped (and annotated) back to [y_1, ..., y_n, ...].
  output_tensor_shape = output.shape
  output_shape = array_ops.shape(output)
  output = array_ops.reshape(output, [-1])

  def loop_fn(i):
    # Gradient of the i-th scalar entry of the flattened output w.r.t. every
    # input; this is one row of each jacobian.
    y = array_ops.gather(output, i)
    return gradient_ops.gradients(y, flat_inputs,  unconnected_gradients=tf.UnconnectedGradients.ZERO)

  # Use the static number of outputs when it can be converted to an int,
  # otherwise fall back to the dynamic size.
  try:
    output_size = int(output.shape[0])
  except TypeError:
    output_size = array_ops.shape(output)[0]

  if use_pfor:
    pfor_outputs = control_flow_ops.pfor(
        loop_fn, output_size, parallel_iterations=parallel_iterations)
  else:
    pfor_outputs = control_flow_ops.for_loop(
        loop_fn,
        [output.dtype] * len(flat_inputs),
        output_size,
        parallel_iterations=parallel_iterations)

  # Each loop output has the flattened output size as leading dimension;
  # restore the original output shape in front of the input shape.
  for i, out in enumerate(pfor_outputs):
    if isinstance(out, ops.Tensor):
      new_shape = array_ops.concat(
          [output_shape, array_ops.shape(out)[1:]], axis=0)
      out = array_ops.reshape(out, new_shape)
      out.set_shape(output_tensor_shape.concatenate(flat_inputs[i].shape))
      pfor_outputs[i] = out

  # Re-nest the flat list so the result mirrors the structure of `inputs`.
  return nest.pack_sequence_as(inputs, pfor_outputs)

================================================
FILE: Poisson1D/Poisson_1D.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Tue Sep  1 13:52:42 2020

@author: Wsf12
"""

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
import seaborn as sns
from models_tf import Sampler, NN, NN_FF, NN_mFF


if __name__ == '__main__':
    # Trains a network on the 1D Poisson problem u_xx = f on [0, 1] with a
    # two-frequency exact solution, then reports errors and plots results.

    # Hyper-parameters
    a = 2
    b = 50

    # Exact solution
    def u(x, a, b):
        return np.sin(np.pi * a * x) + 0.1 * np.sin(np.pi * b * x)

    # Exact PDE residual
    def u_xx(x, a, b):
        return - (np.pi * a) ** 2 * np.sin(np.pi * a * x) - 0.1 * (np.pi * b) ** 2 * np.sin(np.pi * b * x)

    # Define computational domain
    bc1_coords = np.array([[0.0],
                           [0.0]])

    bc2_coords = np.array([[1.0],
                           [1.0]])

    dom_coords = np.array([[0.0],
                           [1.0]])

    # Create boundary sampler
    bc1 = Sampler(1, bc1_coords, lambda x: u(x, a, b), name='Dirichlet BC1')
    bc2 = Sampler(1, bc2_coords, lambda x: u(x, a, b), name='Dirichlet BC2')

    bcs_samplers = [bc1, bc2]

    # Create residual sampler
    res_samplers = Sampler(1, dom_coords, lambda x: u_xx(x, a, b), name='Forcing')

    # Define model
    # NN: Vanilla MLP
    # NN_FF : Vanilla Fourier feature network
    # NN_mFF : Multi-scale Fourier feature network
    model_class = NN

    # The vanilla MLP consumes the raw coordinate and therefore needs an
    # explicit input layer of width 1; the Fourier-feature models take
    # layers[0] as the width of the feature embedding instead.  Using
    # [100, 100, 1] with NN fails with a shape mismatch in the first matmul.
    if model_class is NN:
        layers = [1, 100, 100, 1]
    else:
        layers = [100, 100, 1]

    # Hyper-parameter for Fourier features
    sigma = 10

    model = model_class(layers, bcs_samplers, res_samplers, u, a, b, sigma)

    # Train model
    n_iter = 40000
    model.train(nIter=n_iter, batch_size=128, log_NTK=False, log_weights=False)

    # Create test data
    nn = 10000
    X_star = np.linspace(dom_coords[0, 0], dom_coords[1, 0], nn)[:, None]
    u_star = u(X_star, a, b)
    r_star = u_xx(X_star, a, b)

    # Predictions
    u_pred = model.predict_u(X_star)
    r_pred = model.predict_r(X_star)
    error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
    error_r = np.linalg.norm(r_star - r_pred, 2) / np.linalg.norm(r_star, 2)

    print('Relative L2 error_u: {:.2e}'.format(error_u))
    print('Relative L2 error_r: {:.2e}'.format(error_r))

    loss_bcs = model.loss_bcs_log
    loss_res = model.loss_res_log
    l2_error = model.l2_error_log

    # Plot
    fig = plt.figure(figsize=(18, 5))
    with sns.axes_style("darkgrid"):
        plt.subplot(1, 3, 1)
        plt.plot(X_star, u_star, label='Exact')
        plt.plot(X_star, u_pred, '--', label='Predicted')
        plt.xlabel('$x$')
        plt.ylabel('$y$')
        plt.legend(fontsize=20, loc='upper left')
        plt.tight_layout()

        plt.subplot(1, 3, 2)
        plt.plot(X_star, u_star - u_pred, label='Error')
        plt.xlabel('$x$')
        plt.ylabel('Point-wise error')
        plt.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
        plt.tight_layout()

        plt.subplot(1, 3, 3)
        # The models log at different intervals (NN every 1000 steps, NN_FF
        # every 100), so derive the spacing from the number of log entries
        # rather than hard-coding it.  This is exact whenever the logging
        # interval divides n_iter.
        n_logged = len(loss_res)
        log_every = n_iter // n_logged if n_logged else 1
        iters = log_every * np.arange(n_logged)

        # Raw strings: '\m' is not a valid escape sequence.
        plt.plot(iters, loss_res, label=r'$\mathcal{L}_{r}$', linewidth=2)
        plt.plot(iters, loss_bcs, label=r'$\mathcal{L}_{b}$', linewidth=2)
        plt.plot(iters, l2_error, label=r'$L^2$ error', linewidth=2)

        plt.yscale('log')
        plt.xlabel('iterations')
        plt.legend(loc='upper right', bbox_to_anchor=(1.0, 0.9), fontsize=20)
        plt.tight_layout()
        plt.show()


================================================
FILE: Poisson1D/models_tf.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Tue Sep  1 14:17:33 2020

@author: Wsf12
"""

import tensorflow as tf
from Compute_Jacobian import jacobian
import numpy as np
import timeit

class Sampler:
    """Uniform random sampler over an axis-aligned box, paired with a target function.

    `coords` holds the lower corner of the box in row 0 and the upper corner
    in row 1; `func` maps the sampled points to their target values.
    """

    def __init__(self, dim, coords, func, name=None):
        self.dim, self.coords = dim, coords
        self.func, self.name = func, name

    def sample(self, N):
        """Draw N points uniformly inside the box and evaluate `func` on them.

        Returns:
            Tuple `(points, values)` where `points` has shape (N, dim).
        """
        lower = self.coords[0:1, :]
        upper = self.coords[1:2, :]
        points = lower + (upper - lower) * np.random.rand(N, self.dim)
        return points, self.func(points)

class NN:
    """Vanilla fully-connected physics-informed network for u_xx = f in 1D.

    The loss is the sum of the mean squared boundary mismatch at two boundary
    samplers and the mean squared mismatch between the predicted second
    derivative and the forcing supplied by the residual sampler.
    """

    def __init__(self, layers, bcs_samplers, res_samplers, u, a, b, sigma):
        """Build the graph, the optimizer and the session.

        Args:
            layers: Layer widths including the input layer, e.g.
                `[1, 100, 100, 1]`.
            bcs_samplers: Sequence of two samplers for the boundary points.
            res_samplers: Sampler for the residual points; also used to
                estimate the input normalisation statistics.
            u: Callable `u(x, a, b)` giving the reference solution used for
                the logged relative L2 error.
            a, b: Parameters forwarded to `u`.
            sigma: Accepted for signature parity with the Fourier-feature
                models; not used by this class.
        """
        # Normalize the input
        X, _ = res_samplers.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_x, self.sigma_x = self.mu_X[0], self.sigma_X[0]

        # Samplers
        self.bcs_samplers = bcs_samplers
        self.res_samplers = res_samplers

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        # NOTE: u_tf is not referenced anywhere else in this class.
        self.u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Evaluate predictions
        self.u_bc1_pred = self.net_u(self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.x_bc2_tf)

        self.u_pred = self.net_u(self.x_u_tf)
        self.r_pred = self.net_r(self.x_r_tf)

        # Boundary loss
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_pred - self.u_bc1_tf))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_pred - self.u_bc2_tf))

        self.loss_bcs = self.loss_bc1 + self.loss_bc2

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_tf - self.r_pred))

        # Total loss
        self.loss = self.loss_res + self.loss_bcs

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

        # Test data (fixed grid on [0, 1]) used for the logged L2 error
        N_test = 1000

        self.X_star = np.linspace(0, 1, N_test)[:, None]
        self.u_star = u(self.X_star, a, b)

        # Logger
        self.loss_bcs_log = []
        self.loss_res_log = []
        self.l2_error_log = []

        # Saver
        self.saver = tf.train.Saver()

    # Xavier initialization
    def xavier_init(self, size):
        """Return a weight variable of shape `size` with standard deviation
        1 / sqrt((in_dim + out_dim) / 2)."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    # Initialize network weights and biases using Xavier initialization
    def initialize_NN(self, layers):
        """Create one weight/bias pair per consecutive pair of layer widths."""
        weights = []
        biases = []
        num_layers = len(layers)
        for l in range(0, num_layers - 1):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.random_normal([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)
        return weights, biases

    def forward_pass(self, H):
        """Apply the tanh hidden layers followed by a linear output layer."""
        num_layers = len(self.layers)

        # There are num_layers - 1 weight matrices; the first num_layers - 2
        # are hidden layers, the last one is applied below without activation.
        for l in range(0, num_layers - 2):
            W = self.weights[l]
            b = self.biases[l]
            H = tf.tanh(tf.add(tf.matmul(H, W), b))

        W = self.weights[-1]
        b = self.biases[-1]
        H = tf.add(tf.matmul(H, W), b)
        return H

    def net_u(self, x):
        """Return the network prediction at the (normalised) points `x`."""
        u = self.forward_pass(x)
        return u

    # Forward pass for f
    def net_r(self, x):
        """Return the predicted second derivative u_xx at `x`.

        `x` is the normalised coordinate, so each derivative is divided by
        `sigma_x` to express it in the original coordinate.
        """
        u = self.net_u(x)

        u_x = tf.gradients(u, x)[0] / self.sigma_x
        u_xx = tf.gradients(u_x, x)[0] / self.sigma_x

        res_u = u_xx
        return res_u

    def fetch_minibatch(self, sampler, N):
        """Sample N points from `sampler` and return `(normalised X, Y)`."""
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=128, log_NTK=True, log_weights=True):
        """Run `nIter` Adam steps on freshly sampled mini-batches.

        Every 1000th iteration the boundary loss, the residual loss and the
        relative L2 error on the internal test grid are logged and printed.

        Args:
            nIter: Number of optimisation steps.
            batch_size: Number of points requested from each sampler per step.
            log_NTK: Accepted for interface compatibility; not used here.
            log_weights: Accepted for interface compatibility; not used here.
        """
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_bc1_batch, u_bc1_batch = self.fetch_minibatch(self.bcs_samplers[0], batch_size)
            X_bc2_batch, u_bc2_batch = self.fetch_minibatch(self.bcs_samplers[1], batch_size)

            # Fetch residual mini-batch
            X_res_batch, f_batch = self.fetch_minibatch(self.res_samplers, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.x_bc1_tf: X_bc1_batch, self.x_bc2_tf: X_bc2_batch,
                       self.u_bc1_tf: u_bc1_batch, self.u_bc2_tf: u_bc2_batch,
                       self.x_u_tf: X_res_batch, self.x_r_tf: X_res_batch,
                       self.r_tf: f_batch
                       }

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 1000 == 0:
                elapsed = timeit.default_timer() - start_time

                # Fetch the total loss and its two parts in one session call
                # so the graph is evaluated once instead of twice.
                loss_value, loss_bcs_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error_u = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_res_log.append(loss_res_value)
                self.l2_error_log.append(error_u)

                print('It: %d, Loss: %.3e, Loss_bcs: %.3e, Loss_res: %.3e ,Time: %.2f' %
                      (it, loss_value, loss_bcs_value, loss_res_value, elapsed))

                # Restart the timer so `elapsed` covers one reporting window.
                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Return the predicted solution at the un-normalised points `X_star`."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.x_u_tf: X_star}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluates predictions at test points
    def predict_r(self, X_star):
        """Return the predicted second derivative at the un-normalised points `X_star`."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.x_r_tf: X_star}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star


class NN_FF:
    """Fourier-feature physics-informed network for u_xx = f in 1D.

    The normalised input is first mapped to `[sin(x W), cos(x W)]` with a
    fixed random projection `W` scaled by `sigma`, then passed through a
    fully-connected tanh network.  The loss matches that of the vanilla
    model: boundary mismatch plus second-derivative mismatch.
    """

    def __init__(self, layers, bcs_samplers, res_samplers, u, a, b, sigma):
        """Build the graph, the optimizer and the session.

        Args:
            layers: Layer widths.  `layers[0]` is the width of the Fourier
                embedding (sin and cos of `layers[0] // 2` projections).
            bcs_samplers: Sequence of two samplers for the boundary points.
            res_samplers: Sampler for the residual points; also used to
                estimate the input normalisation statistics.
            u: Callable `u(x, a, b)` giving the reference solution used for
                the logged relative L2 error.
            a, b: Parameters forwarded to `u`.
            sigma: Scale multiplying the random Fourier projection.
        """
        # Normalize the input
        X, _ = res_samplers.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_x, self.sigma_x = self.mu_X[0], self.sigma_X[0]

        # Samplers
        self.bcs_samplers = bcs_samplers
        self.res_samplers = res_samplers

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Initialize Fourier features (fixed, non-trainable projection)
        self.W = tf.Variable(tf.random_normal([1, layers[0] //2], dtype=tf.float32) * sigma, dtype=tf.float32, trainable=False)

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        # NOTE: u_tf is not referenced anywhere else in this class.
        self.u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Evaluate predictions
        self.u_bc1_pred = self.net_u(self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.x_bc2_tf)

        self.u_pred = self.net_u(self.x_u_tf)
        self.r_pred = self.net_r(self.x_r_tf)

        # Boundary loss
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_pred - self.u_bc1_tf))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_pred - self.u_bc2_tf))

        self.loss_bcs = self.loss_bc1 + self.loss_bc2

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_tf - self.r_pred))

        # Total loss
        self.loss = self.loss_res + self.loss_bcs

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        100, 0.99, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

        # Test data (fixed grid on [0, 1]) used for the logged L2 error
        N_test = 1000
        self.X_star = np.linspace(0, 1, N_test)[:, None]
        self.u_star = u(self.X_star, a, b)

        # Logger
        self.loss_bcs_log = []
        self.loss_res_log = []
        self.l2_error_log = []

        # Saver
        self.saver = tf.train.Saver()

    # Xavier initialization
    def xavier_init(self, size):
        """Return a weight variable of shape `size` with standard deviation
        1 / sqrt((in_dim + out_dim) / 2)."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    # Initialize network weights and biases using Xavier initialization
    def initialize_NN(self, layers):
        """Create one weight/bias pair per consecutive pair of layer widths."""
        weights = []
        biases = []
        num_layers = len(layers)
        for l in range(0, num_layers - 1):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.random_normal([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)
        return weights, biases

    def forward_pass(self, H):
        """Apply the Fourier embedding, the tanh hidden layers and a linear
        output layer."""
        num_layers = len(self.layers)

        # Fourier feature encoding
        H = tf.concat([tf.sin(tf.matmul(H, self.W)),
                       tf.cos(tf.matmul(H, self.W))], 1)

        for l in range(0, num_layers - 2):
            W = self.weights[l]
            b = self.biases[l]
            H = tf.tanh(tf.add(tf.matmul(H, W), b))

        W = self.weights[-1]
        b = self.biases[-1]
        H = tf.add(tf.matmul(H, W), b)
        return H

    def net_u(self, x):
        """Return the network prediction at the (normalised) points `x`."""
        u = self.forward_pass(x)
        return u

    # Forward pass for f
    def net_r(self, x):
        """Return the predicted second derivative u_xx at `x`.

        `x` is the normalised coordinate, so each derivative is divided by
        `sigma_x` to express it in the original coordinate.
        """
        u = self.net_u(x)

        u_x = tf.gradients(u, x)[0] / self.sigma_x
        u_xx = tf.gradients(u_x, x)[0] / self.sigma_x

        res_u = u_xx
        return res_u

    # Compute Jacobian for each weights and biases in each layer and return a list
    def compute_jacobian(self, f):
        """Return the Jacobians of `f` w.r.t. every weight matrix, followed
        by the Jacobians w.r.t. every bias vector."""
        J_list =[]

        L = len(self.weights)
        for i in range(L):
            J_w = jacobian(f, self.weights[i])
            J_list.append(J_w)

        for i in range(L):
            J_b = jacobian(f, self.biases[i])
            J_list.append(J_b)
        return J_list

    # Compute the empirical NTK = J J^T
    def compute_ntk(self, J1_list, x1, J2_list, x2):
        """Sum `J1 J2^T` over all parameter groups.

        Each Jacobian is flattened to `(D, -1)` with `D = x1.shape[0]`, so
        the result is `D x D`; `x2` is not used by the computation.
        """
        D = x1.shape[0]
        N = len(J1_list)

        Ker = tf.zeros((D,D))
        for k in range(N):
            J1 = tf.reshape(J1_list[k], shape=(D,-1))
            J2 = tf.reshape(J2_list[k], shape=(D,-1))

            K = tf.matmul(J1, tf.transpose(J2))
            Ker = Ker + K
        return Ker

    def fetch_minibatch(self, sampler, N):
        """Sample N points from `sampler` and return `(normalised X, Y)`."""
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=128, log_NTK=True, log_weights=True):
        """Run `nIter` Adam steps on freshly sampled mini-batches.

        Every 100th iteration the boundary loss, the residual loss and the
        relative L2 error on the internal test grid are logged and printed.

        Args:
            nIter: Number of optimisation steps.
            batch_size: Number of points requested from each sampler per step.
            log_NTK: Accepted for interface compatibility; not used here.
            log_weights: Accepted for interface compatibility; not used here.
        """
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_bc1_batch, u_bc1_batch = self.fetch_minibatch(self.bcs_samplers[0], batch_size)
            X_bc2_batch, u_bc2_batch = self.fetch_minibatch(self.bcs_samplers[1], batch_size)

            # Fetch residual mini-batch
            X_res_batch, f_batch = self.fetch_minibatch(self.res_samplers, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.x_bc1_tf: X_bc1_batch, self.x_bc2_tf: X_bc2_batch,
                       self.u_bc1_tf: u_bc1_batch, self.u_bc2_tf: u_bc2_batch,
                       self.x_u_tf: X_res_batch, self.x_r_tf: X_res_batch,
                       self.r_tf: f_batch
                       }

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = timeit.default_timer() - start_time

                # Fetch the total loss and its two parts in one session call
                # so the graph is evaluated once instead of twice.
                loss_value, loss_bcs_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error_u = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_res_log.append(loss_res_value)
                self.l2_error_log.append(error_u)

                print('It: %d, Loss: %.3e, Loss_bcs: %.3e, Loss_res: %.3e ,Time: %.2f' %
                      (it, loss_value, loss_bcs_value, loss_res_value, elapsed))

                # Restart the timer so `elapsed` covers one reporting window.
                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Return the predicted solution at the un-normalised points `X_star`."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.x_u_tf: X_star}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluates predictions at test points
    def predict_r(self, X_star):
        """Return the predicted second derivative at the un-normalised points `X_star`."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.x_r_tf: X_star}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star

   
class NN_mFF:
    """Network with two random Fourier feature embeddings and a 1D residual loss.

    The scalar input is embedded with two fixed (non-trainable) Gaussian
    projection matrices, ``W1`` with unit scale and ``W2`` scaled by ``sigma``.
    Both embeddings are pushed through the same hidden layers; the two hidden
    outputs are concatenated and mapped to the output by a final linear layer.

    The training loss is the sum of two boundary terms and a residual term that
    matches the second derivative of the network output against the values
    returned by the residual sampler. Built with the TensorFlow 1.x graph API
    (``tf.Session`` / ``tf.placeholder``).
    """

    def __init__(self, layers, bcs_samplers, res_samplers, u,a, b, sigma):
        """Build the graph, the loss, the optimizer and the reference test data.

        :param layers: Layer widths. ``layers[0]`` is the width of each Fourier
            embedding (half sines, half cosines, so it must be even) and
            ``layers[-1]`` is the output width.
        :param bcs_samplers: Indexable collection of samplers; entries 0 and 1
            provide the two boundary mini-batches.
        :param res_samplers: Sampler for the residual points; also used here to
            estimate the input normalization statistics.
        :param u: Callable ``u(x, a, b)`` evaluated on the test grid to produce
            the reference solution used for the relative L2 error.
        :param a: Forwarded to ``u``.
        :param b: Forwarded to ``u``.
        :param sigma: Scale of the second Fourier feature matrix ``W2``.
        :raises ValueError: If ``layers[0]`` is odd.
        """
        # The embedding is [sin, cos] of layers[0] // 2 projections, so an odd
        # layers[0] can never match the first weight matrix. Fail early with a
        # clear message instead of a shape error during graph construction.
        if layers[0] % 2 != 0:
            raise ValueError('layers[0] must be even (sin/cos Fourier features), got %d' % layers[0])

        # Normalize the input: statistics are estimated from 1e5 residual samples
        X, _ = res_samplers.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_x, self.sigma_x = self.mu_X[0], self.sigma_X[0]

        # Samplers
        self.bcs_samplers = bcs_samplers
        self.res_samplers = res_samplers

        # Initialize network weights and biases
        # NOTE: the order in which variables are created is kept as-is so that
        # default variable names (and hence saved checkpoints) do not change.
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Initialize Fourier features (fixed, not trained): unit scale and scale sigma
        self.W1 = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * 1, dtype=tf.float32, trainable=False)
        self.W2 = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * sigma, dtype=tf.float32, trainable=False)

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Evaluate predictions
        self.u_bc1_pred = self.net_u(self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.x_bc2_tf)

        self.u_pred = self.net_u(self.x_u_tf)
        self.r_pred = self.net_r(self.x_r_tf)

        # Boundary loss
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_pred - self.u_bc1_tf))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_pred - self.u_bc2_tf))

        self.loss_bcs = self.loss_bc1 + self.loss_bc2

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_tf - self.r_pred))

        # Total loss
        self.loss = self.loss_res + self.loss_bcs

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

        # Test data: reference solution on a uniform grid over [0, 1]
        N_test = 1000
        self.X_star = np.linspace(0, 1, N_test)[:, None]
        self.u_star = u(self.X_star, a,b)

        self.l2_error_log = []

        # Logger
        self.loss_bcs_log = []
        self.loss_res_log = []
        self.saver = tf.train.Saver()

    # Xavier initialization
    def xavier_init(self, size):
        """Return a ``tf.Variable`` of shape ``size`` drawn with Xavier standard deviation."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    # Initialize network weights and biases using Xavier initialization
    def initialize_NN(self, layers):
        """Create the hidden-layer and output-layer variables.

        The output layer takes ``2 * layers[-2]`` inputs because the forward
        pass concatenates the hidden outputs of the two Fourier embeddings.

        :return: ``(weights, biases)`` lists, hidden layers first, output layer last.
        """
        weights = []
        biases = []

        num_layers = len(layers)

        # Hidden layers (shared by both embeddings)
        for l in range(0, num_layers - 2):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.random_normal([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)

        # Output layer acting on the concatenated hidden outputs
        W = self.xavier_init(size=[2 * layers[-2], layers[-1]])
        b = tf.Variable(tf.random_normal([1, layers[-1]], dtype=tf.float32), dtype=tf.float32)
        weights.append(W)
        biases.append(b)

        return weights, biases

    def forward_pass(self, H):
        """Map the (normalized) input ``H`` to the network output."""
        # Fourier feature encodings; each projection is built once and shared
        # by its sine and cosine branch.
        proj_1 = tf.matmul(H, self.W1)
        proj_2 = tf.matmul(H, self.W2)
        H1 = tf.concat([tf.sin(proj_1), tf.cos(proj_1)], 1)
        H2 = tf.concat([tf.sin(proj_2), tf.cos(proj_2)], 1)

        # Both embeddings go through the same hidden layers (shared weights);
        # the last entry of weights/biases is the output layer and is skipped here.
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            H1 = tf.tanh(tf.add(tf.matmul(H1, W), b))
            H2 = tf.tanh(tf.add(tf.matmul(H2, W), b))

        # Concatenate the network outputs
        H = tf.concat([H1, H2], 1)
        H = tf.add(tf.matmul(H, self.weights[-1]), self.biases[-1])
        return H

    def net_u(self, x):
        """Network prediction of u at ``x``."""
        return self.forward_pass(x)

    # Forward pass for f
    def net_r(self, x):
        """Second derivative of the network output with respect to the input.

        ``x`` is the normalized input, so each derivative is divided by
        ``sigma_x`` to express it in the original (un-normalized) coordinate.
        """
        u = self.net_u(x)

        u_x = tf.gradients(u, x)[0] / self.sigma_x
        u_xx = tf.gradients(u_x, x)[0] / self.sigma_x

        return u_xx

    def fetch_minibatch(self, sampler, N):
        """Draw ``N`` samples from ``sampler`` and normalize the inputs."""
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=128, log_NTK=True, log_weights=True):
        """Run ``nIter`` Adam steps on freshly sampled mini-batches.

        Every 100 iterations the boundary loss, residual loss and relative L2
        test error are appended to the corresponding ``*_log`` lists and a
        progress line is printed.

        :param nIter: Number of optimization steps.
        :param batch_size: Number of points drawn from every sampler per step.
        :param log_NTK: Unused; kept so existing callers keep working.
        :param log_weights: Unused; kept so existing callers keep working.
        """
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_bc1_batch, u_bc1_batch = self.fetch_minibatch(self.bcs_samplers[0], batch_size)
            X_bc2_batch, u_bc2_batch = self.fetch_minibatch(self.bcs_samplers[1], batch_size)

            # Fetch residual mini-batch
            X_res_batch, f_batch = self.fetch_minibatch(self.res_samplers,  batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.x_bc1_tf: X_bc1_batch, self.x_bc2_tf: X_bc2_batch,
                       self.u_bc1_tf: u_bc1_batch, self.u_bc2_tf: u_bc2_batch,
                       self.x_u_tf: X_res_batch, self.x_r_tf: X_res_batch,
                       self.r_tf: f_batch
                       }

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = timeit.default_timer() - start_time

                # One session call for all three losses: the graph is evaluated
                # once instead of once per fetched tensor.
                loss_value, loss_bcs_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error_u = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_res_log.append(loss_res_value)
                self.l2_error_log.append(error_u)

                print('It: %d, Loss: %.3e, Loss_bcs: %.3e, Loss_res: %.3e ,Time: %.2f' %
                      (it, loss_value, loss_bcs_value, loss_res_value, elapsed))

                # Restart the timer so "Time" reports the last 100 iterations
                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Evaluate the network output at the (un-normalized) points ``X_star``."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.x_u_tf: X_star}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluates the residual at test points
    def predict_r(self, X_star):
        """Evaluate the residual (second derivative) at the (un-normalized) points ``X_star``."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.x_r_tf: X_star}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star


================================================
FILE: README.md
================================================
## Multi-scale Fourier features for physics-informed neural networks

Code and data (available upon request) accompanying the manuscript titled "On the eigenvector bias of Fourier feature networks: From regression to solving multi-scale PDEs with physics-informed neural networks", authored by Sifan Wang, Hanwen Wang, and Paris Perdikaris.

## Abstract

Physics-informed neural networks (PINNs) are demonstrating remarkable promise in integrating physical models with gappy and noisy observational data, but they still struggle in cases where the target functions to be approximated exhibit high-frequency or multi-scale features. 
In this work we investigate this limitation through the lens of Neural Tangent Kernel (NTK) theory and elucidate how PINNs are biased towards learning functions along the dominant eigen-directions of their limiting NTK. Using this observation, we construct novel architectures that employ spatio-temporal and multi-scale random Fourier features, and justify how such coordinate embedding layers can lead to robust and accurate PINN models. Numerical examples are presented for several challenging cases where conventional PINN models fail,  including wave propagation and reaction-diffusion dynamics, illustrating how the proposed methods can be used to effectively tackle both forward and inverse problems involving partial differential equations with multi-scale behavior. 

## Citation

    @article{wang2021eigenvector,
      title={On the eigenvector bias of fourier feature networks: From regression to solving multi-scale pdes with physics-informed neural networks},
      author={Wang, Sifan and Wang, Hanwen and Perdikaris, Paris},
      journal={Computer Methods in Applied Mechanics and Engineering},
      volume={384},
      pages={113938},
      year={2021},
      publisher={Elsevier}
      }


================================================
FILE: Regression/Compute_Jacobian.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 11 17:45:07 2020

@author: sifan
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import gradients_impl as gradient_ops
from tensorflow.python.ops.parallel_for import control_flow_ops
from tensorflow.python.util import nest

def jacobian(output, inputs, use_pfor=True, parallel_iterations=None):
  """Computes jacobian of `output` w.r.t. `inputs`.

  The output is flattened and the gradient of every scalar entry is taken
  separately (in parallel via pfor, or sequentially via a while-loop based
  for_loop). Inputs that are not connected to `output` get zero gradients
  rather than `None`.

  Args:
    output: A tensor.
    inputs: A tensor or a nested structure of tensor objects.
    use_pfor: If true, uses pfor for computing the jacobian. Else uses
      tf.while_loop.
    parallel_iterations: A knob to control how many iterations are dispatched
      in parallel. This knob can be used to control the total memory usage.
  Returns:
    A tensor or a nested structure of tensors with the same structure as
    `inputs`. Each entry is the jacobian of `output` w.r.t. the corresponding
    value in `inputs`. If output has shape [y_1, ..., y_n] and inputs_i has
    shape [x_1, ..., x_m], the corresponding jacobian has shape
    [y_1, ..., y_n, x_1, ..., x_m]. Note that in cases where the gradient is
    sparse (IndexedSlices), jacobian function currently makes it dense and
    returns a Tensor instead. This may change in the future.
  """
  flat_inputs = nest.flatten(inputs)
  # Keep both the static and the dynamic shape of `output`: the dynamic one is
  # used to reshape the result, the static one to restore shape information.
  output_tensor_shape = output.shape
  output_shape = array_ops.shape(output)
  output = array_ops.reshape(output, [-1])

  # Gradient of the i-th scalar entry of the flattened output w.r.t. every
  # input; unconnected inputs yield zeros instead of None.
  def loop_fn(i):
    y = array_ops.gather(output, i)
    return gradient_ops.gradients(y, flat_inputs,  unconnected_gradients=tf.UnconnectedGradients.ZERO)

  # Prefer the statically known number of output entries; fall back to a
  # dynamic shape tensor when the static size is unknown.
  try:
    output_size = int(output.shape[0])
  except TypeError:
    output_size = array_ops.shape(output)[0]

  if use_pfor:
    pfor_outputs = control_flow_ops.pfor(
        loop_fn, output_size, parallel_iterations=parallel_iterations)
  else:
    pfor_outputs = control_flow_ops.for_loop(
        loop_fn,
        [output.dtype] * len(flat_inputs),
        output_size,
        parallel_iterations=parallel_iterations)

  # Each result is stacked along a leading axis of length output_size; unfold
  # that axis back into the original output shape and re-attach the static shape.
  for i, out in enumerate(pfor_outputs):
    if isinstance(out, ops.Tensor):
      new_shape = array_ops.concat(
          [output_shape, array_ops.shape(out)[1:]], axis=0)
      out = array_ops.reshape(out, new_shape)
      out.set_shape(output_tensor_shape.concatenate(flat_inputs[i].shape))
      pfor_outputs[i] = out

  # Restore the (possibly nested) structure of `inputs`.
  return nest.pack_sequence_as(inputs, pfor_outputs)

================================================
FILE: Regression/models_tf.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 11 10:20:01 2020

@author: sifan
"""

import tensorflow as tf
from Compute_Jacobian import jacobian
import numpy as np
import timeit

# Data Sampler
class Sampler:
    """Uniform random sampler over an axis-aligned box, paired with a target function.

    ``coords`` holds the lower corner of the box in its first row and the upper
    corner in its second row; ``func`` is evaluated on the sampled points.
    """

    def __init__(self, dim, coords, func, name=None):
        self.dim, self.coords = dim, coords
        self.func, self.name = func, name

    def sample(self, N):
        """Draw ``N`` uniform points in the box and return ``(x, func(x))``."""
        lower = self.coords[0:1, :]
        upper = self.coords[1:2, :]
        x = lower + (upper - lower) * np.random.rand(N, self.dim)
        return x, self.func(x)
    

class NN_FF:
    """Fourier feature network for 1D regression with empirical NTK logging.

    The scalar input is embedded with a fixed (non-trainable) Gaussian
    projection ``W`` scaled by ``sigma`` and passed through a tanh MLP. Besides
    the usual training graph, the class builds the Jacobian of the predictions
    on the training inputs with respect to all weights and biases, and the
    empirical NTK ``K = J J^T``. Built with the TensorFlow 1.x graph API.
    """

    def __init__(self, layers, X_u, Y_u, a, u, sigma):

        """
        :param layers: Layers of the network; layers[0] is the width of the
                       Fourier embedding (half sines, half cosines) and must be even
        :param X_u, Y_u: Training data
        :param a:  Hyper-parameter of the target function
        :param u:  the target function
        :param sigma: Hyper-parameter of the Fourier features
        :raises ValueError: if layers[0] is odd
        """
        # The embedding is [sin, cos] of layers[0] // 2 projections, so an odd
        # layers[0] can never match the first weight matrix. Fail early with a
        # clear message instead of a shape error during graph construction.
        if layers[0] % 2 != 0:
            raise ValueError('layers[0] must be even (sin/cos Fourier features), got %d' % layers[0])

        self.mu_X, self.sigma_X = X_u.mean(0), X_u.std(0)
        self.mu_x, self.sigma_x = self.mu_X[0], self.sigma_X[0]

        # Normalize the input of the network
        self.X_u = (X_u - self.mu_X) / self.sigma_X
        self.Y_u = Y_u

        # Initialize Fourier features (fixed, not trained)
        # NOTE: the order in which variables are created is kept as-is so that
        # default variable names (and hence saved checkpoints) do not change.
        self.W = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * sigma, dtype=tf.float32, trainable=False)

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Define the size of the Kernel
        self.D_u = X_u.shape[0]

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        # The NTK placeholder has a fixed number of rows so the kernel has a static shape
        self.x_u_ntk_tf = tf.placeholder(tf.float32, shape=(self.D_u, 1))

        # Evaluate predictions
        self.u_pred = self.net_u(self.x_u_tf)

        # Evaluate NTK predictions
        self.u_ntk_pred = self.net_u(self.x_u_ntk_tf)

        # Data-fitting (regression) loss
        self.loss_u = tf.reduce_mean(tf.square(self.u_pred - self.u_tf))

        # Total loss
        self.loss = self.loss_u

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)

        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

        # Model Saver
        self.saver = tf.train.Saver()

        # Compute the Jacobian for weights and biases in each hidden layer
        self.J_u = self.compute_jacobian(self.u_ntk_pred)

        # The empirical NTK = J J^T
        self.K = self.compute_ntk(self.J_u, self.x_u_ntk_tf, self.J_u, self.x_u_ntk_tf)

        # Loss Logger
        self.loss_u_log = []

        # NTK logger
        self.K_log = []

        # Weights logger
        self.weights_log = []
        self.biases_log = []

        # Training error and test error
        N_train  = 100
        N_test = 1000

        # Training data (uniform grid over [0, 1] used for the training error)
        self.X_train = np.linspace(0, 1, N_train)[:, None]
        self.Y_train = u(self.X_train, a)

        # Test data
        self.X_test = np.linspace(0, 1, N_test)[:, None]
        self.Y_test = u(self.X_test, a)

        # Error loggers
        self.train_error_log = []
        self.test_error_log = []

    # Xavier initialization
    def xavier_init(self, size):
        """Return a ``tf.Variable`` of shape ``size`` drawn with Xavier standard deviation."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    # NTK initialization
    def NTK_init(self, size):
        """Return a ``tf.Variable`` of shape ``size`` with standard deviation ``1 / sqrt(in_dim)``."""
        in_dim = size[0]
        out_dim = size[1]
        std = 1. / np.sqrt(in_dim)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * std,
                           dtype=tf.float32)

    # Initialize network weights and biases using Xavier initialization
    def initialize_NN(self, layers):
        """Create one weight matrix and one bias row per pair of consecutive layers.

        :return: ``(weights, biases)`` lists ordered from input to output.
        """
        weights = []
        biases = []
        num_layers = len(layers)
        for l in range(0, num_layers - 1):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.random_normal([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)
        return weights, biases

    # Evaluate the forward pass
    def forward_pass(self, H):
        """Map the (normalized) input ``H`` to the network output."""
        # Fourier feature encoding; the projection is built once and shared by
        # the sine and cosine branch.
        proj = tf.matmul(H, self.W)
        H = tf.concat([tf.sin(proj), tf.cos(proj)], 1)

        # Hidden layers: every (W, b) pair except the last, which is the
        # linear output layer applied below.
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            H = tf.tanh(tf.add(tf.matmul(H, W), b))

        H = tf.add(tf.matmul(H, self.weights[-1]), self.biases[-1])
        return H

    # Define the neural net
    def net_u(self, x):
        """Network prediction of u at ``x``."""
        return self.forward_pass(x)

    # Compute Jacobian for each weights and biases in each layer and return a list
    def compute_jacobian(self, f):
        """Jacobians of ``f`` w.r.t. every weight matrix, followed by every bias."""
        J_list = [jacobian(f, W) for W in self.weights]
        J_list.extend(jacobian(f, b) for b in self.biases)
        return J_list

    # Compute the empirical NTK = J J^T
    def compute_ntk(self, J1_list, x1, J2_list, x2):
        """Sum ``J1 J2^T`` over all parameter groups.

        Each Jacobian is flattened to ``(number of points, -1)`` so that the
        product over one parameter group is a ``(D1, D2)`` matrix.
        """
        D1 = x1.shape[0]
        D2 = x2.shape[0]

        Ker = tf.zeros((D1, D2))
        for J1_k, J2_k in zip(J1_list, J2_list):
            J1 = tf.reshape(J1_k, shape=(D1, -1))
            J2 = tf.reshape(J2_k, shape=(D2, -1))
            Ker = Ker + tf.matmul(J1, tf.transpose(J2))
        return Ker

    # Fetch minibatch
    def fetch_minibatch(self, sampler, N):
        """Draw ``N`` samples from ``sampler`` and normalize the inputs."""
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, log_NTK=True, log_weights=True):
        """Run ``nIter`` full-batch Adam steps.

        Every 100 iterations (counted from the start of this call) the loss and
        the relative L2 training/test errors are logged and printed; the NTK
        matrix and the current weights/biases are stored as well when
        ``log_NTK`` / ``log_weights`` are set.

        :param nIter: Number of optimization steps.
        :param log_NTK: Store the empirical NTK on the training inputs in ``K_log``.
        :param log_weights: Store weights and biases in ``weights_log`` / ``biases_log``.
        """
        start_time = timeit.default_timer()

        # Full-batch training: the feeds never change, so build them once.
        tf_dict = {self.x_u_tf: self.X_u, self.u_tf: self.Y_u}
        ntk_dict = {self.x_u_ntk_tf: self.X_u}

        for it in range(nIter):
            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Everything below is logging, done every 100 iterations only
            if it % 100 != 0:
                continue

            elapsed = timeit.default_timer() - start_time

            # One session call for both fetches: the graph is evaluated once
            loss_value, loss_u_value = self.sess.run([self.loss, self.loss_u], tf_dict)

            # Store the loss values
            self.loss_u_log.append(loss_u_value)

            # Compute the training error
            u_pred_train = self.predict_u(self.X_train)
            training_error = np.linalg.norm(self.Y_train - u_pred_train, 2) / np.linalg.norm(self.Y_train, 2)

            # Compute the test error
            u_pred_test = self.predict_u(self.X_test)
            test_error = np.linalg.norm(self.Y_test - u_pred_test, 2) / np.linalg.norm(self.Y_test, 2)

            # Store the training and test errors
            self.train_error_log.append(training_error)
            self.test_error_log.append(test_error)

            # print the loss values
            print('It: %d, Loss: %.3e, Loss_bcs: %.3e,Time: %.2f' %
                  (it, loss_value, loss_u_value, elapsed))

            # Restart the timer so "Time" reports the last 100 iterations
            start_time = timeit.default_timer()

            # Store the NTK matrix
            if log_NTK:
                print("Compute NTK...")
                K_value = self.sess.run(self.K, ntk_dict)
                self.K_log.append(K_value)

            # Store the weights and biases of the network
            if log_weights:
                print("Weights stored...")
                weights = self.sess.run(self.weights)
                biases = self.sess.run(self.biases)

                self.weights_log.append(weights)
                self.biases_log.append(biases)

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Evaluate the network output at the (un-normalized) points ``X_star``."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.x_u_tf: X_star}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star


================================================
FILE: Regression/regression.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 11 10:20:08 2020

@author: sifan
"""

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from models_tf import Sampler, NN_FF


if __name__ == '__main__':
    # Regression experiment: fit a two-frequency target with a Fourier feature
    # network, then inspect the empirical NTK (eigenvalues / eigenvectors), the
    # drift of the kernel and of the weights, and the predictions over training.

    # Define the target function
    def u(x, a):
        return np.sin(np.pi * x) +  np.cos(np.pi * a * x)

    # Define computational domain
    dom_coords = np.array([[0.0],
                           [1.0]])

    # Training data on u(x)
    N_u = 100
    X_u = np.linspace(dom_coords[0, 0],
                      dom_coords[1, 0], N_u)[:, None]

    a = 10
    Y_u = u(X_u, a)

    # Test data
    nn = 1000
    X_star = np.linspace(dom_coords[0, 0], dom_coords[1, 0], nn)[:, None]
    u_star = u(X_star, a)

    # Define the model
    layers = [100, 100, 100, 1]
    sigma = 10   # Hyper-parameter of the Fourier features
    model = NN_FF(layers, X_u, Y_u, a, u,  sigma)

    # Train the model in three stages and keep the prediction after each stage
    epoch_list = [10, 90, 900]  # 1000 iterations in total
    u_pred_list = []

    for epoch in epoch_list:
        # Train the model
        model.train(nIter=epoch, log_NTK=True, log_weights=True)

        # Predictions
        u_pred = model.predict_u(X_star)
        u_pred_list.append(u_pred)

    # Total number of iterations completed after each stage: 10, 100, 1000
    cumulative_iters = np.cumsum(epoch_list)

    # Global iteration of every logged NTK snapshot. NN_FF.train logs whenever
    # its own iteration counter (restarted on every call) is a multiple of 100.
    log_every = 100
    snapshot_iters = []
    offset = 0
    for epoch in epoch_list:
        snapshot_iters.extend(offset + it for it in range(0, epoch, log_every))
        offset += epoch

    # Evaluate the relative l2 error of the final prediction
    error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
    print('Relative L2 error_u: {:.2e}'.format(error_u))

    # Create loggers for the eigenvalues of the NTK
    lambda_K_log = []

    # Restore the NTK
    K_list = model.K_log

    for K in K_list:
        # Compute eigenvalues
        lambda_K, eigvec_K = np.linalg.eig(K)

        # Sort in decreasing order. np.linalg.eig returns eigenvalues in no
        # particular order, so the eigenvectors are reordered with the same
        # permutation to keep column j paired with the j-th largest eigenvalue.
        order = np.argsort(np.real(lambda_K))[::-1]
        lambda_K = np.real(lambda_K)[order]
        eigvec_K = eigvec_K[:, order]

        # Store eigenvalues
        lambda_K_log.append(lambda_K)

    # After the loop eigvec_K holds the (sorted) eigenvectors of the last snapshot

    # Change of the NTK relative to the first snapshot
    kernel_diff_list = []
    K0 = K_list[0]
    for K in K_list:
        diff = np.linalg.norm(K - K0) / np.linalg.norm(K0)
        kernel_diff_list.append(diff)

    #######################
    #######################

    # Change of the weights
    def compute_weights_diff(weights_1, weights_2):
        # Element-wise difference of two lists of arrays
        return [w_1 - w_2 for w_1, w_2 in zip(weights_1, weights_2)]

    def compute_weights_norm(weights, biases):
        # Euclidean norm of all weights and biases taken as one long vector
        norm = 0
        for w in weights:
            norm = norm + np.sum(np.square(w))
        for b in biases:
            norm = norm + np.sum(np.square(b))
        norm = np.sqrt(norm)
        return norm

    # Restore the list weights and biases
    weights_log = model.weights_log
    biases_log = model.biases_log

    # The weights and biases at the first snapshot
    weights_0 = weights_log[0]
    biases_0 = biases_log[0]

    weights_init_norm = compute_weights_norm(weights_0, biases_0)

    weights_change_list = []

    # Compute the change of weights and biases of the network
    for weights_k, biases_k in zip(weights_log, biases_log):
        weights_diff = compute_weights_diff(weights_k, weights_0)
        biases_diff = compute_weights_diff(biases_k, biases_0)

        weights_diff_norm = compute_weights_norm(weights_diff, biases_diff)
        weights_change = weights_diff_norm / weights_init_norm
        weights_change_list.append(weights_change)


    #################################
    ############## Plot #############
    #################################


    # Model predictions
    fig = plt.figure(1, figsize=(12, 5))
    plt.subplot(1,2,1)
    plt.plot(X_u, Y_u, 'o', label='Exact')
    plt.plot(X_star, u_pred, '--', label='u_pred')
    plt.legend()

    plt.subplot(1,2,2)
    plt.plot(X_star, u_star - u_pred, label='Error')
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Eigenvalues of NTK: first versus last logged snapshot
    fig = plt.figure(2, figsize=(6, 5))
    plt.plot(lambda_K_log[0], label = 'n=0')
    plt.plot(lambda_K_log[-1], '--', label = 'n={:,}'.format(snapshot_iters[-1]))
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('index')
    plt.ylabel(r'$\lambda_{uu}$')
    plt.title(r'Eigenvalues of ${K}_{uu}$')
    plt.legend()
    plt.show()

    # Loss values
    loss_u = model.loss_u_log
    fig_3 = plt.figure(3, figsize=(6,5))
    # Raw string: '\m' is not a valid escape sequence in a normal string literal
    plt.plot(loss_u, label=r'$\mathcal{L}_{u_b}$')
    plt.yscale('log')
    plt.xlabel('iterations')
    plt.ylabel('Loss')
    plt.legend()
    plt.tight_layout()
    plt.show()


    # Visualize the six leading eigenvectors of the NTK (last snapshot)
    fig = plt.figure(figsize=(12, 6))
    with sns.axes_style("darkgrid"):
        for idx in range(6):
            plt.subplot(2, 3, idx + 1)
            plt.plot(X_u,  np.real(eigvec_K[:, idx]))
            plt.tight_layout()
        plt.show()

    # Visualize the eigenvalues of the NTK at the first snapshot
    fig = plt.figure(figsize=(6, 5))
    with sns.axes_style("darkgrid"):
        plt.plot(lambda_K_log[0], label=r'$\sigma={}$'.format(sigma))
        plt.xscale('log')
        plt.yscale('log')
        plt.xlabel('index')
        plt.ylabel(r'$\lambda$')
        plt.title('Spectrum')
        plt.tight_layout()
        plt.legend()
        plt.show()


    # Model predictions after each training stage
    fig = plt.figure(figsize=(12,4))
    with sns.axes_style("darkgrid"):
        for idx, (stage_pred, n_iter) in enumerate(zip(u_pred_list, cumulative_iters)):
            plt.subplot(1, 3, idx + 1)
            plt.plot(X_u, Y_u, 'o')
            plt.plot(X_star,  u_star, color = 'C0', alpha=0.4, linewidth=6)
            plt.plot(X_star,  stage_pred, color='C3', linestyle='--')
            # Title shows the total number of iterations completed so far
            plt.title('Epoch = {}'.format(n_iter))
            plt.tight_layout()
        plt.show()

    
================================================
FILE: heat1D/heat1D.py
================================================
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
from models_tf import Sampler, heat1D_NN, heat1D_FF, heat1D_ST_FF

if __name__ == '__main__':
    # Driver script: trains a physics-informed network on the 1D heat equation
    #     u_t = k * u_xx,   (t, x) in [0, 1] x [0, 1],
    # using the closed-form solution u(t, x) = exp(-a t) sin(b pi x) to build
    # the initial/boundary data and the reference solution, then plots the
    # prediction, the point-wise error and the training history.

    # Define exact solution
    def u(x, a, b):
        """
        Exact solution exp(-a * t) * sin(b * pi * x).

        :param x: x = (t, x), 2-D array whose first column is t and second is x
        :param a: temporal decay rate
        :param b: spatial frequency (multiplied by pi)
        :return: column array with one value per input row
        """

        t  = x[:,0:1]
        x = x[:,1:2]
        
        return np.exp(-a * t) * np.sin(b * np.pi * x)

    def u_t(x, a, b):
        """Time derivative of the exact solution."""
        return - a * u(x, a, b)

    def u_xx(x, a, b):
        """Second spatial derivative of the exact solution."""
        return - (b * np.pi)**2 * u(x, a, b)

    def f(x, a, b):
        """
        Forcing term u_t - k * u_xx evaluated on the exact solution.

        With k = a / (b * pi)**2 both terms equal -a * u, so this is zero up
        to floating-point round-off.
        """
        k = a / (b * np.pi)**2 
        return u_t(x, a, b) - k * u_xx(x, a, b)

    # Define PDE residual
    def operator(u, t, x, k,  sigma_t=1.0, sigma_x=1.0):
        """
        Residual u_t - k * u_xx built with TensorFlow automatic differentiation.

        The models feed normalized inputs, so each derivative is divided by
        the corresponding input scale (sigma_t, sigma_x) to undo that scaling
        via the chain rule.
        """
        u_t = tf.gradients(u, t)[0] / sigma_t
        u_x = tf.gradients(u, x)[0] / sigma_x
        u_xx = tf.gradients(u_x, x)[0] / sigma_x
        residual = u_t - k * u_xx
        return residual

    # Parameters of equations
    a = 1
    b = 500
    k = a / (b * np.pi)**2 

    # Domain boundaries: each array is [[t_min, x_min], [t_max, x_max]]
    ics_coords = np.array([[0.0, 0.0],
                           [0.0, 1.0]])
    bc1_coords = np.array([[0.0, 0.0],
                           [1.0, 0.0]])
    bc2_coords = np.array([[0.0, 1.0],
                           [1.0, 1.0]])
    dom_coords = np.array([[0.0, 0.0],
                           [1.0, 1.0]])

    # Create initial conditions samplers
    ics_sampler = Sampler(2, ics_coords, lambda x: u(x, a, b), name='Initial Condition 1')

    # Create boundary conditions samplers
    bc1 = Sampler(2, bc1_coords, lambda x: u(x, a, b), name='Dirichlet BC1')
    bc2 = Sampler(2, bc2_coords, lambda x: u(x, a, b), name='Dirichlet BC2')
    bcs_sampler = [bc1, bc2]

    # Create residual sampler
    res_sampler = Sampler(2, dom_coords, lambda x: f(x, a, b), name='Forcing')

    # Test data
    nn = 100  # nn = 1000
    t = np.linspace(dom_coords[0, 0], dom_coords[1, 0], nn)[:, None]
    x = np.linspace(dom_coords[0, 1], dom_coords[1, 1], nn)[:, None]
    t, x = np.meshgrid(t, x)
    X_star = np.hstack((t.flatten()[:, None], x.flatten()[:, None]))

    u_star = u(X_star, a, b)
    f_star = f(X_star, a, b)

    # Define model
    # heat1D_NN: Plain MLP
    # heat1D_FF: Plain Fourier feature network
    # heat1D_ST_FF: Spatial-temporal Plain Fourier feature network
    #
    # For the Fourier feature models layers[0] is the embedding width
    # (sin and cos of layers[0] // 2 random projections). heat1D_NN consumes
    # the raw (t, x) pair instead, so it needs layers = [2, 100, 100, 100, 1];
    # pairing it with the widths below fails at the first matmul.
    layers = [100, 100, 100, 1]
    sigma = 500   # Hyper-parameter for Fourier feature embeddings
    model = heat1D_ST_FF(layers, operator, k,
                         ics_sampler, bcs_sampler, res_sampler,
                         sigma, X_star, u_star)

    # Train model
    model.train(nIter=40000, batch_size=128)


    # Predictions
    u_pred = model.predict_u(X_star)

    error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)
    print('Relative L2 error_u: {:.2e}'.format(error_u))
    

    # Grid data
    U_star = griddata(X_star, u_star.flatten(), (t, x), method='cubic')
    F_star = griddata(X_star, f_star.flatten(), (t, x), method='cubic')
    U_pred = griddata(X_star, u_pred.flatten(), (t, x), method='cubic')
    
    
    # Plot
    fig_1 = plt.figure(1, figsize=(18, 5))
    plt.subplot(1, 3, 1)
    plt.pcolor(t, x, U_star, cmap='jet')
    plt.colorbar()
    plt.xlabel('$t$')
    plt.ylabel('$x$')
    plt.title(r'Exact')
    plt.tight_layout()

    plt.subplot(1, 3, 2)
    plt.pcolor(t, x, U_pred, cmap='jet')
    plt.colorbar()
    plt.xlabel('$t$')
    plt.ylabel('$x$')
    plt.title(r'Predicted')
    plt.tight_layout()

    plt.subplot(1, 3, 3)
    plt.pcolor(t, x, np.abs(U_star - U_pred), cmap='jet')
    plt.colorbar()
    plt.xlabel('$t$')
    plt.ylabel('$x$')
    plt.title('Absolute error')
    plt.tight_layout()
    plt.show()

    loss_ics = model.loss_ics_log
    loss_bcs = model.loss_bcs_log
    loss_res = model.loss_res_log
    l2_error = model.l2_error_log
    
    fig_2 = plt.figure(2, figsize=(6, 5))
    with sns.axes_style("darkgrid"):
        # The models log once every 100 training iterations
        iters = 100 * np.arange(len(loss_res))
            
        # Raw strings keep the LaTeX backslashes from being parsed as
        # (invalid) Python escape sequences.
        plt.plot(iters, loss_res, label=r'$\mathcal{L}_{r}$', linewidth=2)
        plt.plot(iters, loss_bcs, label=r'$\mathcal{L}_{bc}$', linewidth=2)
        plt.plot(iters, loss_ics, label=r'$\mathcal{L}_{ic}$', linewidth=2)
        plt.plot(iters, l2_error, label=r'$L^2$ error', linewidth=2)
        
        plt.yscale('log')
        plt.xlabel('iterations')
        plt.legend(ncol=2, fontsize=17)
        plt.tight_layout()
        plt.show()

        
================================================
FILE: heat1D/models_tf.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 15 20:00:21 2020

@author: Wsf12
"""

import tensorflow as tf
import numpy as np
import timeit


class Sampler:
    """Uniform random sampler over an axis-aligned box.

    ``coords`` stores the lower corner of the box in row 0 and the upper
    corner in row 1; ``func`` maps an array of sampled points to the values
    associated with them.
    """

    def __init__(self, dim, coords, func, name = None):
        self.dim, self.coords = dim, coords
        self.func, self.name = func, name

    def sample(self, N):
        """Draw ``N`` points uniformly inside the box and evaluate ``func`` on them.

        Returns the pair ``(points, func(points))``.
        """
        lower = self.coords[0:1,:]
        extent = self.coords[1:2,:] - lower
        points = lower + extent * np.random.rand(N, self.dim)
        return points, self.func(points)


class heat1D_NN:
    """Physics-informed plain MLP (tanh hidden layers) for a 1D heat-type PDE.

    The network consumes the normalized pair (t, x), so ``layers[0]`` must
    be 2. The training loss is the sum of the mean squared PDE residual, the
    two Dirichlet boundary misfits and the initial-condition misfit.

    Parameters
    ----------
    layers : list of int
        Layer widths from input to output.
    operator : callable
        ``operator(u, t, x, k, sigma_t, sigma_x)`` returning the residual tensor.
    k : float
        Coefficient forwarded to ``operator``.
    ics_sampler, res_sampler : objects exposing ``sample(N) -> (X, Y)``
        Samplers for the initial condition and the PDE residual points.
    bcs_sampler : sequence of two samplers
        Samplers for the two Dirichlet boundaries.
    sigma : unused
        Accepted only so the constructor signature matches ``heat1D_FF`` and
        ``heat1D_ST_FF``.
    X_star, u_star : arrays
        Test inputs and reference values used to log the relative L2 error.
    """

    def __init__(self, layers, operator, k, ics_sampler, bcs_sampler, res_sampler, sigma, X_star, u_star):
        # Input normalization statistics, estimated from a large residual sample
        X, _ = res_sampler.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]

        # Samplers
        self.ics_sampler = ics_sampler
        self.bcs_sampler = bcs_sampler
        self.res_sampler = res_sampler

        # Define differential operator
        self.k = k
        self.operator = operator

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Evaluate predictions
        self.u_ics_pred = self.net_u(self.t_ics_tf, self.x_ics_tf)
        self.u_bc1_pred = self.net_u(self.t_bc1_tf, self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.t_bc2_tf, self.x_bc2_tf)

        self.u_pred = self.net_u(self.t_u_tf, self.x_u_tf)
        self.r_pred = self.net_r(self.t_r_tf, self.x_r_tf)

        # Boundary loss and Initial loss.
        # The boundary misfit is measured against the sampled Dirichlet
        # targets; for homogeneous boundaries (targets equal to zero) this
        # reduces to the mean squared prediction.
        self.loss_ic = tf.reduce_mean(tf.square(self.u_ics_tf - self.u_ics_pred))
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_tf - self.u_bc1_pred))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_tf - self.u_bc2_pred))

        self.loss_bcs = self.loss_bc1 + self.loss_bc2
        self.loss_ics = self.loss_ic

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_pred))

        # Total loss
        self.loss = self.loss_res + self.loss_bcs + self.loss_ics

        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Test data
        self.X_star = X_star
        self.u_star = u_star

        # Logger
        self._reset_logs()

        # Saver
        self.saver = tf.train.Saver()

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def _reset_logs(self):
        """Create empty training-history lists (losses and relative L2 error)."""
        self.loss_bcs_log = []
        self.loss_ics_log = []
        self.loss_res_log = []
        # train() appends to this list, so it has to exist before training
        self.l2_error_log = []

    # Xavier initialization
    def xavier_init(self, size):
        """Return a ``[in_dim, out_dim]`` weight variable with Xavier-scaled normal entries."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    def initialize_NN(self, layers):
        """Create one weight matrix and one bias row per consecutive pair of layer widths."""
        weights = []
        biases = []

        # Variables are created in (W, b) order layer by layer
        for in_dim, out_dim in zip(layers[:-1], layers[1:]):
            W = self.xavier_init(size=[in_dim, out_dim])
            b = tf.Variable(tf.random_normal([1, out_dim], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)

        return weights, biases

    # Evaluates the forward pass
    def forward_pass(self, H):
        """Apply the tanh hidden layers followed by the linear output layer."""
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            H = tf.tanh(tf.add(tf.matmul(H, W), b))

        return tf.add(tf.matmul(H, self.weights[-1]), self.biases[-1])

    # Forward pass for u
    def net_u(self, t, x):
        """Network prediction for the (normalized) inputs t and x."""
        u = self.forward_pass(tf.concat([t, x], 1))
        return u

    # Forward pass for residual
    def net_r(self, t, x):
        """PDE residual of the network prediction at (t, x)."""
        u = self.net_u(t, x)
        residual = self.operator(u, t, x, self.k,
                                 self.sigma_t, self.sigma_x)
        return residual

    def fetch_minibatch(self, sampler, N):
        """Sample ``N`` points from ``sampler`` and normalize the inputs (targets are untouched)."""
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def train(self, nIter=10000, batch_size=128):
        """Run ``nIter`` Adam steps on freshly sampled mini-batches.

        Every 100 iterations the loss components and the relative L2 error on
        the test data are appended to the ``*_log`` lists and printed.
        """

        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_ics_batch, u_ics_batch = self.fetch_minibatch(self.ics_sampler, batch_size)
            X_bc1_batch, u_bc1_batch = self.fetch_minibatch(self.bcs_sampler[0], batch_size)
            X_bc2_batch, u_bc2_batch = self.fetch_minibatch(self.bcs_sampler[1], batch_size)

            # Fetch residual mini-batch
            X_res_batch, _ = self.fetch_minibatch(self.res_sampler, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.t_ics_tf: X_ics_batch[:, 0:1], self.x_ics_tf: X_ics_batch[:, 1:2],
                       self.u_ics_tf: u_ics_batch,
                       self.t_bc1_tf: X_bc1_batch[:, 0:1], self.x_bc1_tf: X_bc1_batch[:, 1:2],
                       self.u_bc1_tf: u_bc1_batch,
                       self.t_bc2_tf: X_bc2_batch[:, 0:1], self.x_bc2_tf: X_bc2_batch[:, 1:2],
                       self.u_bc2_tf: u_bc2_batch,
                       self.t_r_tf: X_res_batch[:, 0:1], self.x_r_tf: X_res_batch[:, 1:2]}

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = timeit.default_timer() - start_time
                # One session call evaluates all loss components on the same batch
                loss_value, loss_bcs_value, loss_ics_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_ics, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_ics_log.append(loss_ics_value)
                self.loss_res_log.append(loss_res_value)
                self.l2_error_log.append(error)

                print('It: %d, Loss: %.3e, Loss_bcs: %.3e, Loss_ics: %.3e, Loss_res: %.3e, Time: %.2f' %
                      (it, loss_value, loss_bcs_value, loss_ics_value, loss_res_value, elapsed))

                start_time = timeit.default_timer()

    # Evaluate predictions at test points
    def predict_u(self, X_star):
        """Evaluate the network at the un-normalized points ``X_star`` (columns t, x)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1], self.x_u_tf: X_star[:, 1:2]}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluate residual at test points
    def predict_r(self, X_star):
        """Evaluate the PDE residual at the un-normalized points ``X_star`` (columns t, x)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_r_tf: X_star[:, 0:1], self.x_r_tf: X_star[:, 1:2]}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star


class heat1D_FF:
    """Physics-informed MLP with a random Fourier feature embedding of (t, x).

    The normalized input pair is projected by a fixed (non-trainable) random
    matrix scaled by ``sigma``; sines and cosines of the projection form the
    input of a tanh MLP. ``layers[0]`` is the width of that embedding
    (``layers[0] // 2`` projections, each contributing a sine and a cosine).
    The training loss is the sum of the mean squared PDE residual, the two
    Dirichlet boundary misfits and the initial-condition misfit.

    Parameters
    ----------
    layers : list of int
        Embedding width followed by the MLP layer widths.
    operator : callable
        ``operator(u, t, x, k, sigma_t, sigma_x)`` returning the residual tensor.
    k : float
        Coefficient forwarded to ``operator``.
    ics_sampler, res_sampler : objects exposing ``sample(N) -> (X, Y)``
        Samplers for the initial condition and the PDE residual points.
    bcs_sampler : sequence of two samplers
        Samplers for the two Dirichlet boundaries.
    sigma : float
        Scale of the random Fourier projection matrix.
    X_star, u_star : arrays
        Test inputs and reference values used to log the relative L2 error.
    """

    def __init__(self, layers, operator, k, ics_sampler, bcs_sampler, res_sampler, sigma, X_star, u_star):
        # Input normalization statistics, estimated from a large residual sample
        X, _ = res_sampler.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]

        # Samplers
        self.ics_sampler = ics_sampler
        self.bcs_sampler = bcs_sampler
        self.res_sampler = res_sampler

        # Define differential operator
        self.k = k
        self.operator = operator

        # Fourier hyperparameter
        self.sigma = sigma

        # Fixed random projection applied to the concatenated (t, x) input
        self.W = tf.Variable(tf.random_normal([2, layers[0] //2], dtype=tf.float32)  * sigma, dtype=tf.float32, trainable=False)

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Evaluate predictions
        self.u_ics_pred = self.net_u(self.t_ics_tf, self.x_ics_tf)
        self.u_bc1_pred = self.net_u(self.t_bc1_tf, self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.t_bc2_tf, self.x_bc2_tf)

        self.u_pred = self.net_u(self.t_u_tf, self.x_u_tf)
        self.r_pred = self.net_r(self.t_r_tf, self.x_r_tf)

        # Boundary loss and Initial loss.
        # The boundary misfit is measured against the sampled Dirichlet
        # targets; for homogeneous boundaries (targets equal to zero) this
        # reduces to the mean squared prediction.
        self.loss_ic = tf.reduce_mean(tf.square(self.u_ics_tf - self.u_ics_pred))
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_tf - self.u_bc1_pred))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_tf - self.u_bc2_pred))

        self.loss_bcs = self.loss_bc1 + self.loss_bc2
        self.loss_ics = self.loss_ic

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_pred))

        # Total loss
        self.loss = self.loss_res + self.loss_bcs + self.loss_ics

        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Test data
        self.X_star = X_star
        self.u_star = u_star

        # Logger
        self.loss_bcs_log = []
        self.loss_ics_log = []
        self.loss_res_log = []
        self.l2_error_log = []

        # Saver
        self.saver = tf.train.Saver()

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

    # Xavier initialization
    def xavier_init(self, size):
        """Return a ``[in_dim, out_dim]`` weight variable with Xavier-scaled normal entries."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    def initialize_NN(self, layers):
        """Create one weight matrix and one bias row per consecutive pair of layer widths."""
        weights = []
        biases = []

        # Variables are created in (W, b) order layer by layer
        for in_dim, out_dim in zip(layers[:-1], layers[1:]):
            W = self.xavier_init(size=[in_dim, out_dim])
            b = tf.Variable(tf.random_normal([1, out_dim], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)

        return weights, biases

    # Evaluates the forward pass
    def forward_pass(self, H):
        """Embed the input with Fourier features, then apply the tanh MLP."""
        # Fourier feature encoding (the projection is shared by sin and cos)
        projection = tf.matmul(H, self.W)
        H = tf.concat([tf.sin(projection),
                       tf.cos(projection)], 1)

        # Pass through a MLP
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            H = tf.tanh(tf.add(tf.matmul(H, W), b))

        return tf.add(tf.matmul(H, self.weights[-1]), self.biases[-1])

    # Forward pass for u
    def net_u(self, t, x):
        """Network prediction for the (normalized) inputs t and x."""
        u = self.forward_pass(tf.concat([t, x], 1))
        return u

    # Forward pass for residual
    def net_r(self, t, x):
        """PDE residual of the network prediction at (t, x)."""
        u = self.net_u(t, x)
        residual = self.operator(u, t, x, self.k,
                                 self.sigma_t, self.sigma_x)
        return residual

    def fetch_minibatch(self, sampler, N):
        """Sample ``N`` points from ``sampler`` and normalize the inputs (targets are untouched)."""
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def train(self, nIter=10000, batch_size=128):
        """Run ``nIter`` Adam steps on freshly sampled mini-batches.

        Every 100 iterations the loss components and the relative L2 error on
        the test data are appended to the ``*_log`` lists and printed.
        """

        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_ics_batch, u_ics_batch = self.fetch_minibatch(self.ics_sampler, batch_size)
            X_bc1_batch, u_bc1_batch = self.fetch_minibatch(self.bcs_sampler[0], batch_size)
            X_bc2_batch, u_bc2_batch = self.fetch_minibatch(self.bcs_sampler[1], batch_size)

            # Fetch residual mini-batch
            X_res_batch, _ = self.fetch_minibatch(self.res_sampler, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.t_ics_tf: X_ics_batch[:, 0:1], self.x_ics_tf: X_ics_batch[:, 1:2],
                       self.u_ics_tf: u_ics_batch,
                       self.t_bc1_tf: X_bc1_batch[:, 0:1], self.x_bc1_tf: X_bc1_batch[:, 1:2],
                       self.u_bc1_tf: u_bc1_batch,
                       self.t_bc2_tf: X_bc2_batch[:, 0:1], self.x_bc2_tf: X_bc2_batch[:, 1:2],
                       self.u_bc2_tf: u_bc2_batch,
                       self.t_r_tf: X_res_batch[:, 0:1], self.x_r_tf: X_res_batch[:, 1:2]}

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = timeit.default_timer() - start_time
                # One session call evaluates all loss components on the same batch
                loss_value, loss_bcs_value, loss_ics_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_ics, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_ics_log.append(loss_ics_value)
                self.loss_res_log.append(loss_res_value)
                self.l2_error_log.append(error)

                print('It: %d, Loss: %.3e, Loss_bcs: %.3e, Loss_ics: %.3e, Loss_res: %.3e, Time: %.2f' %
                      (it, loss_value, loss_bcs_value, loss_ics_value, loss_res_value, elapsed))

                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Evaluate the network at the un-normalized points ``X_star`` (columns t, x)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1], self.x_u_tf: X_star[:, 1:2]}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluates residual at test points
    def predict_r(self, X_star):
        """Evaluate the PDE residual at the un-normalized points ``X_star`` (columns t, x)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_r_tf: X_star[:, 0:1], self.x_r_tf: X_star[:, 1:2]}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star


class heat1D_ST_FF:
    """Physics-informed MLP with separate temporal and spatial Fourier features.

    The normalized t and x inputs are embedded independently: t through a
    fixed random projection of unit scale, x through one scaled by ``sigma``.
    Both embeddings go through the same tanh MLP (shared weights), the two
    hidden representations are multiplied point-wise, and a final linear
    layer produces the output. ``layers[0]`` is the width of each embedding
    (``layers[0] // 2`` projections, each contributing a sine and a cosine).
    The training loss is the sum of the mean squared PDE residual, the two
    Dirichlet boundary misfits and the initial-condition misfit.

    Parameters
    ----------
    layers : list of int
        Embedding width followed by the MLP layer widths.
    operator : callable
        ``operator(u, t, x, k, sigma_t, sigma_x)`` returning the residual tensor.
    k : float
        Coefficient forwarded to ``operator``.
    ics_sampler, res_sampler : objects exposing ``sample(N) -> (X, Y)``
        Samplers for the initial condition and the PDE residual points.
    bcs_sampler : sequence of two samplers
        Samplers for the two Dirichlet boundaries.
    sigma : float
        Scale of the spatial random Fourier projection.
    X_star, u_star : arrays
        Test inputs and reference values used to log the relative L2 error.
    """

    def __init__(self, layers, operator, k, ics_sampler, bcs_sampler, res_sampler,  sigma, X_star, u_star):
        # Input normalization statistics, estimated from a large residual sample
        X, _ = res_sampler.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]

        # Samplers
        self.ics_sampler = ics_sampler
        self.bcs_sampler = bcs_sampler
        self.res_sampler = res_sampler

        # Define differential operator
        self.k = k
        self.operator = operator

        # Fourier hyperparameter
        self.sigma = sigma

        # Initialize spatial and temporal Fourier features (fixed, non-trainable)
        self.W_t =tf.Variable(tf.random_normal([1, layers[0] //2], dtype=tf.float32)  * 1, dtype=tf.float32, trainable=False)
        self.W_x = tf.Variable(tf.random_normal([1, layers[0] //2], dtype=tf.float32)  * sigma, dtype=tf.float32, trainable=False)

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Evaluate predictions
        self.u_ics_pred = self.net_u(self.t_ics_tf, self.x_ics_tf)
        self.u_bc1_pred = self.net_u(self.t_bc1_tf, self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.t_bc2_tf, self.x_bc2_tf)

        self.u_pred = self.net_u(self.t_u_tf, self.x_u_tf)
        self.r_pred = self.net_r(self.t_r_tf, self.x_r_tf)

        # Boundary loss and Initial loss.
        # The boundary misfit is measured against the sampled Dirichlet
        # targets; for homogeneous boundaries (targets equal to zero) this
        # reduces to the mean squared prediction.
        self.loss_ic = tf.reduce_mean(tf.square(self.u_ics_tf - self.u_ics_pred))
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_tf - self.u_bc1_pred))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_tf - self.u_bc2_pred))

        self.loss_bcs = self.loss_bc1 + self.loss_bc2
        self.loss_ics =  self.loss_ic

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_pred))

        # Total loss
        self.loss = self.loss_res + self.loss_bcs + self.loss_ics

        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Test data
        self.X_star = X_star
        self.u_star = u_star

        # Logger
        self.loss_bcs_log = []
        self.loss_ics_log = []
        self.loss_res_log = []
        self.l2_error_log = []

        # Saver
        self.saver = tf.train.Saver()

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

    # Xavier initialization
    def xavier_init(self, size):
        """Return a ``[in_dim, out_dim]`` weight variable with Xavier-scaled normal entries."""
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    # Initialize the network
    def initialize_NN(self, layers):
        """Create one weight matrix and one bias row per consecutive pair of layer widths."""
        weights = []
        biases = []

        # Variables are created in (W, b) order layer by layer
        for in_dim, out_dim in zip(layers[:-1], layers[1:]):
            W = self.xavier_init(size=[in_dim, out_dim])
            b = tf.Variable(tf.random_normal([1, out_dim], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)

        return weights, biases

    # Evaluates the forward pass
    def forward_pass(self, H):
        """Embed t and x separately, run both through the shared MLP and merge them."""
        t = H[:,0:1]
        x = H[:,1:2]

        # Temporal Fourier feature encoding (projection shared by sin and cos)
        projection_t = tf.matmul(t, self.W_t)
        H_t = tf.concat([tf.sin(projection_t),
                         tf.cos(projection_t)], 1)
        # Spatial Fourier feature encoding
        projection_x = tf.matmul(x, self.W_x)
        H_x = tf.concat([tf.sin(projection_x),
                         tf.cos(projection_x)], 1)

        # Pass both embeddings through the same MLP (shared weights)
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            H_t = tf.tanh(tf.add(tf.matmul(H_t, W), b))
            H_x = tf.tanh(tf.add(tf.matmul(H_x, W), b))

        # Merge the outputs via point-wise multiplication
        H = tf.multiply(H_t, H_x)

        return tf.add(tf.matmul(H, self.weights[-1]), self.biases[-1])

    # Forward pass for u
    def net_u(self, t, x):
        """Network prediction for the (normalized) inputs t and x."""
        u = self.forward_pass(tf.concat([t, x], 1))
        return u

    # Forward pass for residual
    def net_r(self, t, x):
        """PDE residual of the network prediction at (t, x)."""
        u = self.net_u(t, x)
        residual = self.operator(u, t, x, self.k,
                                 self.sigma_t, self.sigma_x)
        return residual

    def fetch_minibatch(self, sampler, N):
        """Sample ``N`` points from ``sampler`` and normalize the inputs (targets are untouched)."""
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    def train(self, nIter=10000, batch_size=128):
        """Run ``nIter`` Adam steps on freshly sampled mini-batches.

        Every 100 iterations the loss components and the relative L2 error on
        the test data are appended to the ``*_log`` lists and printed.
        """

        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_ics_batch, u_ics_batch = self.fetch_minibatch(self.ics_sampler, batch_size)
            X_bc1_batch, u_bc1_batch = self.fetch_minibatch(self.bcs_sampler[0], batch_size)
            X_bc2_batch, u_bc2_batch = self.fetch_minibatch(self.bcs_sampler[1], batch_size)

            # Fetch residual mini-batch
            X_res_batch, _ = self.fetch_minibatch(self.res_sampler, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.t_ics_tf: X_ics_batch[:, 0:1], self.x_ics_tf: X_ics_batch[:, 1:2],
                       self.u_ics_tf: u_ics_batch,
                       self.t_bc1_tf: X_bc1_batch[:, 0:1], self.x_bc1_tf: X_bc1_batch[:, 1:2],
                       self.u_bc1_tf: u_bc1_batch,
                       self.t_bc2_tf: X_bc2_batch[:, 0:1], self.x_bc2_tf: X_bc2_batch[:, 1:2],
                       self.u_bc2_tf: u_bc2_batch,
                       self.t_r_tf: X_res_batch[:, 0:1], self.x_r_tf: X_res_batch[:, 1:2]}

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = timeit.default_timer() - start_time
                # One session call evaluates all loss components on the same batch
                loss_value, loss_bcs_value, loss_ics_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_ics, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_ics_log.append(loss_ics_value)
                self.loss_res_log.append(loss_res_value)
                self.l2_error_log.append(error)

                print('It: %d, Loss: %.3e, Loss_bcs: %.3e, Loss_ics: %.3e, Loss_res: %.3e, Time: %.2f' %
                      (it, loss_value, loss_bcs_value, loss_ics_value, loss_res_value, elapsed))

                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Evaluate the network at the un-normalized points ``X_star`` (columns t, x)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1], self.x_u_tf: X_star[:, 1:2]}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluates PDE residual at test points
    def predict_r(self, X_star):
        """Evaluate the PDE residual at the un-normalized points ``X_star`` (columns t, x)."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_r_tf: X_star[:, 0:1], self.x_r_tf: X_star[:, 1:2]}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star


================================================
FILE: wave1D/Compute_Jacobian.py
================================================
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 11 17:45:07 2020

@author: sifan
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import gradients_impl as gradient_ops
from tensorflow.python.ops.parallel_for import control_flow_ops
from tensorflow.python.util import nest

def jacobian(output, inputs, use_pfor=True, parallel_iterations=None):
  """Builds the jacobian of `output` with respect to `inputs`.

  `output` is flattened, `gradients` is taken of every scalar entry with
  respect to each flattened input (requesting zeros for unconnected inputs),
  and the stacked results are reshaped to `output.shape + input.shape`.

  Args:
    output: A tensor.
    inputs: A tensor or a nested structure of tensor objects.
    use_pfor: If true, the per-entry gradients are computed with pfor;
      otherwise with `for_loop`.
    parallel_iterations: Forwarded unchanged to pfor / for_loop; controls how
      many iterations are dispatched in parallel.
  Returns:
    A structure matching `inputs` (packed with `nest.pack_sequence_as`) whose
    tensor entries have been reshaped to the output shape followed by the
    shape of the corresponding input.
  """
  input_list = nest.flatten(inputs)
  static_out_shape = output.shape
  dynamic_out_shape = array_ops.shape(output)
  flat_output = array_ops.reshape(output, [-1])

  def loop_fn(i):
    # Gradient of the i-th scalar entry of the flattened output.
    entry = array_ops.gather(flat_output, i)
    return gradient_ops.gradients(
        entry, input_list,
        unconnected_gradients=tf.UnconnectedGradients.ZERO)

  # Prefer the static element count; fall back to the dynamic one when the
  # static dimension cannot be converted to an int.
  try:
    num_entries = int(flat_output.shape[0])
  except TypeError:
    num_entries = array_ops.shape(flat_output)[0]

  if use_pfor:
    per_input = control_flow_ops.pfor(
        loop_fn, num_entries, parallel_iterations=parallel_iterations)
  else:
    per_input = control_flow_ops.for_loop(
        loop_fn,
        [flat_output.dtype] * len(input_list),
        num_entries,
        parallel_iterations=parallel_iterations)

  for idx, grad in enumerate(per_input):
    if not isinstance(grad, ops.Tensor):
      continue
    # Leading axis of `grad` indexes the flattened output; unfold it back.
    full_shape = array_ops.concat(
        [dynamic_out_shape, array_ops.shape(grad)[1:]], axis=0)
    grad = array_ops.reshape(grad, full_shape)
    grad.set_shape(static_out_shape.concatenate(input_list[idx].shape))
    per_input[idx] = grad

  return nest.pack_sequence_as(inputs, per_input)

================================================
FILE: wave1D/wave1D.py
================================================
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
from wave_models_tf import Sampler, Wave1D_NTK, Wave1D_NTK_mFF, Wave1D_NTK_ST_mFF

if __name__ == '__main__':
    def u(x, a, c):
        """
        Reference solution used for the initial/boundary data and test set.

        :param x: x = (t, x), one point per row
        :param a: multiplier of the second mode's frequency
        :param c: wave speed
        :return: column of sin(pi x) cos(c pi t) + sin(a pi x) cos(a c pi t)
        """
        t_col = x[:, 0:1]
        x_col = x[:, 1:2]
        first_mode = np.sin(np.pi * x_col) * np.cos(c * np.pi * t_col)
        second_mode = np.sin(a * np.pi * x_col) * np.cos(a * c * np.pi * t_col)
        return first_mode + second_mode

    def f(x, a, c):
        """Zero forcing term: one zero per row of ``x`` (``a`` and ``c`` are unused)."""
        return np.zeros((x.shape[0], 1))

    def operator(u, t, x, c, sigma_t=1.0, sigma_x=1.0):
        """Wave-equation residual ``u_tt - c**2 * u_xx``.

        Every derivative is divided by ``sigma_t`` / ``sigma_x`` once per
        differentiation, i.e. the chain-rule factor for inputs that were
        scaled by those values.
        """
        du_dt = tf.gradients(u, t)[0] / sigma_t
        du_dx = tf.gradients(u, x)[0] / sigma_x
        d2u_dt2 = tf.gradients(du_dt, t)[0] / sigma_t
        d2u_dx2 = tf.gradients(du_dx, x)[0] / sigma_x
        return d2u_dt2 - c**2 * d2u_dx2
    
    # Hyper-parameters
    a = 2
    c = 10
    
    # Domain boundaries
    ics_coords = np.array([[0.0, 0.0],
                           [0.0, 1.0]])
    bc1_coords = np.array([[0.0, 0.0],
                           [1.0, 0.0]])
    bc2_coords = np.array([[0.0, 1.0],
                           [1.0, 1.0]])
    dom_coords = np.array([[0.0, 0.0],
                           [1.0, 1.0]])

    # Create initial conditions samplers
    ics_sampler = Sampler(2, ics_coords, lambda x: u(x, a, c), name='Initial Condition 1')

    # Create boundary conditions samplers
    bc1 = Sampler(2, bc1_coords, lambda x: u(x, a, c), name='Dirichlet BC1')
    bc2 = Sampler(2, bc2_coords, lambda x: u(x, a, c), name='Dirichlet BC2')
    bcs_sampler = [bc1, bc2]

    # Create residual sampler
    res_sampler = Sampler(2, dom_coords, lambda x: f(x, a, c), name='Forcing')
    
    # Test data
    nn = 200
    t = np.linspace(dom_coords[0, 0], dom_coords[1, 0], nn)[:, None]
    x = np.linspace(dom_coords[0, 1], dom_coords[1, 1], nn)[:, None]
    t, x = np.meshgrid(t, x)
    X_star = np.hstack((t.flatten()[:, None], x.flatten()[:, None]))

    u_star = u(X_star, a,c)

    # Define model
    # Wave1D_NTK: Plain MLP with NTK adaptive weights
    # Wave1D_NTK_mFF: Multi-scale Fourier feature network with NTK adaptive weights
    # Wave1D_NTK_ST_mFF: Spatial-temporal Fourier feature network with NTK adaptive weights
    
#    layers = [2, 200, 200, 200, 1]    # if use Wave1D_NTK model
    layers = [200, 200, 200, 1]

    kernel_size = 120
    model = Wave1D_NTK_mFF(layers, operator, ics_sampler, bcs_sampler, res_sampler, c, kernel_size, X_star, u_star)
    
    
    # Train model
    itertaions = 40001
    model.train(nIter=itertaions, batch_size =kernel_size, log_NTK=True, update_weights=True)

    # Predictions
    u_pred = model.predict_u(X_star)
    f_pred = model.predict_r(X_star)

    error_u = np.linalg.norm(u_star - u_pred, 2) / np.linalg.norm(u_star, 2)

    print('Relative L2 error_u: %e' % (error_u))
    
    # Plot
    U_star = griddata(X_star, u_star.flatten(), (t, x), method='cubic')
    U_pred = griddata(X_star, u_pred.flatten(), (t, x), method='cubic')
    
    # Predictions    
    fig = plt.figure(3, figsize=(18, 5))
    plt.subplot(1, 3, 1)
    plt.pcolor(t, x, U_star, cmap='jet')
    plt.colorbar()
    plt.xlabel('$t$')
    plt.ylabel('$x$')
    plt.title('Exact u(x)')

    plt.subplot(1, 3, 2)
    plt.pcolor(t, x, U_pred, cmap='jet')
    plt.colorbar()
    plt.xlabel('$t$')
    plt.ylabel('$x$')
    plt.title('Predicted u(x)')

    plt.subplot(1, 3, 3)
    plt.pcolor(t, x, np.abs(U_star - U_pred), cmap='jet')
    plt.colorbar()
    plt.xlabel('$t$')
    plt.ylabel('$x$')
    plt.title('Absolute error')
    plt.tight_layout()
    plt.show()
    
    # Restore loss_res and loss_bcs
    loss_res = model.loss_res_log
    loss_bcs = model.loss_bcs_log
    loss_u_t_ics = model.loss_ut_ics_log

    l2_error = model.l2_error_log

    fig = plt.figure(figsize=(6,5))
    iters =100 *  np.arange(len(loss_res))
    with sns.axes_style("darkgrid"):
        plt.plot(iters, loss_res, label='$\mathcal{L}_{r}$')
        plt.plot(iters, loss_bcs, label='$\mathcal{L}_{u}$')
        plt.plot(iters, loss_u_t_ics, label='$\mathcal{L}_{u_t}$')
        plt.plot(iters, l2_error, label='$\mathcal{L}^2 error$')
        plt.yscale('log')
        plt.xlabel('iterations')
        plt.legend(ncol=2)
        plt.tight_layout()
        plt.show()

    # NTK
    # Create loggers for eigenvalues of NTK
    lambda_K_u_log = []
    lambda_K_ut_log = []
    lambda_K_r_log = []
    
    # Restore the NTK
    K_u_list = model.K_u_log
    K_ut_list = model.K_ut_log
    K_r_list = model.K_r_log
        
    for k in range(len(K_u_list)):
        K_u = K_u_list[k]
        K_ut = K_ut_list[k]
        K_r = K_r_list[k]
            
        # Compute eigenvalues
        lambda_K_u, _ = np.linalg.eig(K_u)
        lambda_K_ut, _ = np.linalg.eig(K_ut)
        lambda_K_r, _ = np.linalg.eig(K_r)
        # Sort in descresing order
        lambda_K_u = np.sort(np.real(lambda_K_u))[::-1]
        lambda_K_ut = np.sort(np.real(lambda_K_ut))[::-1]
        lambda_K_r = np.sort(np.real(lambda_K_r))[::-1]
        
        # Store eigenvalues
        lambda_K_u_log.append(lambda_K_u)
        lambda_K_ut_log.append(lambda_K_ut)
        lambda_K_r_log.append(lambda_K_r)
    
    #     Eigenvalues of NTK
    fig = plt.figure(figsize=(18, 5))
    plt.subplot(1,3,1)
    plt.plot(lambda_K_u_log[0], label = '$n=0$')
    plt.plot(lambda_K_u_log[1], '--', label = '$n=10,000$')
    plt.plot(lambda_K_u_log[4], '--', label = '$n=40,000$')
    plt.plot(lambda_K_u_log[-1], '--', label = '$n=80,000$')
    plt.xlabel('index')
    plt.xscale('log')
    plt.yscale('log')
    plt.legend()
    plt.title(r'Eigenvalues of ${K}_u$')

    plt.subplot(1,3,2)
    plt.plot(lambda_K_ut_log[0], label = '$n=0$')
    plt.plot(lambda_K_ut_log[1], '--',label = '$n=10,000$')
    plt.plot(lambda_K_ut_log[4], '--', label = '$n=40,000$')
    plt.plot(lambda_K_ut_log[-1], '--', label = '$n=80,000$')
    plt.xlabel('index')
    plt.xscale('log')
    plt.yscale('log')
    plt.legend()
    plt.title(r'Eigenvalues of ${K}_{u_t}$')
    
    ax =plt.subplot(1,3,3)
    plt.plot(lambda_K_r_log[0], label = '$n=0$')
    plt.plot(lambda_K_r_log[1], '--', label = '$n=10,000$')
    plt.plot(lambda_K_r_log[4], '--', label = '$n=40,000$')
    plt.plot(lambda_K_r_log[-1], '--', label = '$n=80,000$')
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('index')
    plt.title(r'Eigenvalues of ${K}_{r}$')
    plt.legend()
    plt.tight_layout()
    plt.show()
     
    # Evolution of weights during training
    lambda_u_log = model.lambda_u_log
    lambda_ut_log = model.lambda_ut_log
    lambda_r_log = model.lambda_r_log   

    fig = plt.figure(figsize=(6, 5))
    plt.plot(lambda_u_log, label='$\lambda_u$')
    plt.plot(lambda_ut_log, label='$\lambda_{u_t}$')
    plt.plot(lambda_r_log, label='$\lambda_{r}$')
    plt.xlabel('iterations')
    plt.ylabel('$\lambda$')
    plt.yscale('log')
    plt.legend( )
    plt.locator_params(axis='x',nbins=5)
    plt.tight_layout()
    plt.show()   
    

================================================
FILE: wave1D/wave_models_tf.py
================================================
import tensorflow as tf
from Compute_Jacobian import jacobian
import numpy as np
import timeit

class Sampler:
    """Draws uniform random points from an axis-aligned box and labels them.

    ``coords`` holds the lower corner in row 0 and the upper corner in row 1;
    ``func`` maps a batch of points to their target values.
    """

    def __init__(self, dim, coords, func, name = None):
        self.dim = dim
        self.coords = coords
        self.func = func
        self.name = name

    def sample(self, N):
        """Return ``(x, func(x))`` for ``N`` points drawn with ``np.random.rand``."""
        lower = self.coords[0:1,:]
        upper = self.coords[1:2,:]
        x = lower + (upper - lower) * np.random.rand(N, self.dim)
        return x, self.func(x)


class Wave1D_NTK:
    # Plain MLP with NTK adaptive weights

    # Initialize the class
    def __init__(self, layers, operator, ics_sampler, bcs_sampler, res_sampler, c, kernel_size, X_star, u_star):
        """Build the TensorFlow graph, session, optimizer, NTK ops and loggers.

        Args:
            layers: layer widths handed to ``initialize_NN``.
            operator: callable invoked by ``net_r`` as
                ``operator(u, t, x, c, sigma_t, sigma_x)``.
            ics_sampler: object exposing ``sample(N)``; stored for ``train``.
            bcs_sampler: indexable collection of samplers; stored for ``train``.
            res_sampler: sampler of the residual points; also used here to
                estimate the input normalization statistics.
            c: wave velocity, wrapped in a float32 ``tf.constant``.
            kernel_size: static number of rows of the NTK placeholders, and
                therefore the size of each NTK matrix.
            X_star, u_star: test inputs and reference values, stored for the
                relative error logged during training.
        """

        # Normalize input: mean/std are estimated from 1e5 residual samples
        X, _ = res_sampler.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]

        # Samplers
        self.operator = operator
        self.ics_sampler = ics_sampler
        self.bcs_sampler = bcs_sampler
        self.res_sampler = res_sampler

        # Test data
        self.X_star = X_star
        self.u_star = u_star

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)
        
        # Initialize weights for losses (0-d arrays, fed through placeholders)
        self.lambda_u_val = np.array(1.0)
        self.lambda_ut_val = np.array(1.0)
        self.lambda_r_val = np.array(1.0)
      
        # Wave velocity
        self.c = tf.constant(c, dtype=tf.float32)

        # Size of the NTK
        self.kernel_size = kernel_size

        D1 = self.kernel_size    # size of K_u
        D2 = self.kernel_size    # size of K_ut
        D3 = self.kernel_size    # size of K_r

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        # Inputs of the prediction node u_pred
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Initial-condition points and their target values
        self.t_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Points on the two boundaries
        self.t_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Residual (collocation) points
        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Loss weights; all three reuse the shape of lambda_u_val
        self.lambda_u_tf = tf.placeholder(tf.float32, shape=self.lambda_u_val.shape)
        self.lambda_ut_tf = tf.placeholder(tf.float32, shape=self.lambda_u_val.shape)
        self.lambda_r_tf = tf.placeholder(tf.float32, shape=self.lambda_u_val.shape)

        # NTK inputs have a fixed number of rows, so batches fed to them must
        # contain exactly kernel_size points
        self.t_u_ntk_tf = tf.placeholder(tf.float32, shape=(D1, 1))
        self.x_u_ntk_tf = tf.placeholder(tf.float32, shape=(D1, 1))
        
        self.t_ut_ntk_tf = tf.placeholder(tf.float32, shape=(D2, 1))
        self.x_ut_ntk_tf = tf.placeholder(tf.float32, shape=(D2, 1))
        
        self.t_r_ntk_tf = tf.placeholder(tf.float32, shape=(D3, 1))
        self.x_r_ntk_tf = tf.placeholder(tf.float32, shape=(D3, 1))

        # Evaluate predictions
        self.u_ics_pred = self.net_u(self.t_ics_tf, self.x_ics_tf)
        self.u_t_ics_pred = self.net_u_t(self.t_ics_tf, self.x_ics_tf)
        self.u_bc1_pred = self.net_u(self.t_bc1_tf, self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.t_bc2_tf, self.x_bc2_tf)

        self.u_pred = self.net_u(self.t_u_tf, self.x_u_tf)
        self.r_pred = self.net_r(self.t_r_tf, self.x_r_tf)
        
        self.u_ntk_pred = self.net_u(self.t_u_ntk_tf, self.x_u_ntk_tf)
        self.ut_ntk_pred = self.net_u_t(self.t_ut_ntk_tf, self.x_ut_ntk_tf)
        self.r_ntk_pred = self.net_r(self.t_r_ntk_tf, self.x_r_ntk_tf)

        # Boundary loss and Initial loss
        # Only the initial displacement is compared with a target; the initial
        # velocity and both boundary predictions are penalised towards zero.
        self.loss_ics_u = tf.reduce_mean(tf.square(self.u_ics_tf - self.u_ics_pred))
        self.loss_ics_u_t = tf.reduce_mean(tf.square(self.u_t_ics_pred))
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_pred))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_pred))

        self.loss_bcs = self.loss_ics_u + self.loss_bc1 + self.loss_bc2

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_pred))

        # Total loss: weighted sum of the residual, data and velocity terms
        self.loss = self.lambda_r_tf * self.loss_res + self.lambda_u_tf * self.loss_bcs + self.lambda_ut_tf * self.loss_ics_u_t 

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Compute the Jacobian for weights and biases in each hidden layer  
        self.J_u = self.compute_jacobian(self.u_ntk_pred)
        self.J_ut = self.compute_jacobian(self.ut_ntk_pred)
        self.J_r = self.compute_jacobian(self.r_ntk_pred)
        
        # Each kernel pairs a Jacobian list with itself
        self.K_u = self.compute_ntk(self.J_u, D1, self.J_u, D1)
        self.K_ut = self.compute_ntk(self.J_ut, D2, self.J_ut, D2)
        self.K_r = self.compute_ntk(self.J_r, D3, self.J_r, D3)

        # Loss logger
        self.loss_bcs_log = []
        self.loss_ut_ics_log = []
        self.loss_res_log = []
        self.l2_error_log = []

        # NTK logger
        self.K_u_log = []
        self.K_ut_log = []
        self.K_r_log = []
        
        # weights logger
        self.lambda_u_log = []
        self.lambda_ut_log = []
        self.lambda_r_log = []
        
        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

        # Saver
        self.saver = tf.train.Saver()

    # Initialize network weights and biases using Xavier initialization
    def initialize_NN(self, layers):
        """Create one weight matrix and one bias row per pair of adjacent widths.

        Weights are normal draws scaled by ``1 / sqrt((in + out) / 2)``;
        biases start at zero. Returns ``(weights, biases)`` as two lists.
        """
        def xavier_init(size):
            fan_in, fan_out = size[0], size[1]
            stddev = 1. / np.sqrt((fan_in + fan_out) / 2.)
            draw = tf.random_normal([fan_in, fan_out], dtype=tf.float32) * stddev
            return tf.Variable(draw, dtype=tf.float32)

        weights, biases = [], []
        # Variables are created layer by layer, weight first and then bias.
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            weights.append(xavier_init(size=[n_in, n_out]))
            biases.append(tf.Variable(tf.zeros([1, n_out], dtype=tf.float32), dtype=tf.float32))
        return weights, biases

    # Evaluates the forward pass
    def forward_pass(self, H, layers, weights, biases):
        """Apply ``len(layers) - 2`` tanh layers, then a linear output layer.

        The output layer always uses the last entries of ``weights`` and
        ``biases``.
        """
        n_hidden = len(layers) - 2
        for idx in range(n_hidden):
            pre_activation = tf.add(tf.matmul(H, weights[idx]), biases[idx])
            H = tf.tanh(pre_activation)
        return tf.add(tf.matmul(H, weights[-1]), biases[-1])

    # Forward pass for u
    def net_u(self, t, x):
        """Network output for the column-wise concatenation of ``t`` and ``x``."""
        net_input = tf.concat([t, x], 1)
        return self.forward_pass(net_input, self.layers, self.weights, self.biases)

    def net_u_t(self, t, x):
        """Gradient of ``net_u`` with respect to ``t``, divided by ``sigma_t``."""
        u = self.net_u(t, x)
        du_dt = tf.gradients(u, t)[0]
        return du_dt / self.sigma_t

    # Forward pass for f
    def net_r(self, t, x):
        u = self.net_u(t, x)
        residual = self.operator(u, t, x,
                                 self.c,
                                 self.sigma_t,
                                 self.sigma_x)
        return residual

    # Compute the Jacobian with respect to the weights and biases of each layer and return them as a list
    def compute_jacobian(self, f):
        """List the Jacobians of ``f``: all weight matrices first, then the biases.

        One bias is taken per weight matrix, in the same layer order.
        """
        n_layers = len(self.weights)
        weight_jacs = [jacobian(f, self.weights[i]) for i in range(n_layers)]
        bias_jacs = [jacobian(f, self.biases[i]) for i in range(n_layers)]
        return weight_jacs + bias_jacs
    
    # Compute the empirical NTK = J J^T
    def compute_ntk(self, J1_list, D1, J2_list, D2):
        """Sum ``J1 @ J2^T`` over matching entries of the two Jacobian lists.

        Each Jacobian is flattened to ``(D1, -1)`` / ``(D2, -1)`` first, so
        the result is a ``(D1, D2)`` tensor.
        """
        kernel = tf.zeros((D1, D2))
        for k, jac1 in enumerate(J1_list):
            left = tf.reshape(jac1, shape=(D1, -1))
            right = tf.reshape(J2_list[k], shape=(D2, -1))
            kernel = kernel + tf.matmul(left, tf.transpose(right))
        return kernel

    def fetch_minibatch(self, sampler, N):
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    # Trains the model by minimizing the MSE loss

    def train(self, nIter=10000, batch_size=128, log_NTK=False, update_weights=False):

        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_ics_batch, u_ics_batch = self.fetch_minibatch(self.ics_sampler, batch_size // 3)
            X_bc1_batch, _ = self.fetch_minibatch(self.bcs_sampler[0], batch_size // 3)
            X_bc2_batch, _ = self.fetch_minibatch(self.bcs_sampler[1], batch_size // 3)
            
            # Fetch residual mini-batch
            X_res_batch, _ = self.fetch_minibatch(self.res_sampler, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.t_ics_tf: X_ics_batch[:, 0:1], self.x_ics_tf: X_ics_batch[:, 1:2],
                       self.u_ics_tf: u_ics_batch,
                       self.t_bc1_tf: X_bc1_batch[:, 0:1], self.x_bc1_tf: X_bc1_batch[:, 1:2],
                       self.t_bc2_tf: X_bc2_batch[:, 0:1], self.x_bc2_tf: X_bc2_batch[:, 1:2],
                       self.t_r_tf: X_res_batch[:, 0:1], self.x_r_tf: X_res_batch[:, 1:2],
                       self.lambda_u_tf: self.lambda_u_val,
                       self.lambda_ut_tf: self.lambda_ut_val,
                       self.lambda_r_tf: self.lambda_r_val}

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = timeit.default_timer() - start_time

                loss_value = self.sess.run(self.loss, tf_dict)
                loss_bcs_value = self.sess.run(self.loss_bcs, tf_dict)
                loss_ics_ut_value = self.sess.run(self.loss_ics_u_t, tf_dict)
                loss_res_value = self.sess.run(self.loss_res, tf_dict)

                u_pred = self.predict_u(self.X_star)
                error = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_res_log.append(loss_res_value)
                self.loss_ut_ics_log.append(loss_ics_ut_value)
                self.l2_error_log.append(error)

                print('It: %d, Loss: %.3e, Loss_res: %.3e,  Loss_bcs: %.3e, Loss_ut_ics: %.3e,, Time: %.2f' %
                      (it, loss_value, loss_res_value, loss_bcs_value, loss_ics_ut_value, elapsed))

                print('lambda_u: {}'.format(self.lambda_u_val))
                print('lambda_ut: {}'.format(self.lambda_ut_val))
                print('lambda_r: {}'.format(self.lambda_r_val))

                start_time = timeit.default_timer()
            
            if log_NTK:
                X_bc_batch = np.vstack([X_ics_batch, X_bc1_batch, X_bc2_batch])
                X_ics_batch, u_ics_batch = self.fetch_minibatch(self.ics_sampler, batch_size )
                
                if it % 1000 == 0:
                        print("Compute NTK...")
                        tf_dict = {self.t_u_ntk_tf: X_bc_batch[:,0 :1], self.x_u_ntk_tf: X_bc_batch[:, 1:2],
                                   self.t_ut_ntk_tf: X_ics_batch[:, 0:1], self.x_ut_ntk_tf: X_ics_batch[:, 1:2],
                                   self.t_r_ntk_tf: X_res_batch[:, 0:1], self.x_r_ntk_tf: X_res_batch[:, 1:2]}

                        # Compute NTK
                        K_u_value, K_ut_value, K_r_value = self.sess.run([self.K_u, self.K_ut, self.K_r], tf_dict)
                        
                        lambda_K_sum = np.trace(K_u_value) + np.trace(K_ut_value) + \
                                       np.trace(K_r_value)

                        # Store NTK and weights
                        self.K_u_log.append(K_u_value)
                        self.K_ut_log.append(K_ut_value)
                        self.K_r_log.append(K_r_value)

                        if update_weights:
                            self.lambda_u_val = lambda_K_sum / np.trace(K_u_value)
                            self.lambda_ut_val = lambda_K_sum /np.trace(K_ut_value)
                            self.lambda_r_val = lambda_K_sum / np.trace(K_r_value)

                        # Store weights
                        self.lambda_u_log.append(self.lambda_u_val)
                        self.lambda_ut_log.append(self.lambda_ut_val)
                        self.lambda_r_log.append(self.lambda_r_val)
          
    # Evaluates predictions at test points
    def predict_u(self, X_star):
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1], self.x_u_tf: X_star[:, 1:2]}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluates PDE residual at test points
    def predict_r(self, X_star):
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_r_tf: X_star[:, 0:1], self.x_r_tf: X_star[:, 1:2]}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star


class Wave1D_NTK_mFF:
    # Multiscale Fourier network with NTK adaptive weights

    # Initialize the class
    def __init__(self, layers, operator, ics_sampler, bcs_sampler, res_sampler, c, kernel_size, X_star, u_star):
        """Build the Fourier-feature network graph, session, optimizer and NTK ops.

        Args:
            layers: layer widths; ``layers[0]`` is the size of each Fourier
                encoding (half sines, half cosines).
            operator: callable invoked by ``net_r`` as
                ``operator(u, t, x, c, sigma_t, sigma_x)``.
            ics_sampler: object exposing ``sample(N)``; stored on the instance.
            bcs_sampler: collection of boundary samplers; stored on the instance.
            res_sampler: sampler of the residual points; also used here to
                estimate the input normalization statistics.
            c: wave velocity, wrapped in a float32 ``tf.constant``.
            kernel_size: static number of rows of the NTK placeholders, and
                therefore the size of each NTK matrix.
            X_star, u_star: test inputs and reference values, stored on the
                instance.
        """
        # Normalize input: mean/std are estimated from 1e5 residual samples
        X, _ = res_sampler.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]

        # Samplers
        self.operator = operator
        self.ics_sampler = ics_sampler
        self.bcs_sampler = bcs_sampler
        self.res_sampler = res_sampler

        # Test data
        self.X_star = X_star
        self.u_star = u_star

        # Initialize multi-scale Fourier features: two trainable projection
        # matrices drawn from a normal distribution with scale 1.0 and 10.0.
        # NOTE: compute_jacobian only iterates over self.weights and
        # self.biases, so W1 and W2 do not contribute to the NTK matrices.
        self.W1 = tf.Variable(tf.random_normal([2, layers[0] // 2], dtype=tf.float32) * 1.0,
                               dtype=tf.float32, trainable=True)

        self.W2 = tf.Variable(tf.random_normal([2, layers[0] // 2], dtype=tf.float32) * 10.0,
                               dtype=tf.float32, trainable=True)

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Initialize weights for losses (0-d arrays, fed through placeholders)
        self.lambda_u_val = np.array(1.0)
        self.lambda_ut_val = np.array(1.0)
        self.lambda_r_val = np.array(1.0)

        # Wave velocity constant
        self.c = tf.constant(c, dtype=tf.float32)

        # Size of the NTK
        self.kernel_size = kernel_size

        D1 = self.kernel_size    # size of K_u
        D2 = self.kernel_size    # size of K_ut
        D3 = self.kernel_size    # size of K_r

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        # Inputs of the prediction node u_pred
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Initial-condition points and their target values
        self.t_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Points on the two boundaries
        self.t_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Residual (collocation) points
        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Loss weights; all three reuse the shape of lambda_u_val
        self.lambda_u_tf = tf.placeholder(tf.float32, shape=self.lambda_u_val.shape)
        self.lambda_ut_tf = tf.placeholder(tf.float32, shape=self.lambda_u_val.shape)
        self.lambda_r_tf = tf.placeholder(tf.float32, shape=self.lambda_u_val.shape)

        # NTK inputs have a fixed number of rows, so batches fed to them must
        # contain exactly kernel_size points
        self.t_u_ntk_tf = tf.placeholder(tf.float32, shape=(D1, 1))
        self.x_u_ntk_tf = tf.placeholder(tf.float32, shape=(D1, 1))

        self.t_ut_ntk_tf = tf.placeholder(tf.float32, shape=(D2, 1))
        self.x_ut_ntk_tf = tf.placeholder(tf.float32, shape=(D2, 1))

        self.t_r_ntk_tf = tf.placeholder(tf.float32, shape=(D3, 1))
        self.x_r_ntk_tf = tf.placeholder(tf.float32, shape=(D3, 1))

        # Evaluate predictions
        self.u_ics_pred = self.net_u(self.t_ics_tf, self.x_ics_tf)
        self.u_t_ics_pred = self.net_u_t(self.t_ics_tf, self.x_ics_tf)
        self.u_bc1_pred = self.net_u(self.t_bc1_tf, self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.t_bc2_tf, self.x_bc2_tf)

        self.u_pred = self.net_u(self.t_u_tf, self.x_u_tf)
        self.r_pred = self.net_r(self.t_r_tf, self.x_r_tf)

        self.u_ntk_pred = self.net_u(self.t_u_ntk_tf, self.x_u_ntk_tf)
        self.ut_ntk_pred = self.net_u_t(self.t_ut_ntk_tf, self.x_ut_ntk_tf)
        self.r_ntk_pred = self.net_r(self.t_r_ntk_tf, self.x_r_ntk_tf)

        # Boundary loss and Initial loss
        # Only the initial displacement is compared with a target; the initial
        # velocity and both boundary predictions are penalised towards zero.
        self.loss_ics_u = tf.reduce_mean(tf.square(self.u_ics_tf - self.u_ics_pred))
        self.loss_ics_u_t = tf.reduce_mean(tf.square(self.u_t_ics_pred))
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_pred))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_pred))

        self.loss_bcs = self.loss_ics_u + self.loss_bc1 + self.loss_bc2

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_pred))

        # Total loss: weighted sum of the residual, data and velocity terms
        self.loss = self.lambda_r_tf * self.loss_res + self.lambda_u_tf * self.loss_bcs + self.lambda_ut_tf * self.loss_ics_u_t

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Compute the Jacobian for weights and biases in each hidden layer
        self.J_u = self.compute_jacobian(self.u_ntk_pred)
        self.J_ut = self.compute_jacobian(self.ut_ntk_pred)
        self.J_r = self.compute_jacobian(self.r_ntk_pred)

        # Each kernel pairs a Jacobian list with itself
        self.K_u = self.compute_ntk(self.J_u, D1, self.J_u, D1)
        self.K_ut = self.compute_ntk(self.J_ut, D2, self.J_ut, D2)
        self.K_r = self.compute_ntk(self.J_r, D3, self.J_r, D3)

        # Loss logger
        self.loss_bcs_log = []
        self.loss_ut_ics_log = []
        self.loss_res_log = []
        self.l2_error_log = []

        # NTK logger
        self.K_u_log = []
        self.K_ut_log = []
        self.K_r_log = []

        # weights logger
        self.lambda_u_log = []
        self.lambda_ut_log = []
        self.lambda_r_log = []

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

        # Saver
        self.saver = tf.train.Saver()

    # Initialize network weights and biases using Xavier initialization
    def xavier_init(self, size):
        """Return a float32 variable of shape ``size[:2]`` with Xavier-scaled normal entries.

        The standard deviation is ``1 / sqrt((in_dim + out_dim) / 2)``.
        """
        fan_in, fan_out = size[0], size[1]
        stddev = 1. / np.sqrt((fan_in + fan_out) / 2.)
        draw = tf.random_normal([fan_in, fan_out], dtype=tf.float32) * stddev
        return tf.Variable(draw, dtype=tf.float32)

    def initialize_NN(self, layers):
        """Create the hidden-layer parameters plus a double-width output layer.

        Hidden layers map ``layers[l] -> layers[l + 1]``. The output layer
        takes ``2 * layers[-2]`` inputs because ``forward_pass`` concatenates
        the two Fourier branches before it. All biases are normal draws.
        Returns ``(weights, biases)`` as two lists.
        """
        weights, biases = [], []

        # Variables are created layer by layer, weight first and then bias.
        for n_in, n_out in zip(layers[:-2], layers[1:-1]):
            weights.append(self.xavier_init(size=[n_in, n_out]))
            biases.append(tf.Variable(tf.random_normal([1, n_out], dtype=tf.float32), dtype=tf.float32))

        weights.append(self.xavier_init(size=[2 * layers[-2], layers[-1]]))
        biases.append(tf.Variable(tf.random_normal([1, layers[-1]], dtype=tf.float32), dtype=tf.float32))

        return weights, biases

    # Evaluates the forward pass
    def forward_pass(self, H):
        """Encode ``H`` with both Fourier matrices and run the shared network.

        Each encoding is ``[sin(H W), cos(H W)]``. Both branches go through
        the same tanh layers, are concatenated, and are mapped by the final
        linear layer.
        """
        n_hidden = len(self.layers) - 2

        # Multi-scale Fourier feature encodings
        sin1 = tf.sin(tf.matmul(H, self.W1))
        cos1 = tf.cos(tf.matmul(H, self.W1))
        branch1 = tf.concat([sin1, cos1], 1)
        sin2 = tf.sin(tf.matmul(H, self.W2))
        cos2 = tf.cos(tf.matmul(H, self.W2))
        branch2 = tf.concat([sin2, cos2], 1)

        # The two branches share every hidden layer's weights and biases
        for idx in range(n_hidden):
            layer_W = self.weights[idx]
            layer_b = self.biases[idx]
            branch1 = tf.tanh(tf.add(tf.matmul(branch1, layer_W), layer_b))
            branch2 = tf.tanh(tf.add(tf.matmul(branch2, layer_W), layer_b))

        # Merge the outputs by concatenation
        merged = tf.concat([branch1, branch2], 1)
        return tf.add(tf.matmul(merged, self.weights[-1]), self.biases[-1])

    # Forward pass for u
    def net_u(self, t, x):
        """Network output for the column-wise concatenation of ``t`` and ``x``."""
        net_input = tf.concat([t, x], 1)
        return self.forward_pass(net_input)

    def net_u_t(self, t, x):
        """Return the time derivative of the network output.

        The gradient of ``net_u(t, x)`` with respect to ``t`` is divided by
        ``self.sigma_t``.
        """
        u = self.net_u(t, x)
        du_dt = tf.gradients(u, t)[0]
        # Mini-batches are fed as (X - mu_X) / sigma_X (see fetch_minibatch),
        # so the chain rule contributes a factor 1 / sigma_t.
        return du_dt / self.sigma_t

    # Forward pass for the PDE residual r
    def net_r(self, t, x):
        """Return the PDE residual at ``(t, x)``.

        The residual is produced by the user-supplied ``self.operator``, which
        is called with the network output, the inputs, the constant ``self.c``
        and the scales ``self.sigma_t`` and ``self.sigma_x``.
        """
        u = self.net_u(t, x)
        return self.operator(u, t, x,
                             self.c,
                             self.sigma_t,
                             self.sigma_x)

    # Compute the Jacobian of f with respect to the weights and biases of each layer and return them as a list
    def compute_jacobian(self, f):
        """Collect the Jacobians of ``f`` with respect to all network parameters.

        Args:
            f: Tensor to differentiate.

        Returns:
            List with the Jacobians for every weight matrix (in layer order)
            followed by the Jacobians for every bias (in layer order).
        """
        n_layers = len(self.weights)
        weight_jacobians = [jacobian(f, self.weights[k]) for k in range(n_layers)]
        bias_jacobians = [jacobian(f, self.biases[k]) for k in range(n_layers)]
        return weight_jacobians + bias_jacobians

    # Compute the empirical NTK = J J^T
    def compute_ntk(self, J1_list, D1, J2_list, D2):
        """Assemble the empirical NTK block ``sum_k J1_k J2_k^T``.

        Args:
            J1_list: Per-parameter Jacobians of the first output.
            D1: Number of rows each entry of ``J1_list`` is reshaped to.
            J2_list: Per-parameter Jacobians of the second output, aligned
                entry-by-entry with ``J1_list``.
            D2: Number of rows each entry of ``J2_list`` is reshaped to.

        Returns:
            Tensor of shape ``(D1, D2)``.
        """
        kernel = tf.zeros((D1, D2))
        for k, J1_entry in enumerate(J1_list):
            # Flatten the parameter axes so each Jacobian becomes a 2-D matrix.
            J1_flat = tf.reshape(J1_entry, shape=(D1, -1))
            J2_flat = tf.reshape(J2_list[k], shape=(D2, -1))

            kernel = kernel + tf.matmul(J1_flat, tf.transpose(J2_flat))
        return kernel

    def fetch_minibatch(self, sampler, N):
        """Draw ``N`` samples from ``sampler`` and normalize the inputs.

        Args:
            sampler: Object whose ``sample(N)`` returns a pair ``(X, Y)``.
            N: Number of samples requested from the sampler.

        Returns:
            Tuple ``((X - mu_X) / sigma_X, Y)``; the targets are returned
            unchanged.
        """
        inputs, targets = sampler.sample(N)
        normalized_inputs = (inputs - self.mu_X) / self.sigma_X
        return normalized_inputs, targets

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=128, log_NTK=False, update_weights=False):
        """Run the training loop on freshly sampled mini-batches.

        Each iteration draws new initial-condition, boundary and residual
        batches and runs ``self.train_op`` once. Every 100 iterations the
        losses and the relative L2 error on ``self.X_star`` are logged and
        printed; optionally the NTK blocks are evaluated and used to
        rebalance the loss weights.

        Args:
            nIter: Number of optimizer steps.
            batch_size: Number of residual points per step. The initial
                condition sampler and each of the two boundary samplers are
                asked for ``batch_size // 3`` points.
            log_NTK: If True, evaluate and store ``K_u``, ``K_ut`` and ``K_r``
                on every logging iteration. The stacked IC/BC batch, a fresh
                IC batch of ``batch_size`` points and the residual batch are
                fed to the NTK placeholders, so their row counts must be
                compatible with those placeholders.
            update_weights: If True (only effective when ``log_NTK`` is True),
                set each loss weight to
                ``(tr K_u + tr K_ut + tr K_r) / tr K_i`` after every NTK
                evaluation.
        """
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_ics_batch, u_ics_batch = self.fetch_minibatch(self.ics_sampler, batch_size // 3)
            X_bc1_batch, _ = self.fetch_minibatch(self.bcs_sampler[0], batch_size // 3)
            X_bc2_batch, _ = self.fetch_minibatch(self.bcs_sampler[1], batch_size // 3)

            # Fetch residual mini-batch
            X_res_batch, _ = self.fetch_minibatch(self.res_sampler, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.t_ics_tf: X_ics_batch[:, 0:1], self.x_ics_tf: X_ics_batch[:, 1:2],
                       self.u_ics_tf: u_ics_batch,
                       self.t_bc1_tf: X_bc1_batch[:, 0:1], self.x_bc1_tf: X_bc1_batch[:, 1:2],
                       self.t_bc2_tf: X_bc2_batch[:, 0:1], self.x_bc2_tf: X_bc2_batch[:, 1:2],
                       self.t_r_tf: X_res_batch[:, 0:1], self.x_r_tf: X_res_batch[:, 1:2],
                       self.lambda_u_tf: self.lambda_u_val,
                       self.lambda_ut_tf: self.lambda_ut_val,
                       self.lambda_r_tf: self.lambda_r_val}

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            is_log_step = it % 100 == 0

            # Print
            if is_log_step:
                elapsed = timeit.default_timer() - start_time

                # Fetch all monitored losses in a single graph execution
                # instead of one session call per loss.
                loss_value, loss_bcs_value, loss_ics_ut_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_ics_u_t, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_res_log.append(loss_res_value)
                self.loss_ut_ics_log.append(loss_ics_ut_value)
                self.l2_error_log.append(error)

                print('It: %d, Loss: %.3e, Loss_res: %.3e,  Loss_bcs: %.3e, Loss_ut_ics: %.3e,, Time: %.2f' %
                      (it, loss_value, loss_res_value, loss_bcs_value, loss_ics_ut_value, elapsed))

                print('lambda_u: {}'.format(self.lambda_u_val))
                print('lambda_ut: {}'.format(self.lambda_ut_val))
                print('lambda_r: {}'.format(self.lambda_r_val))

                start_time = timeit.default_timer()

            # The NTK batches are only needed on logging iterations, so they
            # are assembled here rather than on every step.
            if log_NTK and is_log_step:
                X_bc_batch = np.vstack([X_ics_batch, X_bc1_batch, X_bc2_batch])
                X_ut_batch, _ = self.fetch_minibatch(self.ics_sampler, batch_size)

                print("Compute NTK...")
                ntk_dict = {self.t_u_ntk_tf: X_bc_batch[:, 0:1], self.x_u_ntk_tf: X_bc_batch[:, 1:2],
                            self.t_ut_ntk_tf: X_ut_batch[:, 0:1], self.x_ut_ntk_tf: X_ut_batch[:, 1:2],
                            self.t_r_ntk_tf: X_res_batch[:, 0:1], self.x_r_ntk_tf: X_res_batch[:, 1:2]}

                K_u_value, K_ut_value, K_r_value = self.sess.run([self.K_u, self.K_ut, self.K_r], ntk_dict)

                # Store NTK
                self.K_u_log.append(K_u_value)
                self.K_ut_log.append(K_ut_value)
                self.K_r_log.append(K_r_value)

                if update_weights:
                    trace_u = np.trace(K_u_value)
                    trace_ut = np.trace(K_ut_value)
                    trace_r = np.trace(K_r_value)
                    lambda_K_sum = trace_u + trace_ut + trace_r

                    # Update weights
                    self.lambda_u_val = lambda_K_sum / trace_u
                    self.lambda_ut_val = lambda_K_sum / trace_ut
                    self.lambda_r_val = lambda_K_sum / trace_r

                # Store weights
                self.lambda_u_log.append(self.lambda_u_val)
                self.lambda_ut_log.append(self.lambda_ut_val)
                self.lambda_r_log.append(self.lambda_r_val)

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Evaluate the network output at the test points ``X_star``.

        Args:
            X_star: Array whose first column is fed as ``t`` and second column
                as ``x``, given in un-normalized coordinates.

        Returns:
            The value of ``self.u_pred`` returned by the session.
        """
        X_norm = (X_star - self.mu_X) / self.sigma_X
        t_col, x_col = X_norm[:, 0:1], X_norm[:, 1:2]
        feed = {self.t_u_tf: t_col, self.x_u_tf: x_col}
        return self.sess.run(self.u_pred, feed)

    # Evaluates the PDE residual at test points
    def predict_r(self, X_star):
        """Evaluate the PDE residual at the test points ``X_star``.

        Args:
            X_star: Array whose first column is fed as ``t`` and second column
                as ``x``, given in un-normalized coordinates.

        Returns:
            The value of ``self.r_pred`` returned by the session.
        """
        X_norm = (X_star - self.mu_X) / self.sigma_X
        t_col, x_col = X_norm[:, 0:1], X_norm[:, 1:2]
        feed = {self.t_r_tf: t_col, self.x_r_tf: x_col}
        return self.sess.run(self.r_pred, feed)


class Wave1D_NTK_ST_mFF:
    """Physics-informed network with spatio-temporal multi-scale Fourier features.

    The time input is embedded with two Fourier feature matrices (random
    normal scaled by 1.0 and 10.0) and the space input with one (scaled by
    1.0). All embeddings pass through the same hidden layers; temporal and
    spatial branches are merged by point-wise multiplication and fed to a
    final linear layer. The loss combines initial/boundary terms, an initial
    time-derivative term and the residual of the user-supplied ``operator``,
    each with its own weight. The weights can be rebalanced during training
    from the traces of the empirical NTK blocks ``K_u``, ``K_ut`` and ``K_r``.

    Args:
        layers: Layer widths. ``layers[0]`` is the Fourier embedding width
            (``layers[0] // 2`` frequencies, sin and cos each), so it should
            be even; ``layers[-1]`` is the output width.
        operator: Callable ``operator(u, t, x, c, sigma_t, sigma_x)`` returning
            the PDE residual tensor.
        ics_sampler: Sampler for the initial condition; ``sample(N)`` returns
            ``(X, Y)``.
        bcs_sampler: Sequence of two boundary samplers.
        res_sampler: Sampler for residual (collocation) points; also used to
            estimate the input normalization statistics.
        c: Wave velocity constant passed on to ``operator``.
        kernel_size: Number of points in each NTK block; the NTK placeholders
            have the fixed shape ``(kernel_size, 1)``.
        X_star: Test inputs used to monitor the relative L2 error.
        u_star: Reference values at ``X_star``.
    """

    # Initialize the class
    def __init__(self, layers, operator, ics_sampler, bcs_sampler, res_sampler, c, kernel_size, X_star, u_star):
        # Normalization constants, estimated from a large residual sample
        X, _ = res_sampler.sample(np.int32(1e5))
        self.mu_X, self.sigma_X = X.mean(0), X.std(0)
        self.mu_t, self.sigma_t = self.mu_X[0], self.sigma_X[0]
        self.mu_x, self.sigma_x = self.mu_X[1], self.sigma_X[1]

        # Samplers
        self.operator = operator
        self.ics_sampler = ics_sampler
        self.bcs_sampler = bcs_sampler
        self.res_sampler = res_sampler

        # Test data
        self.X_star = X_star
        self.u_star = u_star

        # Initialize spatial and temporal Fourier features (fixed, not trained)
        self.W1_t = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * 1.0,
                                dtype=tf.float32, trainable=False)

        self.W2_t = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * 10.0,
                                dtype=tf.float32, trainable=False)

        self.W1_x = tf.Variable(tf.random_normal([1, layers[0] // 2], dtype=tf.float32) * 1.0,
                                dtype=tf.float32, trainable=False)

        # Initialize network weights and biases
        self.layers = layers
        self.weights, self.biases = self.initialize_NN(layers)

        # Loss weights
        self.lambda_u_val = np.array(1.0)
        self.lambda_ut_val = np.array(1.0)
        self.lambda_r_val = np.array(1.0)

        # Wave velocity constant
        self.c = tf.constant(c, dtype=tf.float32)

        # Size of NTK
        self.kernel_size = kernel_size

        D1 = self.kernel_size    # size of K_u
        D2 = self.kernel_size    # size of K_ut
        D3 = self.kernel_size    # size of K_r

        # Define Tensorflow session
        self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        # Define placeholders and computational graph
        self.t_u_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_u_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.u_ics_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc1_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_bc2_tf = tf.placeholder(tf.float32, shape=(None, 1))

        self.t_r_tf = tf.placeholder(tf.float32, shape=(None, 1))
        self.x_r_tf = tf.placeholder(tf.float32, shape=(None, 1))

        # Each loss-weight placeholder takes the shape of its own value
        self.lambda_u_tf = tf.placeholder(tf.float32, shape=self.lambda_u_val.shape)
        self.lambda_ut_tf = tf.placeholder(tf.float32, shape=self.lambda_ut_val.shape)
        self.lambda_r_tf = tf.placeholder(tf.float32, shape=self.lambda_r_val.shape)

        # NTK inputs have a fixed number of rows so the kernels have static shapes
        self.t_u_ntk_tf = tf.placeholder(tf.float32, shape=(D1, 1))
        self.x_u_ntk_tf = tf.placeholder(tf.float32, shape=(D1, 1))

        self.t_ut_ntk_tf = tf.placeholder(tf.float32, shape=(D2, 1))
        self.x_ut_ntk_tf = tf.placeholder(tf.float32, shape=(D2, 1))

        self.t_r_ntk_tf = tf.placeholder(tf.float32, shape=(D3, 1))
        self.x_r_ntk_tf = tf.placeholder(tf.float32, shape=(D3, 1))

        # Evaluate predictions
        self.u_ics_pred = self.net_u(self.t_ics_tf, self.x_ics_tf)
        self.u_t_ics_pred = self.net_u_t(self.t_ics_tf, self.x_ics_tf)
        self.u_bc1_pred = self.net_u(self.t_bc1_tf, self.x_bc1_tf)
        self.u_bc2_pred = self.net_u(self.t_bc2_tf, self.x_bc2_tf)

        self.u_pred = self.net_u(self.t_u_tf, self.x_u_tf)
        self.r_pred = self.net_r(self.t_r_tf, self.x_r_tf)

        self.u_ntk_pred = self.net_u(self.t_u_ntk_tf, self.x_u_ntk_tf)
        self.ut_ntk_pred = self.net_u_t(self.t_ut_ntk_tf, self.x_ut_ntk_tf)
        self.r_ntk_pred = self.net_r(self.t_r_ntk_tf, self.x_r_ntk_tf)

        # Boundary loss and Initial loss
        self.loss_ics_u = tf.reduce_mean(tf.square(self.u_ics_tf - self.u_ics_pred))
        self.loss_ics_u_t = tf.reduce_mean(tf.square(self.u_t_ics_pred))
        self.loss_bc1 = tf.reduce_mean(tf.square(self.u_bc1_pred))
        self.loss_bc2 = tf.reduce_mean(tf.square(self.u_bc2_pred))

        self.loss_bcs = self.loss_ics_u + self.loss_bc1 + self.loss_bc2

        # Residual loss
        self.loss_res = tf.reduce_mean(tf.square(self.r_pred))

        # Total loss
        self.loss = self.lambda_r_tf * self.loss_res + self.lambda_u_tf * self.loss_bcs + self.lambda_ut_tf * self.loss_ics_u_t

        # Define optimizer with learning rate schedule
        self.global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 1e-3
        self.learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step,
                                                        1000, 0.9, staircase=False)
        # Passing global_step to minimize() will increment it at each step.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

        # Compute the Jacobian for weights and biases in each hidden layer
        self.J_u = self.compute_jacobian(self.u_ntk_pred)
        self.J_ut = self.compute_jacobian(self.ut_ntk_pred)
        self.J_r = self.compute_jacobian(self.r_ntk_pred)

        self.K_u = self.compute_ntk(self.J_u, D1, self.J_u, D1)
        self.K_ut = self.compute_ntk(self.J_ut, D2, self.J_ut, D2)
        self.K_r = self.compute_ntk(self.J_r, D3, self.J_r, D3)

        # Loss logger
        self.loss_bcs_log = []
        self.loss_ut_ics_log = []
        self.loss_res_log = []
        self.l2_error_log = []

        # NTK logger
        self.K_u_log = []
        self.K_ut_log = []
        self.K_r_log = []

        # weights logger
        self.lambda_u_log = []
        self.lambda_ut_log = []
        self.lambda_r_log = []

        # Initialize Tensorflow variables
        init = tf.global_variables_initializer()
        self.sess.run(init)

        # Saver
        self.saver = tf.train.Saver()

    # Initialize network weights and biases using Xavier initialization
    def xavier_init(self, size):
        """Return a float32 ``tf.Variable`` of shape ``[size[0], size[1]]``.

        Entries are standard-normal samples scaled by
        ``1 / sqrt((size[0] + size[1]) / 2)``.
        """
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = 1. / np.sqrt((in_dim + out_dim) / 2.)
        return tf.Variable(tf.random_normal([in_dim, out_dim], dtype=tf.float32) * xavier_stddev,
                           dtype=tf.float32)

    def initialize_NN(self, layers):
        """Create the weights and biases for the given layer widths.

        Returns:
            Tuple ``(weights, biases)`` of equally long lists; the last entry
            of each belongs to the output layer.
        """
        weights = []
        biases = []

        num_layers = len(layers)
        for l in range(0, num_layers - 2):
            W = self.xavier_init(size=[layers[l], layers[l + 1]])
            b = tf.Variable(tf.random_normal([1, layers[l + 1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)

        # forward_pass concatenates two merged branches before the output
        # layer, hence the doubled input width.
        W = self.xavier_init(size=[2 * layers[-2], layers[-1]])
        b = tf.Variable(tf.random_normal([1, layers[-1]], dtype=tf.float32), dtype=tf.float32)
        weights.append(W)
        biases.append(b)

        return weights, biases

    # Evaluates the forward pass
    def forward_pass(self, H):
        """Evaluate the network on ``H`` whose columns are ``(t, x)``.

        Returns:
            Output tensor of the final linear layer.
        """
        num_layers = len(self.layers)

        t = H[:, 0:1]
        x = H[:, 1:2]

        def fourier_features(z, B):
            # [sin(z B), cos(z B)] concatenated along the feature axis
            return tf.concat([tf.sin(tf.matmul(z, B)),
                              tf.cos(tf.matmul(z, B))], 1)

        # Temporal and spatial Fourier feature encodings
        H1_t = fourier_features(t, self.W1_t)
        H2_t = fourier_features(t, self.W2_t)
        H1_x = fourier_features(x, self.W1_x)

        # All encodings share the same hidden layers
        for l in range(0, num_layers - 2):
            W = self.weights[l]
            b = self.biases[l]

            H1_t = tf.tanh(tf.add(tf.matmul(H1_t, W), b))
            H2_t = tf.tanh(tf.add(tf.matmul(H2_t, W), b))
            H1_x = tf.tanh(tf.add(tf.matmul(H1_x, W), b))

        # Merge outputs: each temporal branch is multiplied point-wise with
        # the spatial branch, then the two products are concatenated
        H1 = tf.multiply(H1_t, H1_x)
        H2 = tf.multiply(H2_t, H1_x)
        H = tf.concat([H1, H2], 1)

        W = self.weights[-1]
        b = self.biases[-1]
        H = tf.add(tf.matmul(H, W), b)

        return H

    # Forward pass for u
    def net_u(self, t, x):
        """Return the network output for time ``t`` and position ``x``."""
        u = self.forward_pass(tf.concat([t, x], 1))
        return u

    def net_u_t(self, t, x):
        """Return the time derivative of the network output.

        Inputs are fed as ``(X - mu_X) / sigma_X``, so the gradient with
        respect to ``t`` is divided by ``sigma_t``.
        """
        u_t = tf.gradients(self.net_u(t, x), t)[0] / self.sigma_t
        return u_t

    # Forward pass for the PDE residual r
    def net_r(self, t, x):
        """Return the residual computed by ``self.operator`` at ``(t, x)``."""
        u = self.net_u(t, x)
        residual = self.operator(u, t, x,
                                 self.c,
                                 self.sigma_t,
                                 self.sigma_x)
        return residual

    # Compute the Jacobian for the weights and biases of each layer and return a list
    def compute_jacobian(self, f):
        """Return the Jacobians of ``f``: all weights first, then all biases."""
        J_list = []
        L = len(self.weights)
        for i in range(L):
            J_w = jacobian(f, self.weights[i])
            J_list.append(J_w)

        for i in range(L):
            J_b = jacobian(f, self.biases[i])
            J_list.append(J_b)
        return J_list

    # Compute the empirical NTK = J J^T
    def compute_ntk(self, J1_list, D1, J2_list, D2):
        """Return the ``(D1, D2)`` kernel ``sum_k J1_k J2_k^T``.

        Each Jacobian is flattened to ``(D, -1)`` before the product.
        """
        N = len(J1_list)

        Ker = tf.zeros((D1, D2))
        for k in range(N):
            J1 = tf.reshape(J1_list[k], shape=(D1, -1))
            J2 = tf.reshape(J2_list[k], shape=(D2, -1))

            K = tf.matmul(J1, tf.transpose(J2))
            Ker = Ker + K
        return Ker

    def fetch_minibatch(self, sampler, N):
        """Draw ``N`` samples from ``sampler`` and normalize the inputs.

        Returns:
            Tuple ``((X - mu_X) / sigma_X, Y)``.
        """
        X, Y = sampler.sample(N)
        X = (X - self.mu_X) / self.sigma_X
        return X, Y

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=128, log_NTK=False, update_weights=False):
        """Train the network with Adam on freshly sampled mini-batches.

        Every 100 iterations the losses and the relative L2 error on
        ``self.X_star`` are logged and printed; optionally the NTK blocks are
        evaluated and used to rebalance the loss weights.

        Args:
            nIter: Number of optimizer steps.
            batch_size: Number of residual points per step. The initial
                condition sampler and each of the two boundary samplers are
                asked for ``batch_size // 3`` points.
            log_NTK: If True, evaluate and store ``K_u``, ``K_ut`` and ``K_r``
                on every logging iteration. The stacked IC/BC batch, a fresh
                IC batch of ``batch_size`` points and the residual batch are
                fed to placeholders of shape ``(kernel_size, 1)``, so each
                must have exactly ``kernel_size`` rows.
            update_weights: If True (only effective when ``log_NTK`` is True),
                set each loss weight to
                ``(tr K_u + tr K_ut + tr K_r) / tr K_i`` after every NTK
                evaluation.
        """
        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch boundary mini-batches
            X_ics_batch, u_ics_batch = self.fetch_minibatch(self.ics_sampler, batch_size // 3)
            X_bc1_batch, _ = self.fetch_minibatch(self.bcs_sampler[0], batch_size // 3)
            X_bc2_batch, _ = self.fetch_minibatch(self.bcs_sampler[1], batch_size // 3)

            # Fetch residual mini-batch
            X_res_batch, _ = self.fetch_minibatch(self.res_sampler, batch_size)

            # Define a dictionary for associating placeholders with data
            tf_dict = {self.t_ics_tf: X_ics_batch[:, 0:1], self.x_ics_tf: X_ics_batch[:, 1:2],
                       self.u_ics_tf: u_ics_batch,
                       self.t_bc1_tf: X_bc1_batch[:, 0:1], self.x_bc1_tf: X_bc1_batch[:, 1:2],
                       self.t_bc2_tf: X_bc2_batch[:, 0:1], self.x_bc2_tf: X_bc2_batch[:, 1:2],
                       self.t_r_tf: X_res_batch[:, 0:1], self.x_r_tf: X_res_batch[:, 1:2],
                       self.lambda_u_tf: self.lambda_u_val,
                       self.lambda_ut_tf: self.lambda_ut_val,
                       self.lambda_r_tf: self.lambda_r_val}

            # Run the Tensorflow session to minimize the loss
            self.sess.run(self.train_op, tf_dict)

            is_log_step = it % 100 == 0

            # Print
            if is_log_step:
                elapsed = timeit.default_timer() - start_time

                # Fetch all monitored losses in a single graph execution
                # instead of one session call per loss.
                loss_value, loss_bcs_value, loss_ics_ut_value, loss_res_value = self.sess.run(
                    [self.loss, self.loss_bcs, self.loss_ics_u_t, self.loss_res], tf_dict)

                u_pred = self.predict_u(self.X_star)
                error = np.linalg.norm(self.u_star - u_pred, 2) / np.linalg.norm(self.u_star, 2)

                self.loss_bcs_log.append(loss_bcs_value)
                self.loss_res_log.append(loss_res_value)
                self.loss_ut_ics_log.append(loss_ics_ut_value)
                self.l2_error_log.append(error)

                print('It: %d, Loss: %.3e, Loss_res: %.3e,  Loss_bcs: %.3e, Loss_ut_ics: %.3e,, Time: %.2f' %
                      (it, loss_value, loss_res_value, loss_bcs_value, loss_ics_ut_value, elapsed))

                print('lambda_u: {}'.format(self.lambda_u_val))
                print('lambda_ut: {}'.format(self.lambda_ut_val))
                print('lambda_r: {}'.format(self.lambda_r_val))

                start_time = timeit.default_timer()

            # The NTK batches are only needed on logging iterations, so they
            # are assembled here rather than on every step.
            if log_NTK and is_log_step:
                X_bc_batch = np.vstack([X_ics_batch, X_bc1_batch, X_bc2_batch])
                X_ut_batch, _ = self.fetch_minibatch(self.ics_sampler, batch_size)

                print("Compute NTK...")
                ntk_dict = {self.t_u_ntk_tf: X_bc_batch[:, 0:1], self.x_u_ntk_tf: X_bc_batch[:, 1:2],
                            self.t_ut_ntk_tf: X_ut_batch[:, 0:1], self.x_ut_ntk_tf: X_ut_batch[:, 1:2],
                            self.t_r_ntk_tf: X_res_batch[:, 0:1], self.x_r_ntk_tf: X_res_batch[:, 1:2]}

                K_u_value, K_ut_value, K_r_value = self.sess.run([self.K_u, self.K_ut, self.K_r], ntk_dict)

                # Store NTK
                self.K_u_log.append(K_u_value)
                self.K_ut_log.append(K_ut_value)
                self.K_r_log.append(K_r_value)

                if update_weights:
                    trace_u = np.trace(K_u_value)
                    trace_ut = np.trace(K_ut_value)
                    trace_r = np.trace(K_r_value)
                    lambda_K_sum = trace_u + trace_ut + trace_r

                    # Update weights
                    self.lambda_u_val = lambda_K_sum / trace_u
                    self.lambda_ut_val = lambda_K_sum / trace_ut
                    self.lambda_r_val = lambda_K_sum / trace_r

                # Store weights
                self.lambda_u_log.append(self.lambda_u_val)
                self.lambda_ut_log.append(self.lambda_ut_val)
                self.lambda_r_log.append(self.lambda_r_val)

    # Evaluates predictions at test points
    def predict_u(self, X_star):
        """Return ``self.u_pred`` evaluated at the un-normalized points ``X_star``."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_u_tf: X_star[:, 0:1], self.x_u_tf: X_star[:, 1:2]}
        u_star = self.sess.run(self.u_pred, tf_dict)
        return u_star

    # Evaluates the PDE residual at test points
    def predict_r(self, X_star):
        """Return ``self.r_pred`` evaluated at the un-normalized points ``X_star``."""
        X_star = (X_star - self.mu_X) / self.sigma_X
        tf_dict = {self.t_r_tf: X_star[:, 0:1], self.x_r_tf: X_star[:, 1:2]}
        r_star = self.sess.run(self.r_pred, tf_dict)
        return r_star