Repository: zhhlee/InterFusion Branch: main Commit: 8c85d511c1f0 Files: 116 Total size: 170.1 KB Directory structure: gitextract_ivtoovxj/ ├── .gitignore ├── LICENSE ├── README.md ├── algorithm/ │ ├── InterFusion.py │ ├── InterFusion_swat.py │ ├── __init__.py │ ├── cal_IPS.py │ ├── conv1d_.py │ ├── mcmc_recons.py │ ├── real_nvp.py │ ├── recurrent_distribution.py │ ├── stack_predict.py │ ├── stack_train.py │ └── utils.py ├── data/ │ ├── interpretation_label/ │ │ ├── anomaly_type.txt │ │ ├── machine-1-1.txt │ │ ├── machine-1-6.txt │ │ ├── machine-1-7.txt │ │ ├── machine-2-1.txt │ │ ├── machine-2-2.txt │ │ ├── machine-2-7.txt │ │ ├── machine-2-8.txt │ │ ├── machine-3-11.txt │ │ ├── machine-3-3.txt │ │ ├── machine-3-4.txt │ │ ├── machine-3-6.txt │ │ ├── machine-3-8.txt │ │ ├── omi-1.txt │ │ ├── omi-10.txt │ │ ├── omi-11.txt │ │ ├── omi-12.txt │ │ ├── omi-2.txt │ │ ├── omi-3.txt │ │ ├── omi-4.txt │ │ ├── omi-5.txt │ │ ├── omi-6.txt │ │ ├── omi-7.txt │ │ ├── omi-8.txt │ │ └── omi-9.txt │ └── processed/ │ ├── machine-1-1_test.pkl │ ├── machine-1-1_test_label.pkl │ ├── machine-1-1_train.pkl │ ├── machine-1-6_test.pkl │ ├── machine-1-6_test_label.pkl │ ├── machine-1-6_train.pkl │ ├── machine-1-7_test.pkl │ ├── machine-1-7_test_label.pkl │ ├── machine-1-7_train.pkl │ ├── machine-2-1_test.pkl │ ├── machine-2-1_test_label.pkl │ ├── machine-2-1_train.pkl │ ├── machine-2-2_test.pkl │ ├── machine-2-2_test_label.pkl │ ├── machine-2-2_train.pkl │ ├── machine-2-7_test.pkl │ ├── machine-2-7_test_label.pkl │ ├── machine-2-7_train.pkl │ ├── machine-2-8_test.pkl │ ├── machine-2-8_test_label.pkl │ ├── machine-2-8_train.pkl │ ├── machine-3-11_test.pkl │ ├── machine-3-11_test_label.pkl │ ├── machine-3-11_train.pkl │ ├── machine-3-3_test.pkl │ ├── machine-3-3_test_label.pkl │ ├── machine-3-3_train.pkl │ ├── machine-3-4_test.pkl │ ├── machine-3-4_test_label.pkl │ ├── machine-3-4_train.pkl │ ├── machine-3-6_test.pkl │ ├── machine-3-6_test_label.pkl │ ├── machine-3-6_train.pkl │ ├── machine-3-8_test.pkl │ ├── machine-3-8_test_label.pkl │ ├── machine-3-8_train.pkl │ ├── omi-10_test.pkl │ ├── omi-10_test_label.pkl │ ├── omi-10_train.pkl │ ├── omi-11_test.pkl │ ├── omi-11_test_label.pkl │ ├── omi-11_train.pkl │ ├── omi-12_test.pkl │ ├── omi-12_test_label.pkl │ ├── omi-12_train.pkl │ ├── omi-1_test.pkl │ ├── omi-1_test_label.pkl │ ├── omi-1_train.pkl │ ├── omi-2_test.pkl │ ├── omi-2_test_label.pkl │ ├── omi-2_train.pkl │ ├── omi-3_test.pkl │ ├── omi-3_test_label.pkl │ ├── omi-3_train.pkl │ ├── omi-4_test.pkl │ ├── omi-4_test_label.pkl │ ├── omi-4_train.pkl │ ├── omi-5_test.pkl │ ├── omi-5_test_label.pkl │ ├── omi-5_train.pkl │ ├── omi-6_test.pkl │ ├── omi-6_test_label.pkl │ ├── omi-6_train.pkl │ ├── omi-7_test.pkl │ ├── omi-7_test_label.pkl │ ├── omi-7_train.pkl │ ├── omi-8_test.pkl │ ├── omi-8_test_label.pkl │ ├── omi-8_train.pkl │ ├── omi-9_test.pkl │ ├── omi-9_test_label.pkl │ └── omi-9_train.pkl ├── explib/ │ ├── __init__.py │ ├── eval_methods.py │ ├── raw_data_converter.py │ └── utils.py └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .idea/ */__pycache__/ __pycache__/ .cache .pytest_cache *.pyc .DS_Store *.log data/Dataset Description.md .idea/vcs.xml ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2021 zhhlee Permission is hereby 
granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: README.md
================================================

# InterFusion

**KDD 2021: Multivariate Time Series Anomaly Detection and Interpretation using Hierarchical Inter-Metric and Temporal Embedding**

InterFusion is an unsupervised MTS anomaly detection and interpretation method. Its core idea is to model the normal patterns of MTS using an HVAE with jointly trained hierarchical stochastic latent variables, each of which explicitly learns low-dimensional inter-metric or temporal embeddings. You may refer to our [paper](https://dl.acm.org/doi/abs/10.1145/3447548.3467075) for more details.

## Getting Started

**Clone the repo**

```bash
git clone https://github.com/zhhlee/InterFusion.git && cd InterFusion
```

**Get data**

The datasets used in this paper are in the ``data`` folder. You may refer to ``data/Dataset Description`` for more details.

**Install dependencies (with python 3.6+)** (virtualenv is recommended)

```bash
pip install -r requirements.txt
```

The code is tested under the following basic environments:

```
OS: Ubuntu 18.04
GPU: GTX 1080 Ti
Cuda: 9.0.176
Python: 3.6.6
```

**Run the code**

Please set the root directory of the project as your Python path (i.e., add it to ``PYTHONPATH``).

For datasets ASD and SMD:

```bash
python algorithm/stack_train.py --dataset=omi-1    # training
python algorithm/stack_predict.py --load_model_dir=./results/stack_train/    # evaluation
```

For datasets SWaT and WADI (Note: you need to acquire these datasets first following ``data/Dataset Description`` and ``explib/raw_data_converter``):

SWaT:

```bash
python algorithm/stack_train.py --dataset=SWaT --train.train_start=21600 --train.valid_portion=0.1 --model.window_length=30 '--model.output_shape=[15, 15, 30]' --model.z2_dim=8    # training
python algorithm/stack_predict.py --load_model_dir=./results/stack_train/ --mcmc_track=False    # evaluation
```

WADI:

```bash
python algorithm/stack_train.py --dataset=WADI --train.train_start=259200 --train.max_train_size=789371 --train.valid_portion=0.1 --model.window_length=30 '--model.output_shape=[15, 15, 30]' --model.z2_dim=8    # training
python algorithm/stack_predict.py --load_model_dir=./results/stack_train/ --mcmc_track=False    # evaluation
```

The default model configurations are in ``algorithm/InterFusion.py``, train configs in ``algorithm/stack_train.py``, and evaluation configs in ``algorithm/stack_predict.py``. You may overwrite the configs using command line args.
For example:

```bash
python algorithm/stack_train.py --dataset=omi-1 --model.z_dim=5 --train.batch_size=128
python algorithm/stack_predict.py --load_model_dir=./results/stack_train/ --test_batch_size=100
```

**Run on your own dataset**

1. Put your train/test/label files under the ``data/processed`` folder, e.g., ``ds_train.pkl``, ``ds_test.pkl``, ``ds_test_label.pkl`` with shape ``(train_length, feature_dim)``, ``(test_length, feature_dim)``, ``(test_length,)``, respectively.
2. Put the interpretation files (optional) under the ``data/interpretation_label`` folder.
3. Edit ``get_data_dim`` in ``algorithm/utils.py`` to add your dataset info.
4. Run the code following the instructions above.

**Results**

After running the algorithm, the results are shown in the ``results`` folder. The main results are:

```bash
Model: results/stack_train/result_params/
Training config: results/stack_train/config.json
Testing config: results/stack_predict/config.json
Testing statistics: results/stack_predict/result.json
```

If you find this code useful for your research, please cite our paper:

```bibTex
@inproceedings{li2021multivariate,
  title={Multivariate Time Series Anomaly Detection and Interpretation using Hierarchical Inter-Metric and Temporal Embedding},
  author={Li, Zhihan and Zhao, Youjian and Han, Jiaqi and Su, Ya and Jiao, Rui and Wen, Xidao and Pei, Dan},
  booktitle={Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery \& Data Mining},
  pages={3220--3230},
  year={2021}
}
```

================================================
FILE: algorithm/InterFusion.py
================================================

from enum import Enum from typing import Optional, List import logging import tensorflow as tf from tensorflow.contrib.rnn import static_rnn, static_bidirectional_rnn from tensorflow.contrib.framework import arg_scope import tfsnippet as spt from tfsnippet.bayes import BayesianNet from tfsnippet.utils import (instance_reuse, VarScopeObject, reopen_variable_scope) from tfsnippet.distributions import FlowDistribution, Normal from tfsnippet.layers import l2_regularizer import mltk from algorithm.recurrent_distribution import RecurrentDistribution from algorithm.real_nvp import dense_real_nvp from algorithm.conv1d_ import conv1d, deconv1d class RNNCellType(str, Enum): GRU = 'GRU' LSTM = 'LSTM' Basic = 'Basic' class ModelConfig(mltk.Config): x_dim: int = -1 z_dim: int = 3 u_dim: int = 1 window_length = 100 output_shape: List[int] = [25, 25, 50, 50, 100] z2_dim: int = 13 l2_reg = 0.0001 posterior_flow_type: Optional[str] = mltk.config_field(choices=['rnvp', 'nf'], default='rnvp') # can be 'rnvp' for RealNVP, 'nf' for planarNF, None for not using posterior flow. posterior_flow_layers = 20 rnn_cell: RNNCellType = RNNCellType.GRU # can be 'GRU', 'LSTM' or 'Basic' rnn_hidden_units = 500 use_leaky_relu = False use_bidirectional_rnn = False # whether to use bidirectional rnn or not use_self_attention = False # whether to use self-attention on hidden states before infer qz or not. unified_px_logstd = False dropout_feature = False # dropout on the features in arnn logstd_min = -5. logstd_max = 2. use_prior_flow = False # If True, use RealNVP prior flow to enhance the representation of p(z). prior_flow_layers = 20 connect_qz = True connect_pz = True # The final InterFusion model.
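Note that ``window_length``, ``output_shape`` and ``z2_dim`` above are coupled: ``h_for_qz`` compresses the window along time with stride-2 ``conv1d`` layers (100 → 50 → 25 → 13 under the defaults), and ``h_for_px`` deconvolves ``z2`` back through ``output_shape`` to the full window, which is why the SWaT/WADI commands in the README override all three together. A minimal sketch of this length bookkeeping (plain Python, not part of the repository; it assumes 'same'-padded stride-2 convolutions, i.e. ceiling division):

```python
import math

def stride2_lengths(window_length, n_stride2_convs):
    """Sequence lengths after each stride-2 'same'-padded conv in h_for_qz."""
    lengths = [window_length]
    for _ in range(n_stride2_convs):
        lengths.append(math.ceil(lengths[-1] / 2))
    return lengths

# Default config (this file): window_length=100 -> 50 -> 25 -> 13, hence z2_dim=13;
# h_for_px then deconvolves back through output_shape=[25, 25, 50, 50, 100].
assert stride2_lengths(100, 3) == [100, 50, 25, 13]

# SWaT/WADI config (InterFusion_swat.py): window_length=30 -> 15 -> 8, hence z2_dim=8,
# with output_shape=[15, 15, 30]. The explicit output_shape is needed because ceiling
# division is not invertible (a stride-2 deconv of length 13 would default to 26, not 25).
assert stride2_lengths(30, 2) == [30, 15, 8]
```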
class MTSAD(VarScopeObject): def __init__(self, config: ModelConfig, name=None, scope=None): self.config = config super(MTSAD, self).__init__(name=name, scope=scope) with reopen_variable_scope(self.variable_scope): if self.config.rnn_cell == RNNCellType.Basic: self.d_fw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='d_fw_cell') self.a_fw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='a_fw_cell') if self.config.use_bidirectional_rnn: self.d_bw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='d_bw_cell') self.a_bw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='a_bw_cell') elif self.config.rnn_cell == RNNCellType.LSTM: self.d_fw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='d_fw_cell') self.a_fw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='a_fw_cell') if self.config.use_bidirectional_rnn: self.d_bw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='d_bw_cell') self.a_bw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='a_bw_cell') elif self.config.rnn_cell == RNNCellType.GRU: self.d_fw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='d_fw_cell') self.a_fw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='a_fw_cell') if self.config.use_bidirectional_rnn: self.d_bw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='d_bw_cell') self.a_bw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='a_bw_cell') else: raise ValueError('rnn cell must be one of GRU, LSTM or Basic.') if self.config.posterior_flow_type == 'nf': self.posterior_flow = spt.layers.planar_normalizing_flows(n_layers=self.config.posterior_flow_layers, scope='posterior_flow') elif self.config.posterior_flow_type == 'rnvp': self.posterior_flow = dense_real_nvp(flow_depth=self.config.posterior_flow_layers, activation=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='posterior_flow') else: self.posterior_flow = None if self.config.use_prior_flow: self.prior_flow = dense_real_nvp(flow_depth=self.config.prior_flow_layers, activation=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), is_prior_flow=True, scope='prior_flow') else: self.prior_flow = None def _my_rnn_net(self, x, window_length, fw_cell, bw_cell=None, time_axis=1, use_bidirectional_rnn=False): """ Get the base rnn model. :param x: The rnn input. :param window_length: The window length of input along time axis. :param fw_cell: Forward rnn cell. :param bw_cell: Optional. Backward rnn cell, only use when config.use_bidirectional_rnn=True. :param time_axis: Which is the time axis in input x, default 1. :param use_bidirectional_rnn: Whether or not use bidirectional rnn. Default false. :return: Tensor (batch_size, window_length, rnn_hidden_units). The output of rnn. """ x = tf.unstack(value=x, num=window_length, axis=time_axis) if use_bidirectional_rnn: outputs, _, _ = static_bidirectional_rnn(fw_cell, bw_cell, x, dtype=tf.float32) else: outputs, _ = static_rnn(fw_cell, x, dtype=tf.float32) outputs = tf.stack(outputs, axis=time_axis) # (batch_size, window_length, rnn_hidden_units) return outputs @instance_reuse def a_rnn_net(self, x, window_length, time_axis=1, use_bidirectional_rnn=False, use_self_attention=False, is_training=False): """ Reverse rnn network a, capture the future information in qnet. 
""" def dropout_fn(input): return tf.layers.dropout(input, rate=.5, training=is_training) flag = False if len(x.shape) == 4: # (n_samples, batch_size, window_length, x_dim) x, s1, s2 = spt.ops.flatten_to_ndims(x, 3) flag = True elif len(x.shape) != 3: logging.error('rnn input shape error.') # reverse the input sequence reversed_x = tf.reverse(x, axis=[time_axis]) if use_bidirectional_rnn: reversed_outputs = self._my_rnn_net(x=reversed_x, window_length=window_length, fw_cell=self.a_fw_cell, bw_cell=self.a_bw_cell, time_axis=time_axis, use_bidirectional_rnn=use_bidirectional_rnn) else: reversed_outputs = self._my_rnn_net(x=reversed_x, window_length=window_length, fw_cell=self.a_fw_cell, time_axis=time_axis, use_bidirectional_rnn=use_bidirectional_rnn) outputs = tf.reverse(reversed_outputs, axis=[time_axis]) # self attention if use_self_attention: outputs1 = spt.layers.dense(outputs, 500, activation_fn=tf.nn.tanh, use_bias=True, scope='arnn_attention_dense1') outputs1 = tf.nn.softmax(spt.layers.dense(outputs1, window_length, use_bias=False, scope='arnn_attention_dense2'), axis=1) M_t = tf.matmul(tf.transpose(outputs, perm=[0, 2, 1]), outputs1) outputs = tf.transpose(M_t, perm=[0, 2, 1]) # feature extraction layers outputs = spt.layers.dense(outputs, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='arnn_feature_dense1') if self.config.dropout_feature: outputs = dropout_fn(outputs) outputs = spt.layers.dense(outputs, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='arnn_feature_dense2') if self.config.dropout_feature: outputs = dropout_fn(outputs) if flag: outputs = spt.ops.unflatten_from_ndims(outputs, s1, s2) return outputs @instance_reuse def qz_mean_layer(self, x): return spt.layers.dense(x, units=self.config.z_dim, scope='qz_mean') @instance_reuse def qz_logstd_layer(self, x): return tf.clip_by_value(spt.layers.dense(x, units=self.config.z_dim, scope='qz_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) @instance_reuse def pz_mean_layer(self, x): return spt.layers.dense(x, units=self.config.z_dim, scope='pz_mean') @instance_reuse def pz_logstd_layer(self, x): return tf.clip_by_value(spt.layers.dense(x, units=self.config.z_dim, scope='pz_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) @instance_reuse def hz2_deconv(self, z2): with arg_scope([deconv1d], kernel_size=5, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg)): h_z = deconv1d(z2, out_channels=self.config.x_dim, output_shape=self.config.output_shape[0], strides=2) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[1], strides=1) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[2], strides=2) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[3], strides=1) h_z2 = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[4], strides=2) return h_z2 @instance_reuse def q_net(self, x, observed=None, u=None, n_z=None, is_training=False): # vs.name = self.variable_scope.name + "/q_net" logging.info('q_net builder: %r', locals()) net = BayesianNet(observed=observed) def dropout_fn(input): return tf.layers.dropout(input, rate=.5, 
training=is_training) # use the pretrained z2 which compress along the time dimension qz2_mean, qz2_logstd = self.h_for_qz(x, is_training=is_training) qz2_distribution = Normal(mean=qz2_mean, logstd=qz2_logstd) qz2_distribution = qz2_distribution.batch_ndims_to_value(2) z2 = net.add('z2', qz2_distribution, n_samples=n_z, is_reparameterized=True) # d_{1:t} from deconv h_z = self.h_for_px(z2) # a_{1:t}, (batch_size, window_length, dense_hidden_units) arnn_out = self.a_rnn_net(h_z, window_length=self.config.window_length, use_bidirectional_rnn=self.config.use_bidirectional_rnn, use_self_attention=self.config.use_self_attention, is_training=is_training) if self.config.connect_qz: qz_distribution = RecurrentDistribution(arnn_out, mean_layer=self.qz_mean_layer, logstd_layer=self.qz_logstd_layer, z_dim=self.config.z_dim, window_length=self.config.window_length) else: qz_mean = spt.layers.dense(arnn_out, units=self.config.z_dim, scope='qz1_mean') qz_logstd = tf.clip_by_value(spt.layers.dense(arnn_out, units=self.config.z_dim, scope='qz1_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) qz_distribution = Normal(mean=qz_mean, logstd=qz_logstd) if self.posterior_flow is not None: qz_distribution = FlowDistribution(distribution=qz_distribution, flow=self.posterior_flow).batch_ndims_to_value(1) else: qz_distribution = qz_distribution.batch_ndims_to_value(2) z1 = net.add('z1', qz_distribution, is_reparameterized=True) return net @instance_reuse def p_net(self, observed=None, u=None, n_z=None, is_training=False): logging.info('p_net builder: %r', locals()) net = BayesianNet(observed=observed) pz2_distribution = Normal(mean=tf.zeros([self.config.z2_dim, self.config.x_dim]), logstd=tf.zeros([self.config.z2_dim, self.config.x_dim])).batch_ndims_to_value(2) z2 = net.add('z2', pz2_distribution, n_samples=n_z, is_reparameterized=True) # e_{1:t} from deconv, shared params h_z2 = self.h_for_px(z2) if self.config.connect_pz: pz_distribution = RecurrentDistribution(h_z2, mean_layer=self.pz_mean_layer, logstd_layer=self.pz_logstd_layer, z_dim=self.config.z_dim, window_length=self.config.window_length) else: # non-recurrent pz pz_mean = spt.layers.dense(h_z2, units=self.config.z_dim, scope='pz_mean') pz_logstd = tf.clip_by_value(spt.layers.dense(h_z2, units=self.config.z_dim, scope='pz_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) pz_distribution = Normal(mean=pz_mean, logstd=pz_logstd) if self.prior_flow is not None: pz_distribution = FlowDistribution(distribution=pz_distribution, flow=self.prior_flow).batch_ndims_to_value(1) else: pz_distribution = pz_distribution.batch_ndims_to_value(2) z1 = net.add('z1', pz_distribution, is_reparameterized=True) h_z1 = spt.layers.dense(z1, units=self.config.x_dim) h_z = spt.ops.broadcast_concat(h_z1, h_z2, axis=-1) h_z = spt.layers.dense(h_z, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='feature_dense1') h_z = spt.layers.dense(h_z, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='feature_dense2') x_mean = spt.layers.dense(h_z, units=self.config.x_dim, scope='x_mean') if self.config.unified_px_logstd: x_logstd = tf.clip_by_value( tf.get_variable(name='x_logstd', shape=(), trainable=True, dtype=tf.float32, initializer=tf.constant_initializer(-1., dtype=tf.float32)), 
clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) else: x_logstd = tf.clip_by_value(spt.layers.dense(h_z, units=self.config.x_dim, scope='x_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) x = net.add('x', Normal(mean=x_mean, logstd=x_logstd).batch_ndims_to_value(2), is_reparameterized=True) return net def reconstruct(self, x, u, mask, n_z=None): with tf.name_scope('model.reconstruct'): qnet = self.q_net(x=x, u=u, n_z=n_z) pnet = self.p_net(observed={'z1': qnet['z1'], 'z2': qnet['z2']}, u=u) return pnet['x'] def get_score(self, x_embed, x_eval, u, n_z=None): with tf.name_scope('model.get_score'): qnet = self.q_net(x=x_embed, u=u, n_z=n_z) pnet = self.p_net(observed={'z1': qnet['z1'], 'z2': qnet['z2']}, u=u) score = pnet['x'].distribution.base_distribution.log_prob(x_eval) recons_mean = pnet['x'].distribution.base_distribution.mean recons_std = pnet['x'].distribution.base_distribution.std if n_z is not None: score = tf.reduce_mean(score, axis=0) recons_mean = tf.reduce_mean(recons_mean, axis=0) recons_std = tf.reduce_mean(recons_std, axis=0) return score, recons_mean, recons_std @instance_reuse def h_for_qz(self, x, is_training=False): with arg_scope([conv1d], kernel_size=5, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg)): h_x = conv1d(x, out_channels=self.config.x_dim, strides=2) # 50 h_x = conv1d(h_x, out_channels=self.config.x_dim) h_x = conv1d(h_x, out_channels=self.config.x_dim, strides=2) # 25 h_x = conv1d(h_x, out_channels=self.config.x_dim) h_x = conv1d(h_x, out_channels=self.config.x_dim, strides=2) # 13 qz_mean = conv1d(h_x, kernel_size=1, out_channels=self.config.x_dim) qz_logstd = conv1d(h_x, kernel_size=1, out_channels=self.config.x_dim) qz_logstd = tf.clip_by_value(qz_logstd, clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) return qz_mean, qz_logstd @instance_reuse def h_for_px(self, z): with arg_scope([deconv1d], kernel_size=5, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg)): h_z = deconv1d(z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[0], strides=2) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[1], strides=1) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[2], strides=2) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[3], strides=1) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[4], strides=2) return h_z @instance_reuse def pretrain_q_net(self, x, observed=None, n_z=None, is_training=False): # vs.name = self.variable_scope.name + "/q_net" logging.info('pretrain_q_net builder: %r', locals()) net = BayesianNet(observed=observed) def dropout_fn(input): return tf.layers.dropout(input, rate=.5, training=is_training) qz_mean, qz_logstd = self.h_for_qz(x, is_training=is_training) qz_distribution = Normal(mean=qz_mean, logstd=qz_logstd) qz_distribution = qz_distribution.batch_ndims_to_value(2) z = net.add('z', qz_distribution, n_samples=n_z, is_reparameterized=True) return net @instance_reuse def pretrain_p_net(self, observed=None, n_z=None, is_training=False): logging.info('p_net builder: %r', locals()) net = BayesianNet(observed=observed) pz_distribution = Normal(mean=tf.zeros([self.config.z2_dim, self.config.x_dim]), 
logstd=tf.zeros([self.config.z2_dim, self.config.x_dim])) pz_distribution = pz_distribution.batch_ndims_to_value(2) z = net.add('z', pz_distribution, n_samples=n_z, is_reparameterized=True) h_z = self.h_for_px(z) px_mean = conv1d(h_z, kernel_size=1, out_channels=self.config.x_dim, scope='pre_px_mean') px_logstd = conv1d(h_z, kernel_size=1, out_channels=self.config.x_dim, scope='pre_px_logstd') px_logstd = tf.clip_by_value(px_logstd, clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) x = net.add('x', Normal(mean=px_mean, logstd=px_logstd).batch_ndims_to_value(2), is_reparameterized=True) return net ================================================ FILE: algorithm/InterFusion_swat.py ================================================ from enum import Enum from typing import Optional, List import logging import tensorflow as tf from tensorflow.contrib.rnn import static_rnn, static_bidirectional_rnn from tensorflow.contrib.framework import arg_scope import tfsnippet as spt from tfsnippet.bayes import BayesianNet from tfsnippet.utils import (instance_reuse, VarScopeObject, reopen_variable_scope) from tfsnippet.distributions import FlowDistribution, Normal from tfsnippet.layers import l2_regularizer import mltk from algorithm.recurrent_distribution import RecurrentDistribution from algorithm.real_nvp import dense_real_nvp from algorithm.conv1d_ import conv1d, deconv1d class RNNCellType(str, Enum): GRU = 'GRU' LSTM = 'LSTM' Basic = 'Basic' class ModelConfig(mltk.Config): x_dim: int = -1 z_dim: int = 3 u_dim: int = 1 window_length = 30 output_shape: List[int] = [15, 15, 30] z2_dim: int = 8 l2_reg = 0.0001 posterior_flow_type: Optional[str] = mltk.config_field(choices=['rnvp', 'nf'], default='rnvp') # can be 'rnvp' for RealNVP, 'nf' for planarNF, None for not using posterior flow. posterior_flow_layers = 20 rnn_cell: RNNCellType = RNNCellType.GRU # can be 'GRU', 'LSTM' or 'Basic' rnn_hidden_units = 500 use_leaky_relu = False use_bidirectional_rnn = False # whether to use bidirectional rnn or not use_self_attention = False # whether to use self-attention on hidden states before infer qz or not. unified_px_logstd = False dropout_feature = False # dropout on the features in arnn logstd_min = -5. logstd_max = 2. use_prior_flow = False # If True, use RealNVP prior flow to enhance the representation of p(z). 
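    # prior_flow_layers (below): depth of the optional RealNVP prior flow.
    # connect_qz / connect_pz (below): if True, q(z1 | .) / p(z1 | .) are built with
    # RecurrentDistribution (algorithm/recurrent_distribution.py), so z1 is sampled
    # step by step along the window; if False, plain Normal distributions parameterized
    # by dense layers on the corresponding hidden features are used instead.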
prior_flow_layers = 20 connect_qz = True connect_pz = True # InterFusion model for SWaT & WADI (differ in num of layers) class MTSAD_SWAT(VarScopeObject): def __init__(self, config: ModelConfig, name=None, scope=None): self.config = config super(MTSAD_SWAT, self).__init__(name=name, scope=scope) with reopen_variable_scope(self.variable_scope): if self.config.rnn_cell == RNNCellType.Basic: self.d_fw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='d_fw_cell') self.a_fw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='a_fw_cell') if self.config.use_bidirectional_rnn: self.d_bw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='d_bw_cell') self.a_bw_cell = tf.nn.rnn_cell.BasicRNNCell(self.config.rnn_hidden_units, name='a_bw_cell') elif self.config.rnn_cell == RNNCellType.LSTM: self.d_fw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='d_fw_cell') self.a_fw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='a_fw_cell') if self.config.use_bidirectional_rnn: self.d_bw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='d_bw_cell') self.a_bw_cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_hidden_units, name='a_bw_cell') elif self.config.rnn_cell == RNNCellType.GRU: self.d_fw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='d_fw_cell') self.a_fw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='a_fw_cell') if self.config.use_bidirectional_rnn: self.d_bw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='d_bw_cell') self.a_bw_cell = tf.nn.rnn_cell.GRUCell(self.config.rnn_hidden_units, name='a_bw_cell') else: raise ValueError('rnn cell must be one of GRU, LSTM or Basic.') if self.config.posterior_flow_type == 'nf': self.posterior_flow = spt.layers.planar_normalizing_flows(n_layers=self.config.posterior_flow_layers, scope='posterior_flow') elif self.config.posterior_flow_type == 'rnvp': self.posterior_flow = dense_real_nvp(flow_depth=self.config.posterior_flow_layers, activation=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='posterior_flow') else: self.posterior_flow = None if self.config.use_prior_flow: self.prior_flow = dense_real_nvp(flow_depth=self.config.prior_flow_layers, activation=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), is_prior_flow=True, scope='prior_flow') else: self.prior_flow = None def _my_rnn_net(self, x, window_length, fw_cell, bw_cell=None, time_axis=1, use_bidirectional_rnn=False): """ Get the base rnn model for d-net and a-net. :param x: The rnn input. :param window_length: The window length of input along time axis. :param fw_cell: Forward rnn cell. :param bw_cell: Optional. Backward rnn cell, only use when config.use_bidirectional_rnn=True. :param time_axis: Which is the time axis in input x, default 1. :param use_bidirectional_rnn: Whether or not use bidirectional rnn. Default false. :return: Tensor (batch_size, window_length, rnn_hidden_units). The output of rnn. 
""" x = tf.unstack(value=x, num=window_length, axis=time_axis) if use_bidirectional_rnn: outputs, _, _ = static_bidirectional_rnn(fw_cell, bw_cell, x, dtype=tf.float32) else: outputs, _ = static_rnn(fw_cell, x, dtype=tf.float32) outputs = tf.stack(outputs, axis=time_axis) # (batch_size, window_length, rnn_hidden_units) return outputs @instance_reuse def a_rnn_net(self, x, window_length, time_axis=1, use_bidirectional_rnn=False, use_self_attention=False, is_training=False): """ Reverse rnn network a, capture the future information in qnet. """ def dropout_fn(input): return tf.layers.dropout(input, rate=.5, training=is_training) flag = False if len(x.shape) == 4: # (n_samples, batch_size, window_length, x_dim) x, s1, s2 = spt.ops.flatten_to_ndims(x, 3) flag = True elif len(x.shape) != 3: logging.error('rnn input shape error.') # reverse the input sequence reversed_x = tf.reverse(x, axis=[time_axis]) if use_bidirectional_rnn: reversed_outputs = self._my_rnn_net(x=reversed_x, window_length=window_length, fw_cell=self.a_fw_cell, bw_cell=self.a_bw_cell, time_axis=time_axis, use_bidirectional_rnn=use_bidirectional_rnn) else: reversed_outputs = self._my_rnn_net(x=reversed_x, window_length=window_length, fw_cell=self.a_fw_cell, time_axis=time_axis, use_bidirectional_rnn=use_bidirectional_rnn) outputs = tf.reverse(reversed_outputs, axis=[time_axis]) # self attention if use_self_attention: outputs1 = spt.layers.dense(outputs, 500, activation_fn=tf.nn.tanh, use_bias=True, scope='arnn_attention_dense1') outputs1 = tf.nn.softmax(spt.layers.dense(outputs1, window_length, use_bias=False, scope='arnn_attention_dense2'), axis=1) M_t = tf.matmul(tf.transpose(outputs, perm=[0, 2, 1]), outputs1) outputs = tf.transpose(M_t, perm=[0, 2, 1]) # feature extraction layers outputs = spt.layers.dense(outputs, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='arnn_feature_dense1') if self.config.dropout_feature: outputs = dropout_fn(outputs) outputs = spt.layers.dense(outputs, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='arnn_feature_dense2') if self.config.dropout_feature: outputs = dropout_fn(outputs) if flag: outputs = spt.ops.unflatten_from_ndims(outputs, s1, s2) return outputs @instance_reuse def qz_mean_layer(self, x): return spt.layers.dense(x, units=self.config.z_dim, scope='qz_mean') @instance_reuse def qz_logstd_layer(self, x): return tf.clip_by_value(spt.layers.dense(x, units=self.config.z_dim, scope='qz_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) @instance_reuse def pz_mean_layer(self, x): return spt.layers.dense(x, units=self.config.z_dim, scope='pz_mean') @instance_reuse def pz_logstd_layer(self, x): return tf.clip_by_value(spt.layers.dense(x, units=self.config.z_dim, scope='pz_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) @instance_reuse def hz2_deconv(self, z2): with arg_scope([deconv1d], kernel_size=5, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg)): h_z = deconv1d(z2, out_channels=self.config.x_dim, output_shape=self.config.output_shape[0], strides=2) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[1], strides=1) h_z2 = deconv1d(h_z, out_channels=self.config.x_dim, 
output_shape=self.config.output_shape[2], strides=2) return h_z2 @instance_reuse def q_net(self, x, observed=None, u=None, n_z=None, is_training=False): # vs.name = self.variable_scope.name + "/q_net" logging.info('q_net builder: %r', locals()) net = BayesianNet(observed=observed) def dropout_fn(input): return tf.layers.dropout(input, rate=.5, training=is_training) # use the pretrained z2 which compress along the time dimension qz2_mean, qz2_logstd = self.h_for_qz(x, is_training=is_training) qz2_distribution = Normal(mean=qz2_mean, logstd=qz2_logstd) qz2_distribution = qz2_distribution.batch_ndims_to_value(2) z2 = net.add('z2', qz2_distribution, n_samples=n_z, is_reparameterized=True) # d_{1:t} from deconv h_z = self.h_for_px(z2) # a_{1:t}, (batch_size, window_length, dense_hidden_units) arnn_out = self.a_rnn_net(h_z, window_length=self.config.window_length, use_bidirectional_rnn=self.config.use_bidirectional_rnn, use_self_attention=self.config.use_self_attention, is_training=is_training) if self.config.connect_qz: qz_distribution = RecurrentDistribution(arnn_out, mean_layer=self.qz_mean_layer, logstd_layer=self.qz_logstd_layer, z_dim=self.config.z_dim, window_length=self.config.window_length) else: qz_mean = spt.layers.dense(arnn_out, units=self.config.z_dim, scope='qz1_mean') qz_logstd = tf.clip_by_value(spt.layers.dense(arnn_out, units=self.config.z_dim, scope='qz1_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) qz_distribution = Normal(mean=qz_mean, logstd=qz_logstd) if self.posterior_flow is not None: qz_distribution = FlowDistribution(distribution=qz_distribution, flow=self.posterior_flow).batch_ndims_to_value(1) else: qz_distribution = qz_distribution.batch_ndims_to_value(2) z1 = net.add('z1', qz_distribution, is_reparameterized=True) return net @instance_reuse def p_net(self, observed=None, u=None, n_z=None, is_training=False): logging.info('p_net builder: %r', locals()) net = BayesianNet(observed=observed) pz2_distribution = Normal(mean=tf.zeros([self.config.z2_dim, self.config.x_dim]), logstd=tf.zeros([self.config.z2_dim, self.config.x_dim])).batch_ndims_to_value(2) z2 = net.add('z2', pz2_distribution, n_samples=n_z, is_reparameterized=True) # e_{1:t} from deconv, shared params h_z2 = self.h_for_px(z2) if self.config.connect_pz: pz_distribution = RecurrentDistribution(h_z2, mean_layer=self.pz_mean_layer, logstd_layer=self.pz_logstd_layer, z_dim=self.config.z_dim, window_length=self.config.window_length) else: # non-recurrent pz pz_mean = spt.layers.dense(h_z2, units=self.config.z_dim, scope='pz_mean') pz_logstd = tf.clip_by_value(spt.layers.dense(h_z2, units=self.config.z_dim, scope='pz_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) pz_distribution = Normal(mean=pz_mean, logstd=pz_logstd) if self.prior_flow is not None: pz_distribution = FlowDistribution(distribution=pz_distribution, flow=self.prior_flow).batch_ndims_to_value(1) else: pz_distribution = pz_distribution.batch_ndims_to_value(2) z1 = net.add('z1', pz_distribution, is_reparameterized=True) h_z1 = spt.layers.dense(z1, units=self.config.x_dim) h_z = spt.ops.broadcast_concat(h_z1, h_z2, axis=-1) h_z = spt.layers.dense(h_z, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='feature_dense1') h_z = spt.layers.dense(h_z, units=500, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, 
kernel_regularizer=l2_regularizer(self.config.l2_reg), scope='feature_dense2') x_mean = spt.layers.dense(h_z, units=self.config.x_dim, scope='x_mean') if self.config.unified_px_logstd: x_logstd = tf.clip_by_value( tf.get_variable(name='x_logstd', shape=(), trainable=True, dtype=tf.float32, initializer=tf.constant_initializer(-1., dtype=tf.float32)), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) else: x_logstd = tf.clip_by_value(spt.layers.dense(h_z, units=self.config.x_dim, scope='x_logstd'), clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) x = net.add('x', Normal(mean=x_mean, logstd=x_logstd).batch_ndims_to_value(2), is_reparameterized=True) return net def reconstruct(self, x, u, mask, n_z=None): with tf.name_scope('model.reconstruct'): qnet = self.q_net(x=x, u=u, n_z=n_z) pnet = self.p_net(observed={'z1': qnet['z1'], 'z2': qnet['z2']}, u=u) return pnet['x'] def get_score(self, x_embed, x_eval, u, n_z=None): with tf.name_scope('model.get_score'): qnet = self.q_net(x=x_embed, u=u, n_z=n_z) pnet = self.p_net(observed={'z1': qnet['z1'], 'z2': qnet['z2']}, u=u) score = pnet['x'].distribution.base_distribution.log_prob(x_eval) recons_mean = pnet['x'].distribution.base_distribution.mean recons_std = pnet['x'].distribution.base_distribution.std if n_z is not None: score = tf.reduce_mean(score, axis=0) recons_mean = tf.reduce_mean(recons_mean, axis=0) recons_std = tf.reduce_mean(recons_std, axis=0) return score, recons_mean, recons_std @instance_reuse def h_for_qz(self, x, is_training=False): with arg_scope([conv1d], kernel_size=5, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg)): h_x = conv1d(x, out_channels=self.config.x_dim, strides=2) # 15 h_x = conv1d(h_x, out_channels=self.config.x_dim) h_x = conv1d(h_x, out_channels=self.config.x_dim, strides=2) # 8 qz_mean = conv1d(h_x, kernel_size=1, out_channels=self.config.x_dim) qz_logstd = conv1d(h_x, kernel_size=1, out_channels=self.config.x_dim) qz_logstd = tf.clip_by_value(qz_logstd, clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) return qz_mean, qz_logstd @instance_reuse def h_for_px(self, z): with arg_scope([deconv1d], kernel_size=5, activation_fn=tf.nn.leaky_relu if self.config.use_leaky_relu else tf.nn.relu, kernel_regularizer=l2_regularizer(self.config.l2_reg)): h_z = deconv1d(z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[0], strides=2) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[1], strides=1) h_z = deconv1d(h_z, out_channels=self.config.x_dim, output_shape=self.config.output_shape[2], strides=2) return h_z @instance_reuse def pretrain_q_net(self, x, observed=None, n_z=None, is_training=False): # vs.name = self.variable_scope.name + "/q_net" logging.info('pretrain_q_net builder: %r', locals()) net = BayesianNet(observed=observed) def dropout_fn(input): return tf.layers.dropout(input, rate=.5, training=is_training) qz_mean, qz_logstd = self.h_for_qz(x, is_training=is_training) qz_distribution = Normal(mean=qz_mean, logstd=qz_logstd) qz_distribution = qz_distribution.batch_ndims_to_value(2) z = net.add('z', qz_distribution, n_samples=n_z, is_reparameterized=True) return net @instance_reuse def pretrain_p_net(self, observed=None, n_z=None, is_training=False): logging.info('p_net builder: %r', locals()) net = BayesianNet(observed=observed) pz_distribution = Normal(mean=tf.zeros([self.config.z2_dim, 
self.config.x_dim]), logstd=tf.zeros([self.config.z2_dim, self.config.x_dim])) pz_distribution = pz_distribution.batch_ndims_to_value(2) z = net.add('z', pz_distribution, n_samples=n_z, is_reparameterized=True) h_z = self.h_for_px(z) px_mean = conv1d(h_z, kernel_size=1, out_channels=self.config.x_dim, scope='pre_px_mean') px_logstd = conv1d(h_z, kernel_size=1, out_channels=self.config.x_dim, scope='pre_px_logstd') px_logstd = tf.clip_by_value(px_logstd, clip_value_min=self.config.logstd_min, clip_value_max=self.config.logstd_max) x = net.add('x', Normal(mean=px_mean, logstd=px_logstd).batch_ndims_to_value(2), is_reparameterized=True) return net ================================================ FILE: algorithm/__init__.py ================================================ from .recurrent_distribution import RecurrentDistribution from .real_nvp import dense_real_nvp from .utils import * from .mcmc_recons import * ================================================ FILE: algorithm/cal_IPS.py ================================================ import os import pickle import numpy as np def cal_IPS(path, dataset, mcmc, is_pretrain): path += '/' # load labels f = open("./data/processed/" + dataset + "_test_label.pkl", "rb") test_label = pickle.load(f).reshape((-1)) f.close() label_dir = './data/interpretation_label/' + dataset + '.txt' try: labels = str(open(label_dir, 'rb').read(), encoding='utf8') except: raise FileNotFoundError('cannot find label via path: ' + label_dir) # parse labels labels = labels.split('\n') if len(labels[-1]) == 0: labels = labels[:-1] intervals = [] dims = [] for i in labels: t = i.split(':') assert len(t) == 2 intervals.append([int(_) for _ in t[0].split('-')]) assert len(intervals[-1]) == 2 dims.append([int(_) for _ in t[1].split(',')]) assert len(intervals) == len(dims) interpret_dict = {} tp_res = {} # form the interpret dict: {(idx_st, idx_ed): interpret_dims} for _ in range(len(intervals)): interpret_dict[tuple(intervals[_])] = dims[_] if mcmc: assert is_pretrain is False try: tracker = pickle.load(open(path + 'mcmc_tracker.pkl', 'rb')) except: raise FileNotFoundError('cannot find mcmc_tracker.pkl') # preprocess tracker to get: # {idx_ed: best_score_mcmc (np.array(shape=[window_size, x_dim]))} for ed_idx in tracker: shape = tracker[ed_idx]['best_score'].shape assert len(shape) == 3 tp_res[ed_idx] = tracker[ed_idx]['best_score'].reshape([shape[1], shape[2]]) else: try: if is_pretrain: full_recons = np.load(path + 'pretrain_full_recons_window_probs.npz') else: full_recons = np.load(path + 'full_recons_window_probs.npz') except: raise FileNotFoundError('cannot find full_recons_window_probs.npz!') try: if is_pretrain: test_score = pickle.load(open(path + 'pretrain_test_score.pkl', 'rb')) else: test_score = pickle.load(open(path + 'test_score.pkl', 'rb')) except: raise FileNotFoundError('cannot find test_score.pkl!') # get all tp points idx full_recons = full_recons['full_full_test_recons_window_probs'] assert full_recons.shape[0] == len(test_label) - 100 + 1 test_label = test_label[-len(test_score):] assert len(test_score) == len(test_label) t, th = get_best_f1(test_score, test_label) tp_idx = np.logical_and(test_label > 0.5, test_score <= th) tp_idx = np.where(tp_idx)[0] tp_idx += (100 - 1) # get the windows which end by tp_idx for ed_idx in tp_idx: tp_res[ed_idx] = full_recons[ed_idx - 100 + 1] assert tp_res[ed_idx].shape[0] == 100 results = {} for p in [100, 150]: prefix = 'p=' + str(p) + ': ' # segment-wise aggregation within_window_funcs = [lambda x: x[-1]] 
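# 'wd_last' (named on the next line): each true-positive window contributes the
# per-metric reconstruction log-probabilities of its last time step (the detected
# point). The interval-level functions below then aggregate these vectors over all
# TP windows falling in the same labelled anomaly interval, and the metrics are
# ranked by ascending score before computing the hit rate against the labelled
# dimensions (get_hit_rate).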
within_window_func_names = ['wd_last'] assert len(within_window_func_names) == len(within_window_funcs) def min_aggr_and_keep(x): temp = np.min(x, axis=0) return [temp for _ in range(len(x))] def ave_aggr_and_keep(x): temp = np.mean(x, axis=0) return [temp for _ in range(len(x))] def max_aggr_and_keep(x): temp = np.max(x, axis=0) return [temp for _ in range(len(x))] def min_aggr(x): return [np.min(x, axis=0)] def ave_aggr(x): return [np.mean(x, axis=0)] def max_aggr(x): return [np.max(x, axis=0)] within_interval_sc_funcs = [min_aggr_and_keep] within_interval_sc_func_names = ['itv_min_weight'] for wd_idx, window_func in enumerate(within_window_funcs): for iv_idx, itv_func in enumerate(within_interval_sc_funcs): combine_name = prefix + within_window_func_names[wd_idx] + '_' + within_interval_sc_func_names[iv_idx] # compute aggr score itv = {} for ed_idx in tp_res: for interval in interpret_dict: if interval[0] <= ed_idx <= interval[1]: # this TP in this interval dim_scores = window_func(tp_res[ed_idx]).reshape((-1)) if interval in itv: itv[interval].append(dim_scores) else: itv[interval] = [dim_scores] break scores = [] labels = [] for interval in itv: temp = itv_func(itv[interval]) scores += temp labels += [interpret_dict[interval] for _ in range(len(temp))] assert len(scores) == len(labels) # compute Interpretation score hit_rate_collector = [] for idx, dim_scores in enumerate(scores): dim_order = np.argsort(dim_scores) + 1 hit_rate = get_hit_rate(pred=dim_order, label=labels[idx], p=p) hit_rate_collector.append(hit_rate) hit_rate = np.mean(hit_rate_collector) results[combine_name] = hit_rate if is_pretrain: res = {} for _ in results: res['pretrain_' + _] = results[_] results = res if mcmc: res = {} for _ in results: res['mcmc_' + _] = results[_] results = res return results def get_hit_rate(pred, label, p): chance_num = min(int(p / 100 * len(label)), len(pred)) cnt = 0 for _ in range(chance_num): if pred[_] in label: cnt += 1 hit_rate = cnt / len(label) return hit_rate # here for our refined best-f1 search method def get_best_f1(score, label): ''' :param score: 1-D array, input score, tot_length :param label: 1-D array, standard label for anomaly :return: list for results, threshold ''' assert score.shape == label.shape print('***computing best f1***') search_set = [] tot_anomaly = 0 for i in range(label.shape[0]): tot_anomaly += (label[i] > 0.5) flag = 0 cur_anomaly_len = 0 cur_min_anomaly_score = 1e5 for i in range(label.shape[0]): if label[i] > 0.5: # here for an anomaly if flag == 1: cur_anomaly_len += 1 cur_min_anomaly_score = score[i] if score[i] < cur_min_anomaly_score else cur_min_anomaly_score else: flag = 1 cur_anomaly_len = 1 cur_min_anomaly_score = score[i] else: # here for normal points if flag == 1: flag = 0 search_set.append((cur_min_anomaly_score, cur_anomaly_len, True)) search_set.append((score[i], 1, False)) else: search_set.append((score[i], 1, False)) if flag == 1: search_set.append((cur_min_anomaly_score, cur_anomaly_len, True)) search_set.sort(key=lambda x: x[0]) best_f1_res = - 1 threshold = 1 P = 0 TP = 0 best_P = 0 best_TP = 0 for i in range(len(search_set)): P += search_set[i][1] if search_set[i][2]: # for an anomaly point TP += search_set[i][1] precision = TP / (P + 1e-5) recall = TP / (tot_anomaly + 1e-5) f1 = 2 * precision * recall / (precision + recall + 1e-5) if f1 > best_f1_res: best_f1_res = f1 threshold = search_set[i][0] best_P = P best_TP = TP print('*** best_f1 ***: ', best_f1_res) print('*** threshold ***: ', threshold) return (best_f1_res, best_TP / 
(best_P + 1e-5), best_TP / (tot_anomaly + 1e-5), best_TP, score.shape[0] - best_P - tot_anomaly + best_TP, best_P - best_TP, tot_anomaly - best_TP), threshold ================================================ FILE: algorithm/conv1d_.py ================================================ import numpy as np import tensorflow as tf from tensorflow.contrib.framework import add_arg_scope from tfsnippet.ops import (assert_rank, assert_scalar_equal, flatten_to_ndims, unflatten_from_ndims) from tfsnippet.utils import (validate_positive_int_arg, ParamSpec, is_tensor_object, assert_deps, get_shape, add_name_and_scope_arg_doc, model_variable, maybe_check_numerics, maybe_add_histogram, InputSpec, get_static_shape, validate_enum_arg, validate_int_tuple_arg) __all__ = [ 'conv1d', 'deconv1d', 'validate_conv1d_input', 'get_deconv_output_length', 'batch_norm_1d' ] @add_arg_scope def batch_norm_1d(input, channels_last=True, training=False, name=None, scope=None): """ Apply batch normalization on 1D convolutional layer. Args: input (tf.Tensor): The input tensor. channels_last (bool): Whether or not the channel dimension is at last? training (bool or tf.Tensor): Whether or not the model is under training stage? Returns: tf.Tensor: The normalized tensor. """ with tf.variable_scope(scope, default_name=name or 'batch_norm_1d'): input, s1, s2 = flatten_to_ndims(input, ndims=3) output = tf.layers.batch_normalization( input, axis=-1 if channels_last else -2, training=training, name='norm' ) output = unflatten_from_ndims(output, s1, s2) return output def validate_conv1d_input(input, channels_last, arg_name='input'): """ Validate the input for 1-d convolution. Args: input: The input tensor, must be at least 3-d. channels_last (bool): Whether or not the last dimension is the channels dimension? (i.e., the data format is (batch, length, channels)) arg_name (str): Name of the input argument. Returns: (tf.Tensor, int, str): The validated input tensor, the number of input channels, and the data format. """ if channels_last: channel_axis = -1 input_spec = InputSpec(shape=('...', '?', '?', '*')) data_format = "NWC" else: channel_axis = -2 input_spec = InputSpec(shape=('...', '?', '*', '?')) data_format = "NCW" input = input_spec.validate(arg_name, input) input_shape = get_static_shape(input) in_channels = input_shape[channel_axis] return input, in_channels, data_format def get_deconv_output_length(input_length, kernel_size, strides, padding): """ Get the output length of deconvolution at a specific dimension. Args: input_length: Input tensor length. kernel_size: The size of the kernel. strides: The stride of convolution. padding: One of {"same", "valid"}, case in-sensitive Returns: int: The output length of deconvolution. """ padding = validate_enum_arg( 'padding', str(padding).upper(), ['SAME', 'VALID']) output_length = input_length * strides if padding == 'VALID': output_length += max(kernel_size - strides, 0) return output_length @add_arg_scope @add_name_and_scope_arg_doc def conv1d(input, out_channels, kernel_size, strides=1, dilations=1, padding='same', channels_last=True, activation_fn=None, normalizer_fn=None, gated=False, gate_sigmoid_bias=2., kernel=None, kernel_mask=None, kernel_initializer=None, kernel_regularizer=None, kernel_constraint=None, use_bias=None, bias=None, bias_initializer=tf.zeros_initializer(), bias_regularizer=None, bias_constraint=None, trainable=True, name=None, scope=None): """ 1D convolutional layer. Args: input (Tensor): The input tensor, at least 3-d. 
out_channels (int): The channel numbers of the output. kernel_size (int or tuple(int,)): Kernel size over spatial dimensions. strides (int): Strides over spatial dimensions. dilations (int): The dilation factor over spatial dimensions. padding: One of {"valid", "same"}, case in-sensitive. channels_last (bool): Whether or not the channel axis is the last axis in `input`? (i.e., the data format is "NWC") activation_fn: The activation function. normalizer_fn: The normalizer function. gated (bool): Whether or not to use gate on output? `output = activation_fn(output) * sigmoid(gate)`. gate_sigmoid_bias (Tensor): The bias added to `gate` before applying the `sigmoid` activation. kernel (Tensor): Instead of creating a new variable, use this tensor. kernel_mask (Tensor): If specified, multiply this mask onto `kernel`, i.e., the actual kernel to use will be `kernel * kernel_mask`. kernel_initializer: The initializer for `kernel`. Would be ``default_kernel_initializer(...)`` if not specified. kernel_regularizer: The regularizer for `kernel`. kernel_constraint: The constraint for `kernel`. use_bias (bool or None): Whether or not to use `bias`? If :obj:`True`, will always use bias. If :obj:`None`, will use bias only if `normalizer_fn` is not given. If :obj:`False`, will never use bias. Default is :obj:`None`. bias (Tensor): Instead of creating a new variable, use this tensor. bias_initializer: The initializer for `bias`. bias_regularizer: The regularizer for `bias`. bias_constraint: The constraint for `bias`. trainable (bool): Whether or not the parameters are trainable? Returns: tf.Tensor: The output tensor. """ if not channels_last: raise ValueError('Currently only channels_last=True is supported.') input, in_channels, data_format = \ validate_conv1d_input(input, channels_last) out_channels = validate_positive_int_arg('out_channels', out_channels) dtype = input.dtype.base_dtype if gated: out_channels *= 2 # check functional arguments padding = validate_enum_arg( 'padding', str(padding).upper(), ['VALID', 'SAME']) dilations = validate_positive_int_arg('dilations', dilations) strides = validate_positive_int_arg('strides', strides) if dilations > 1 and not channels_last: raise ValueError('`channels_last` == False is incompatible with ' '`dilations` > 1.') if strides > 1 and dilations > 1: raise ValueError('`strides` > 1 is incompatible with `dilations` > 1.') if use_bias is None: use_bias = normalizer_fn is None # get the specification of outputs and parameters kernel_size = validate_int_tuple_arg('kernel_size', kernel_size) kernel_shape = kernel_size + (in_channels, out_channels) bias_shape = (out_channels,) # validate the parameters if kernel is not None: kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype) kernel = kernel_spec.validate('kernel', kernel) if kernel_mask is not None: kernel_mask_spec = InputSpec(dtype=dtype) kernel_mask = kernel_mask_spec.validate('kernel_mask', kernel_mask) if kernel_initializer is None: kernel_initializer = tf.glorot_normal_initializer() if bias is not None: bias_spec = ParamSpec(shape=bias_shape, dtype=dtype) bias = bias_spec.validate('bias', bias) # the main part of the conv1d layer with tf.variable_scope(scope, default_name=name or 'conv1d'): c_axis = -1 if channels_last else -2 # create the variables if kernel is None: kernel = model_variable( 'kernel', shape=kernel_shape, dtype=dtype, initializer=kernel_initializer, regularizer=kernel_regularizer, constraint=kernel_constraint, trainable=trainable ) if kernel_mask is not None: kernel = kernel * kernel_mask 
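        # Record an optional histogram summary of the (possibly masked) kernel and wrap
        # it with numeric checks; the bias variable is created next (when use_bias is
        # True and no bias tensor was passed in), before the input is flattened to 3-D
        # and the actual 1-D convolution is applied.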
maybe_add_histogram(kernel, 'kernel') kernel = maybe_check_numerics(kernel, 'kernel') if use_bias and bias is None: bias = model_variable( 'bias', shape=bias_shape, initializer=bias_initializer, regularizer=bias_regularizer, constraint=bias_constraint, trainable=trainable ) maybe_add_histogram(bias, 'bias') bias = maybe_check_numerics(bias, 'bias') # flatten to 3d output, s1, s2 = flatten_to_ndims(input, 3) # do convolution if dilations > 1: output = tf.nn.convolution( input=output, filter=kernel, dilation_rate=(dilations,), padding=padding, data_format=data_format ) else: output = tf.nn.conv1d( value=output, filters=kernel, stride=strides, padding=padding, data_format=data_format ) # add bias if use_bias: output = tf.add(output, bias) # apply the normalization function if specified if normalizer_fn is not None: output = normalizer_fn(output) # split into halves if gated if gated: output, gate = tf.split(output, 2, axis=c_axis) # apply the activation function if specified if activation_fn is not None: output = activation_fn(output) # apply the gate if required if gated: if gate_sigmoid_bias is None: gate_sigmoid_bias = model_variable( 'gate_sigmoid_bias', shape=bias_shape, initializer=bias_initializer, regularizer=bias_regularizer, constraint=bias_constraint, trainable=trainable ) maybe_add_histogram(gate_sigmoid_bias, 'gate_sigmoid_bias') gate_sigmoid_bias = maybe_check_numerics(gate_sigmoid_bias, 'gate_sigmoid_bias') output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate') # unflatten back to original shape output = unflatten_from_ndims(output, s1, s2) maybe_add_histogram(output, 'output') output = maybe_check_numerics(output, 'output') return output @add_arg_scope @add_name_and_scope_arg_doc def deconv1d(input, out_channels, kernel_size, strides=1, padding='same', channels_last=True, output_shape=None, activation_fn=None, normalizer_fn=None, gated=False, gate_sigmoid_bias=2., kernel=None, kernel_initializer=None, kernel_regularizer=None, kernel_constraint=None, use_bias=None, bias=None, bias_initializer=tf.zeros_initializer(), bias_regularizer=None, bias_constraint=None, trainable=True, name=None, scope=None): """ 1D deconvolutional layer. Args: input (Tensor): The input tensor, at least 3-d. out_channels (int): The channel numbers of the deconvolution output. kernel_size (int or tuple(int,)): Kernel size over spatial dimensions. strides (int): Strides over spatial dimensions. padding: One of {"valid", "same"}, case in-sensitive. channels_last (bool): Whether or not the channel axis is the last axis in `input`? (i.e., the data format is "NWC") output_shape: If specified, use this as the shape of the deconvolution output; otherwise compute the size of each dimension by:: output_size = input_size * strides if padding == 'valid': output_size += max(kernel_size - strides, 0) activation_fn: The activation function. normalizer_fn: The normalizer function. gated (bool): Whether or not to use gate on output? `output = activation_fn(output) * sigmoid(gate)`. gate_sigmoid_bias (Tensor): The bias added to `gate` before applying the `sigmoid` activation. kernel (Tensor): Instead of creating a new variable, use this tensor. kernel_initializer: The initializer for `kernel`. Would be ``default_kernel_initializer(...)`` if not specified. kernel_regularizer: The regularizer for `kernel`. kernel_constraint: The constraint for `kernel`. use_bias (bool or None): Whether or not to use `bias`? If :obj:`True`, will always use bias. If :obj:`None`, will use bias only if `normalizer_fn` is not given. 
If :obj:`False`, will never use bias. Default is :obj:`None`. bias (Tensor): Instead of creating a new variable, use this tensor. bias_initializer: The initializer for `bias`. bias_regularizer: The regularizer for `bias`. bias_constraint: The constraint for `bias`. trainable (bool): Whether or not the parameters are trainable? Returns: tf.Tensor: The output tensor. """ if not channels_last: raise ValueError('Currently only channels_last=True is supported.') input, in_channels, data_format = \ validate_conv1d_input(input, channels_last) out_channels = validate_positive_int_arg('out_channels', out_channels) dtype = input.dtype.base_dtype if gated: out_channels *= 2 # check functional arguments padding = validate_enum_arg( 'padding', str(padding).upper(), ['VALID', 'SAME']) strides = validate_positive_int_arg('strides', strides) if use_bias is None: use_bias = normalizer_fn is None # get the specification of outputs and parameters kernel_size = validate_int_tuple_arg('kernel_size', kernel_size) kernel_shape = kernel_size + (out_channels, in_channels) bias_shape = (out_channels,) given_w = None given_output_shape = output_shape if is_tensor_object(given_output_shape): given_output_shape = tf.convert_to_tensor(given_output_shape) elif given_output_shape is not None: given_w = given_output_shape # validate the parameters if kernel is not None: kernel_spec = ParamSpec(shape=kernel_shape, dtype=dtype) kernel = kernel_spec.validate('kernel', kernel) if kernel_initializer is None: kernel_initializer = tf.glorot_normal_initializer() if bias is not None: bias_spec = ParamSpec(shape=bias_shape, dtype=dtype) bias = bias_spec.validate('bias', bias) # the main part of the conv2d layer with tf.variable_scope(scope, default_name=name or 'deconv1d'): with tf.name_scope('output_shape'): # detect the input shape and axis arrangements input_shape = get_static_shape(input) if channels_last: c_axis, w_axis = -1, -2 else: c_axis, w_axis = -2, -1 output_shape = [None, None, None] output_shape[c_axis] = out_channels if given_output_shape is None: if input_shape[w_axis] is not None: output_shape[w_axis] = get_deconv_output_length( input_shape[w_axis], kernel_shape[0], strides[0], padding ) else: if not is_tensor_object(given_output_shape): output_shape[w_axis] = given_w # infer the batch shape in 3-d batch_shape = input_shape[:-2] if None not in batch_shape: output_shape[0] = int(np.prod(batch_shape)) # now the static output shape is ready output_static_shape = tf.TensorShape(output_shape) # prepare for the dynamic batch shape if output_shape[0] is None: output_shape[0] = tf.reduce_prod(get_shape(input)[:-2]) # prepare for the dynamic spatial dimensions if output_shape[w_axis] is None: if given_output_shape is None: input_shape = get_shape(input) if output_shape[w_axis] is None: output_shape[w_axis] = get_deconv_output_length( input_shape[w_axis], kernel_shape[0], strides[0], padding ) else: assert(is_tensor_object(given_output_shape)) with assert_deps([ assert_rank(given_output_shape, 1), assert_scalar_equal( tf.size(given_output_shape), 1) ]): output_shape[w_axis] = given_output_shape[0] # compose the final dynamic shape if any(is_tensor_object(s) for s in output_shape): output_shape = tf.stack(output_shape) else: output_shape = tuple(output_shape) # create the variables if kernel is None: kernel = model_variable( 'kernel', shape=kernel_shape, dtype=dtype, initializer=kernel_initializer, regularizer=kernel_regularizer, constraint=kernel_constraint, trainable=trainable ) maybe_add_histogram(kernel, 'kernel') kernel 
= maybe_check_numerics(kernel, 'kernel') if use_bias and bias is None: bias = model_variable( 'bias', shape=bias_shape, initializer=bias_initializer, regularizer=bias_regularizer, constraint=bias_constraint, trainable=trainable ) maybe_add_histogram(bias, 'bias') bias = maybe_check_numerics(bias, 'bias') # flatten to 3d output, s1, s2 = flatten_to_ndims(input, 3) # do convolution or deconvolution output = tf.contrib.nn.conv1d_transpose( value=output, filter=kernel, output_shape=output_shape, stride=strides, padding=padding, data_format=data_format ) if output_static_shape is not None: output.set_shape(output_static_shape) # add bias if use_bias: output = tf.add(output, bias) # apply the normalization function if specified if normalizer_fn is not None: output = normalizer_fn(output) # split into halves if gated if gated: output, gate = tf.split(output, 2, axis=c_axis) # apply the activation function if specified if activation_fn is not None: output = activation_fn(output) # apply the gate if required if gated: if gate_sigmoid_bias is None: gate_sigmoid_bias = model_variable( 'gate_sigmoid_bias', shape=bias_shape, initializer=bias_initializer, regularizer=bias_regularizer, constraint=bias_constraint, trainable=trainable ) maybe_add_histogram(gate_sigmoid_bias, 'gate_sigmoid_bias') gate_sigmoid_bias = maybe_check_numerics(gate_sigmoid_bias, 'gate_sigmoid_bias') output = output * tf.sigmoid(gate + gate_sigmoid_bias, name='gate') # unflatten back to original shape output = unflatten_from_ndims(output, s1, s2) maybe_add_histogram(output, 'output') output = maybe_check_numerics(output, 'output') return output ================================================ FILE: algorithm/mcmc_recons.py ================================================ import tensorflow as tf __all__ = ['masked_reconstruct', 'mcmc_reconstruct'] def masked_reconstruct(reconstruct, x, u, mask, name=None): """ Replace masked elements of `x` with reconstructed outputs. The potential anomaly points on x can be masked, and replaced by the reconstructed values. This can make the reconstruction more likely to be the normal pattern x should follow. Args: reconstruct ((tf.Tensor, tf.Tensor, tf.Tensor) -> tf.Tensor): Function for reconstructing `x`. x: The tensor to be reconstructed by `func`. u: Additional input for reconstructing `x`. mask: (tf.Tensor) mask, must be broadcastable into the shape of `x`. Indicating whether or not to mask each element of `x`. name (str): Name of this operation in TensorFlow graph. (default "masked_reconstruct") Returns: tf.Tensor: `x` with masked elements replaced by reconstructed outputs. """ with tf.name_scope(name, default_name='masked_reconstruct'): x = tf.convert_to_tensor(x) # type: tf.Tensor mask = tf.convert_to_tensor(mask, dtype=tf.int32) # type: tf.Tensor mask = tf.broadcast_to(mask, tf.shape(x)) # get reconstructed x. Currently only support mask the last point if pixelcnn decoder is used. x_recons = reconstruct(x, u, mask) # get masked outputs return tf.where(tf.cast(mask, dtype=tf.bool), x_recons, x) def mcmc_reconstruct(reconstruct, x, u, mask, iter_count, back_prop=True, name=None): """ Iteratively reconstruct `x` with `mask` for `iter_count` times. This method will call :func:`masked_reconstruct` for `iter_count` times, with the output from previous iteration as the input `x` for the next iteration. The output of the final iteration would be returned. Args: reconstruct: Function for reconstructing `x`. x: The tensor to be reconstructed by `func`. u: Additional input for reconstructing `x`. 
mask: (tf.Tensor) mask, must be broadcastable into the shape of `x`. Indicating whether or not to mask each element of `x`. iter_count (int or tf.Tensor): Number of mcmc iterations(must be greater than 1). back_prop (bool): Whether or not to support back-propagation through all the iterations? (default :obj:`True`) name (str): Name of this operation in TensorFlow graph. (default "iterative_masked_reconstruct") Returns: tf.Tensor: The iteratively reconstructed `x`. """ with tf.name_scope(name, default_name='mcmc_reconstruct'): # do the masked reconstructions x_recons, _ = tf.while_loop( lambda x_i, i: i < iter_count, lambda x_i, i: (masked_reconstruct(reconstruct, x_i, u, mask), i + 1), [x, tf.constant(0, dtype=tf.int32)], back_prop=back_prop ) return x_recons ================================================ FILE: algorithm/real_nvp.py ================================================ import tensorflow as tf import tfsnippet as spt from tensorflow.contrib.framework import arg_scope import numpy as np from tfsnippet.layers.flows.utils import ZeroLogDet class FeatureReversingFlow(spt.layers.FeatureMappingFlow): def __init__(self, axis=-1, value_ndims=1, name=None, scope=None): super(FeatureReversingFlow, self).__init__( axis=int(axis), value_ndims=value_ndims, name=name, scope=scope) @property def explicitly_invertible(self): return True def _build(self, input=None): pass def _reverse_feature(self, x, compute_y, compute_log_det): n_features = spt.utils.get_static_shape(x)[self.axis] if n_features is None: raise ValueError('The feature dimension must be fixed.') assert (0 > self.axis >= -self.value_ndims >= -len(spt.utils.get_static_shape(x))) permutation = np.asarray(list(reversed(range(n_features))), dtype=np.int32) # compute y y = None if compute_y: y = tf.gather(x, permutation, axis=self.axis) # compute log_det log_det = None if compute_log_det: log_det = ZeroLogDet(spt.utils.get_shape(x)[:-self.value_ndims], x.dtype.base_dtype) return y, log_det def _transform(self, x, compute_y, compute_log_det): return self._reverse_feature(x, compute_y, compute_log_det) def _inverse_transform(self, y, compute_x, compute_log_det): return self._reverse_feature(y, compute_x, compute_log_det) def dense_real_nvp(flow_depth: int, activation, kernel_regularizer, scope: str, use_invertible_flow=True, strict_invertible=False, use_actnorm_flow=False, dense_coupling_n_hidden_layers=1, dense_coupling_n_hidden_units=100, coupling_scale_shift_initializer='zero', # 'zero' or 'normal' coupling_scale_shift_normal_initializer_stddev=0.001, coupling_scale_type='sigmoid', # 'sigmoid' or 'exp' coupling_sigmoid_scale_bias=2., is_prior_flow=False) -> spt.layers.BaseFlow: def shift_and_scale(x1, n2): with arg_scope([spt.layers.dense], activation_fn=activation, kernel_regularizer=kernel_regularizer): h = x1 for j in range(dense_coupling_n_hidden_layers): h = spt.layers.dense(h, units=dense_coupling_n_hidden_units, scope='hidden_{}'.format(j)) # compute shift and scale if coupling_scale_shift_initializer == 'zero': pre_params_initializer = tf.zeros_initializer() else: pre_params_initializer = tf.random_normal_initializer( stddev=coupling_scale_shift_normal_initializer_stddev) pre_params = spt.layers.dense(h, units=n2 * 2, kernel_initializer=pre_params_initializer, scope='shift_and_scale',) shift = pre_params[..., :n2] scale = pre_params[..., n2:] return shift, scale with tf.variable_scope(scope): flows = [] for i in range(flow_depth): level = [] if use_invertible_flow: level.append( spt.layers.InvertibleDense( 
strict_invertible=strict_invertible) ) else: level.append(FeatureReversingFlow()) level.append( spt.layers.CouplingLayer( tf.make_template( 'coupling', shift_and_scale, create_scope_now_=True), scale_type=coupling_scale_type, sigmoid_scale_bias=coupling_sigmoid_scale_bias, ) ) if use_actnorm_flow: level.append(spt.layers.ActNorm()) flows.extend(level) flow = spt.layers.SequentialFlow(flows) if is_prior_flow: flow = flow.invert() return flow ================================================ FILE: algorithm/recurrent_distribution.py ================================================ import tensorflow as tf import tfsnippet as spt from tfsnippet.distributions import Distribution, Normal from tfsnippet.stochastic import StochasticTensor import numpy as np class RecurrentDistribution(Distribution): def __init__(self, input, mean_layer, logstd_layer, z_dim, window_length, is_reparameterized=True, check_numerics=False): batch_shape = spt.utils.concat_shapes([spt.utils.get_shape(input)[:-1], [z_dim]]) batch_static_shape = tf.TensorShape(spt.utils.get_static_shape(input)[:-1] + (z_dim,)) super(RecurrentDistribution, self).__init__(dtype=input.dtype, is_continuous=True, is_reparameterized=is_reparameterized, value_ndims=0, batch_shape=batch_shape, batch_static_shape=batch_static_shape) self.mean_layer = mean_layer self.logstd_layer = logstd_layer self.z_dim = z_dim self._check_numerics = check_numerics self.window_length = window_length self.origin_input = input if len(input.shape) > 3: input, s1, s2 = spt.ops.flatten_to_ndims(input, 3) self.time_first_input = tf.transpose(input, [1, 0, 2]) self.s1 = s1 self.s2 = s2 self.need_unflatten = True elif len(input.shape) == 3: self.time_first_input = tf.transpose(input, [1, 0, 2]) # (window_length, batch_size, feature_dim) self.s1 = None self.s2 = None self.need_unflatten = False else: raise ValueError('Invalid input shape in recurrent distribution.') self._mu = None self._logstd = None def mean(self): return self._mu def logstd(self): return self._logstd def _normal_pdf(self, x, mu, logstd): c = -0.5 * np.log(2 * np.pi) precision = tf.exp(-2 * logstd) if self._check_numerics: precision = tf.check_numerics(precision, "precision") log_prob = c - logstd - 0.5 * precision * tf.square(x - mu) if self._check_numerics: log_prob = tf.check_numerics(log_prob, 'log_prob') return log_prob def sample_step(self, a, t): z_previous, mu_z_previous, logstd_z_previous, _ = a noise, input = t # use the sampled z to derive the (mu. sigma) on next timestamp. may introduce small noise for each sample step. 
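# Each scan step realizes one factor of the autoregressive distribution over z:
# the previous sample z_{t-1} is concatenated with the input features e_t of the
# current timestamp, `mean_layer` / `logstd_layer` map this concatenation to
# (mu_t, logstd_t), and z_t is drawn with the reparameterization trick as
# z_t = mu_t + exp(logstd_t) * noise_t.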
concat_input = spt.ops.broadcast_concat(input, z_previous, axis=-1) mu = self.mean_layer(concat_input) # n_sample * batch_size * z_dim logstd = self.logstd_layer(concat_input) # n_sample * batch_size * z_dim std = spt.utils.maybe_check_numerics(tf.exp(logstd), name='recurrent_distribution_z_std', message='z_std in recurrent distribution exceeds.') z_n = mu + std * noise log_prob = self._normal_pdf(z_n, mu, logstd) return z_n, mu, logstd, log_prob def log_prob_step(self, a, t): z_previous, _, _, log_prob_previous = a given_n, input_n = t concat_input = spt.ops.broadcast_concat(z_previous, input_n, axis=-1) mu = self.mean_layer(concat_input) logstd = self.logstd_layer(concat_input) log_prob_n = self._normal_pdf(given_n, mu, logstd) return given_n, mu, logstd, log_prob_n def sample(self, n_samples=None, is_reparameterized=None, group_ndims=0, compute_density=False, name=None): if n_samples is None: n_samples = 1 n_samples_is_none = True else: n_samples_is_none = False with tf.name_scope(name=name, default_name='sample'): noise = tf.random_normal(shape=[n_samples, tf.shape(self.time_first_input)[0], tf.shape(self.time_first_input)[1], self.z_dim]) # (n_samples, window_length, batch_size, z_dim) noise = tf.transpose(noise, [1, 0, 2, 3]) # (window_length, n_samples, batch_size, z_dim) time_indices_shape = tf.convert_to_tensor([n_samples, tf.shape(self.time_first_input)[1], self.z_dim]) # (n_samples, batch_size, z_dim) results = tf.scan(fn=self.sample_step, elems=(noise, self.time_first_input), initializer=(tf.zeros(time_indices_shape), tf.zeros(time_indices_shape), tf.zeros(time_indices_shape), tf.zeros(time_indices_shape)), back_prop=True ) # 4 * window_length * n_samples * batch_size * z_dim samples = tf.transpose(results[0], [1, 2, 0, 3]) # n_samples * batch_size * window_length * z_dim log_prob = tf.transpose(results[-1], [1, 2, 0, 3]) # (n_samples, batch_size, window_length, z_dim) if self.need_unflatten: # unflatten to (n_samples, n_samples_of_input_tensor, batch_size, window_length, z_dim) samples = tf.stack([spt.ops.unflatten_from_ndims(samples[i], self.s1, self.s2) for i in range(n_samples)], axis=0) log_prob = tf.stack([spt.ops.unflatten_from_ndims(log_prob[i], self.s1, self.s2) for i in range(n_samples)], axis=0) log_prob = spt.reduce_group_ndims(tf.reduce_sum, log_prob, group_ndims) if n_samples_is_none: t = StochasticTensor( distribution=self, tensor=tf.reduce_mean(samples, axis=0), group_ndims=group_ndims, is_reparameterized=self.is_reparameterized, log_prob=tf.reduce_mean(log_prob, axis=0) ) self._mu = tf.reduce_mean(tf.transpose(results[1], [1, 2, 0, 3]), axis=0) self._logstd = tf.reduce_mean(tf.transpose(results[2], [1, 2, 0, 3]), axis=0) if self.need_unflatten: self._mu = spt.ops.unflatten_from_ndims(self._mu, self.s1, self.s2) self._logstd = spt.ops.unflatten_from_ndims(self._logstd, self.s1, self.s2) else: t = StochasticTensor( distribution=self, tensor=samples, n_samples=n_samples, group_ndims=group_ndims, is_reparameterized=self.is_reparameterized, log_prob=log_prob ) self._mu = tf.transpose(results[1], [1, 2, 0, 3]) self._logstd = tf.transpose(results[2], [1, 2, 0, 3]) if self.need_unflatten: self._mu = tf.stack([spt.ops.unflatten_from_ndims(self._mu[i], self.s1, self.s2) for i in range(n_samples)], axis=0) self._logstd = tf.stack([spt.ops.unflatten_from_ndims(self._logstd[i], self.s1, self.s2) for i in range(n_samples)], axis=0) return t def log_prob(self, given, group_ndims=0, name=None): with tf.name_scope(name=name, default_name='log_prob'): if self.need_unflatten: 
assert len(given.shape) == len(self.origin_input.shape) assert given.shape[0] == self.origin_input.shape[0] time_first_input = tf.transpose(self.origin_input, [2, 0, 1, 3]) # (window, sample, batch, feature) # time_indices_shape: (n_sample, batch_size, z_dim) time_indices_shape = tf.convert_to_tensor([tf.shape(given)[0], tf.shape(time_first_input)[2], self.z_dim]) given = tf.transpose(given, [2, 0, 1, 3]) else: if len(given.shape) > 3: # (n_sample, batch_size, window_length, z_dim) time_indices_shape = tf.convert_to_tensor([tf.shape(given)[0], tf.shape(self.time_first_input)[1], self.z_dim]) given = tf.transpose(given, [2, 0, 1, 3]) time_first_input = self.time_first_input else: # (batch_size, window_length, z_dim) time_indices_shape = tf.convert_to_tensor([tf.shape(self.time_first_input)[1], self.z_dim]) given = tf.transpose(given, [1, 0, 2]) time_first_input = self.time_first_input results = tf.scan(fn=self.log_prob_step, elems=(given, time_first_input), initializer=(tf.zeros(time_indices_shape), tf.zeros(time_indices_shape), tf.zeros(time_indices_shape), tf.zeros(time_indices_shape)), back_prop=True ) # (window_length, ?, batch_size, z_dim) if len(given.shape) > 3: log_prob = tf.transpose(results[-1], [1, 2, 0, 3]) else: log_prob = tf.transpose(results[-1], [1, 0, 2]) log_prob = spt.reduce_group_ndims(tf.reduce_sum, log_prob, group_ndims) return log_prob def prob(self, given, group_ndims=0, name=None): with tf.name_scope(name=name, default_name='prob'): log_prob = self.log_prob(given, group_ndims, name) return tf.exp(log_prob) ================================================ FILE: algorithm/stack_predict.py ================================================ import mltk import os from explib.eval_methods import get_best_f1, get_adjusted_composite_metrics from algorithm.utils import GraphNodes, get_data, time_generator, get_sliding_window_data_flow, get_score, \ get_avg_recons import tfsnippet as spt import tensorflow as tf from tqdm import tqdm from algorithm.InterFusion import MTSAD from algorithm.InterFusion_swat import MTSAD_SWAT import numpy as np from typing import Optional import pickle from algorithm.mcmc_recons import mcmc_reconstruct, masked_reconstruct from algorithm.cal_IPS import cal_IPS __all__ = ['PredictConfig', 'final_testing', 'build_test_graph'] class PredictConfig(mltk.Config): load_model_dir: Optional[str] # evaluation params test_n_z = 100 test_batch_size = 50 test_start = 0 max_test_size = None # `None` means full test set save_results = True output_dirs = 'analysis_results' train_score_filename = 'train_score.pkl' test_score_filename = 'test_score.pkl' preserve_feature_dim = False # whether to preserve the feature dim in score. If `True`, the score will be a 2-dim ndarray anomaly_score_calculate_latency = 1 # How many scores are averaged for the final score at a timestamp. `1` means use last point in each sliding window only. plot_recons_results = True use_mcmc = True # use mcmc on the last point for anomaly detection mcmc_iter = 10 mcmc_rand_mask = False n_mc_chain: int = 10 pos_mask = True mcmc_track = True # use mcmc tracker for anomaly interpretation and calculate IPS. 
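# A minimal sketch of how the masked MCMC reconstruction can be wired up for
# last-point anomaly scoring, mirroring the graph built in `main()` below. The
# function name and default values here are illustrative assumptions; the real
# options come from `PredictConfig` above (use_mcmc, mcmc_iter, test_n_z).
def build_mcmc_scoring_graph_sketch(model, input_x, input_u, n_z=100, mcmc_iter=10):
    # mask all dimensions of the last point in each sliding window
    mask = tf.concat([tf.zeros_like(input_x[:, :-1, :]),
                      tf.ones_like(input_x[:, -1:, :])], axis=1)
    mask = tf.cast(mask, tf.int32)
    # iteratively impute the masked entries with the model's reconstruction
    x_mcmc = mcmc_reconstruct(model.reconstruct, input_x, input_u, mask,
                              mcmc_iter, back_prop=False)
    # encode the imputed window, but evaluate the likelihood on the observed window
    q_net = model.q_net(x_mcmc, u=input_u, n_z=n_z)
    chain = q_net.chain(model.p_net, observed={'x': input_x}, latent_axis=0, u=input_u)
    return build_test_graph(chain, input_x)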
def build_test_graph(chain: spt.VariationalChain, input_x, origin_chain: spt.VariationalChain=None) -> GraphNodes: test_recons = tf.reduce_mean(chain.model['x'].log_prob(), axis=0) logpx = chain.model['x'].log_prob() logpz = chain.model['z2'].log_prob() + chain.model['z1'].log_prob() logqz_x = chain.variational['z1'].log_prob() + chain.variational['z2'].log_prob() test_lb = tf.reduce_mean(logpx + logpz - logqz_x, axis=0) log_joint = logpx + logpz latent_log_prob = logqz_x test_ll = spt.importance_sampling_log_likelihood(log_joint=log_joint, latent_log_prob=latent_log_prob, axis=0) test_nll = -test_ll # average over sample dim if origin_chain is not None: full_recons_prob = tf.reduce_mean( (chain.model['x'].distribution.base_distribution.log_prob(input_x) - origin_chain.model['x'].distribution.base_distribution.log_prob(input_x)), axis=0 ) else: full_recons_prob = tf.reduce_mean(chain.model['x'].distribution.base_distribution.log_prob(input_x), axis=0) if origin_chain is not None: origin_log_joint = origin_chain.model['x'].log_prob() + origin_chain.model['z1'].log_prob() + origin_chain.model['z2'].log_prob() origin_latent_log_prob = origin_chain.variational['z1'].log_prob() + origin_chain.variational['z2'].log_prob() origin_ll = spt.importance_sampling_log_likelihood(log_joint=origin_log_joint, latent_log_prob=origin_latent_log_prob, axis=0) test_ll_score = test_ll - origin_ll else: test_ll_score = test_ll outputs = { 'test_nll': test_nll, 'test_lb': test_lb, 'test_recons': test_recons, 'test_kl': test_recons - test_lb, 'full_recons_prob': full_recons_prob, 'test_ll': test_ll_score } return GraphNodes(outputs) def build_recons_graph(chain: spt.VariationalChain, window_length, feature_dim, unified_x_std=False) -> GraphNodes: # average over sample dim recons_x = tf.reduce_mean(chain.model['x'].distribution.base_distribution.mean, axis=0) recons_x = spt.utils.InputSpec(shape=['?', window_length, feature_dim]).validate('recons', recons_x) if unified_x_std: recons_x_std = chain.model['x'].distribution.base_distribution.std recons_x_std = spt.ops.broadcast_to_shape(recons_x_std, tf.shape(recons_x)) else: recons_x_std = tf.reduce_mean(chain.model['x'].distribution.base_distribution.std, axis=0) recons_x_std = spt.utils.InputSpec(shape=['?', window_length, feature_dim]).validate('recons_std', recons_x_std) return GraphNodes({'recons_x': recons_x, 'recons_x_std': recons_x_std}) def get_recons_results(recons_nodes: GraphNodes, input_x, input_u, data_flow: spt.DataFlow, total_batch_count, dataset, mask=None, rand_x=None): data_flow = data_flow.threaded(5) recons_collector = [] recons_std_collector = [] session = spt.utils.get_default_session_or_error() with data_flow: for batch_x, batch_u in tqdm(data_flow, unit='step', total=total_batch_count, ascii=True): if mask is not None: batch_mask = np.zeros(shape=batch_x.shape) batch_mask[:, -1, :] = 1 # mask all dims of the last point in x if rand_x is not None: batch_output = recons_nodes.eval(session, feed_dict={input_x: batch_x, input_u: batch_u, mask: batch_mask, rand_x: np.random.random(batch_x.shape)}) else: batch_output = recons_nodes.eval(session, feed_dict={input_x: batch_x, input_u: batch_u, mask: batch_mask}) else: batch_output = recons_nodes.eval(session, feed_dict={input_x: batch_x, input_u: batch_u}) for k, v in batch_output.items(): if k == 'recons_x': if dataset == 'SWaT' or dataset == 'WADI': # idx = min(10, v.shape[1]) recons_collector.append(v[:, -10:, :]) else: recons_collector.append(v) elif k == 'recons_x_std': if dataset == 'SWaT' or 
dataset == 'WADI': # idx = min(10, v.shape[1]) recons_std_collector.append(v[:, -10:, :]) else: recons_std_collector.append(v) all_recons = np.concatenate(recons_collector, axis=0) # (data_length - window_length + 1, window_length, x_dim) print(all_recons.shape) all_recons_std = np.concatenate(recons_std_collector, axis=0) return all_recons, all_recons_std def final_testing(test_metrics: GraphNodes, input_x, input_u, data_flow: spt.DataFlow, total_batch_count, y_test=None, mask=None, rand_x=None): data_flow = data_flow.threaded(5) full_recons_collector = [] ll_collector = [] epoch_out = {} stats = {} session = spt.utils.get_default_session_or_error() with data_flow: for batch_x, batch_u in tqdm(data_flow, unit='step', total=total_batch_count, ascii=True): if mask is not None: batch_mask = np.zeros(shape=batch_x.shape) batch_mask[:, -1, :] = 1 # mask all dims of the last point in x if rand_x is not None: batch_output = test_metrics.eval(session, feed_dict={input_x: batch_x, input_u: batch_u, mask: batch_mask, rand_x: np.random.random(batch_x.shape)}) else: batch_output = test_metrics.eval(session, feed_dict={input_x: batch_x, input_u: batch_u, mask: batch_mask}) else: batch_output = test_metrics.eval(session, feed_dict={input_x: batch_x, input_u: batch_u}) for k, v in batch_output.items(): if k == 'full_recons_prob': full_recons_collector.append(v) elif k == 'test_ll': ll_collector.append(v) if k not in epoch_out: epoch_out[k] = [] epoch_out[k].append(v) else: if k not in epoch_out: epoch_out[k] = [] epoch_out[k].append(v) # save the results of this epoch, and compute epoch stats. Take average over both batch and window_length dim. for k, v in epoch_out.items(): epoch_out[k] = np.concatenate(epoch_out[k], axis=0) if k not in stats: stats[k] = [] stats[k].append(float(np.mean(epoch_out[k]))) # collect full recons prob for calculate anomaly score full_recons_probs = np.concatenate(full_recons_collector, axis=0) # (data_length-window_length+1, window_length, x_dim) ll = np.concatenate(ll_collector, axis=0) if y_test is not None: assert full_recons_probs.shape[0] + full_recons_probs.shape[1] - 1 == len(y_test) tmp1 = [] for i in range(full_recons_probs.shape[0]): if y_test[i + full_recons_probs.shape[1] - 1] < 0.5: tmp1.append(np.sum(full_recons_probs[i, -1], axis=-1)) # normal point recons score stats['normal_point_test_recons'] = [float(np.mean(tmp1))] # calculate average statistics for k, v in stats.items(): stats[k] = float(np.mean(v)) return stats, full_recons_probs, ll def mcmc_tracker(flow: spt.DataFlow, baseline, model, input_x, input_u, mask, max_iter, total_window_num, window_length, x_dim, mask_last=False, pos_mask=False, use_rand_mask=False, n_mc_chain=1): # the baseline is the avg total score in a window on training set. 
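# Outline of the search below: score each dimension (or, with `pos_mask`, each
# (time, dimension) position) by its reconstruction probability, mask the K
# lowest-scoring ones, impute them with masked MCMC reconstruction for up to
# `max_iter` iterations, and gradually increase K until the window's normalized
# total score recovers to the training-set baseline. The best (score, K) found
# along the way is recorded as the interpretation result for that window.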
session = spt.utils.get_default_session_or_error() last_x = tf.placeholder(dtype=tf.float32, shape=[None, window_length, x_dim], name='last_x') x_r = masked_reconstruct(model.reconstruct, last_x, input_u, mask) score, recons_mean, recons_std = model.get_score(x_embed=x_r, x_eval=input_x, u=input_u) tot_score = tf.reduce_sum(tf.multiply(score, tf.cast((1-mask), score.dtype))) def avg_multi_chain(x, n_chain): shape = (-1,) + (n_chain,) + x.shape[1:] return np.mean(x.reshape(shape), axis=1) res = {} with flow.threaded(5) as flow: for batch_x, batch_u, batch_score, batch_ori_recons, batch_ori_std, batch_idx \ in tqdm(flow, unit='step', total=total_window_num, ascii=True): batch_idx = batch_idx[0] res[batch_idx] = {'x': [batch_x], 'recons': [batch_ori_recons], 'std': [batch_ori_std], 'score': [batch_score], 'K': [0], 'iter': [-1], 'mask': [np.zeros(shape=batch_x.shape)], 'total_score': [np.mean(np.sum(batch_score, axis=-1))]} best_score = batch_score best_total_score = np.mean(np.sum(batch_score, axis=-1)) best_K = 0 if pos_mask: pos_scores = np.mean(batch_score, axis=0) # (window, x_dim) sorted_pos_idx = np.argsort(pos_scores, axis=None) potential_dim_num = np.sum((pos_scores < (baseline/(x_dim*window_length))).astype(np.int32)) else: dim_scores = np.mean(batch_score, axis=(-2,-3)) # (x_dim, ) sorted_dim_idx = np.argsort(dim_scores) potential_dim_num = np.sum((dim_scores < (baseline/(x_dim*window_length))).astype(np.int32)) # num of dims whose avg score < baseline if potential_dim_num > 0: K_init = max(potential_dim_num//5, 1) K_inc = max(potential_dim_num//10, 1) else: res[batch_idx]['best_score'] = best_score res[batch_idx]['best_total_score'] = best_total_score res[batch_idx]['best_K'] = best_K continue if use_rand_mask: rand_x = np.random.random(size=batch_x.shape) if pos_mask: max_K = x_dim * window_length else: max_K = x_dim for K in range(K_init, min(potential_dim_num+1, max_K), K_inc): if pos_mask: mask_idx = sorted_pos_idx[:K] batch_mask = np.zeros(shape=batch_x.shape) batch_mask = batch_mask.reshape([batch_x.shape[0], -1]) batch_mask[:, mask_idx] = 1 batch_mask = batch_mask.reshape(batch_x.shape) else: mask_idx = sorted_dim_idx[:K] batch_mask = np.zeros(shape=batch_x.shape) batch_mask[:, :, mask_idx] = 1 if mask_last: batch_mask[:, -1, :] = 1 batch_last_x = batch_x if use_rand_mask: batch_last_x = np.where(batch_mask.astype(np.bool), rand_x, batch_last_x) if n_mc_chain > 1: init_x = np.repeat(batch_x, n_mc_chain, axis=0) init_u = np.repeat(batch_u, n_mc_chain, axis=0) init_mask = np.repeat(batch_mask, n_mc_chain, axis=0) init_last_x = np.repeat(batch_last_x, n_mc_chain, axis=0) for i in range(max_iter): if n_mc_chain > 1: x_mc, x_recons, x_std, x_score, x_tot_score = \ session.run([x_r, recons_mean, recons_std, score, tot_score], feed_dict={input_x: init_x, input_u: init_u, mask: init_mask, last_x: init_last_x}) init_last_x = x_mc x_mc = avg_multi_chain(x_mc, n_mc_chain) x_recons = avg_multi_chain(x_recons, n_mc_chain) x_std = avg_multi_chain(x_std, n_mc_chain) x_score = avg_multi_chain(x_score, n_mc_chain) x_tot_score = float(x_tot_score) / float(n_mc_chain) else: x_mc, x_recons, x_std, x_score, x_tot_score = \ session.run([x_r, recons_mean, recons_std, score, tot_score], feed_dict={input_x: batch_x, input_u: batch_u, mask: batch_mask, last_x: batch_last_x}) batch_last_x = x_mc total_score = float(x_tot_score) / (window_length * x_dim - np.sum(batch_mask)) / batch_x.shape[0] * x_dim res[batch_idx]['x'].append(x_mc) res[batch_idx]['recons'].append(x_recons) 
res[batch_idx]['std'].append(x_std) res[batch_idx]['score'].append(x_score) res[batch_idx]['K'].append(K) res[batch_idx]['iter'].append(i) res[batch_idx]['mask'].append(batch_mask) res[batch_idx]['total_score'].append(total_score) last_score = res[batch_idx]['total_score'][-1] if last_score >= best_total_score: best_total_score = last_score best_score = res[batch_idx]['score'][-1] best_K = res[batch_idx]['K'][-1] if best_total_score >= (baseline/window_length): break res[batch_idx]['best_score'] = best_score res[batch_idx]['best_total_score'] = best_total_score res[batch_idx]['best_K'] = best_K return res def log_mean_exp(x, axis, keepdims=False): x_max = np.max(x, axis=axis, keepdims=True) ret = x_max + np.log(np.mean(np.exp(x - x_max), axis=axis, keepdims=True)) if not keepdims: ret = np.squeeze(ret, axis=axis) return ret def log_sum_exp(x, axis, keepdims=False): x_max = np.max(x, axis=axis, keepdims=True) ret = x_max + np.log(np.sum(np.exp(x - x_max), axis=axis, keepdims=True)) if not keepdims: ret = np.squeeze(ret, axis=axis) return ret def main(exp: mltk.Experiment[PredictConfig], test_config: PredictConfig): if test_config.load_model_dir is None: raise ValueError('`--load_model_dir` is required.') exp_config_path = os.path.join(test_config.load_model_dir, 'config.json') from algorithm.stack_train import ExpConfig loader = mltk.ConfigLoader(ExpConfig()) loader.load_file(exp_config_path) train_config = loader.get() print(mltk.format_key_values(train_config, title='Train configurations')) print('') print(mltk.format_key_values(test_config, title='Test configurations')) print('') # set TFSnippet settings spt.settings.enable_assertions = False spt.settings.check_numerics = train_config.check_numerics exp.make_dirs(test_config.output_dirs) # prepare the data # simple data (x_train, _), (x_test, y_test) = \ get_data(train_config.dataset, train_config.train.max_train_size, train_config.test.max_test_size, train_start=train_config.train.train_start, test_start=train_config.test.test_start, valid_portion=train_config.train.valid_portion) if train_config.use_time_info: u_train = np.asarray([time_generator(_i) for _i in range(len(x_train))]) # (train_size, u_dim) u_test = np.asarray([time_generator(len(x_train) + _i) for _i in range(len(x_test))]) # (test_size, u_dim) else: u_train = np.zeros([len(x_train), train_config.model.u_dim]) # (train_size, u_dim) u_test = np.zeros([len(x_test), train_config.model.u_dim]) # prepare data_flow test_flow = get_sliding_window_data_flow(window_size=train_config.model.window_length, batch_size=test_config.test_batch_size, x=x_test, u=u_test, shuffle=False, skip_incomplete=False) evaluate_score_train_flow = get_sliding_window_data_flow(window_size=train_config.model.window_length, batch_size=test_config.test_batch_size, x=x_train, u=u_train, shuffle=False, skip_incomplete=False) # build computation graph if train_config.dataset == 'SWaT' or train_config.dataset == 'WADI': model = MTSAD_SWAT(train_config.model, scope='model') else: model = MTSAD(train_config.model, scope='model') # input placeholders input_x = tf.placeholder(dtype=tf.float32, shape=[None, train_config.model.window_length, train_config.model.x_dim], name='input_x') input_u = tf.placeholder(dtype=tf.float32, shape=[None, train_config.model.window_length, train_config.model.u_dim], name='input_u') mask = tf.placeholder(dtype=tf.int32, shape=[None, train_config.model.window_length, train_config.model.x_dim], name='mask') rand_x = tf.placeholder(dtype=tf.float32, shape=[None, 
train_config.model.window_length, train_config.model.x_dim], name='rand_x') tmp_out = None if test_config.use_mcmc: with tf.name_scope('mcmc_init'): tmp_qnet = model.q_net(input_x, u=input_u, n_z=test_config.test_n_z) tmp_chain = tmp_qnet.chain(model.p_net, observed={'x': input_x}, latent_axis=0, u=input_u) tmp_out = tf.reduce_mean(tmp_chain.vi.lower_bound.elbo()) # derive testing nodes with tf.name_scope('testing'): if test_config.use_mcmc: if test_config.mcmc_rand_mask: # use random value to mask the initial input for mcmc (otherwise use the original one) if test_config.n_mc_chain > 1: # average the results of multi-mcmc chain for each input x. init_x = tf.where(tf.cast(mask, dtype=tf.bool), rand_x, input_x) init_x, s1, s2 = spt.ops.flatten_to_ndims(tf.tile(tf.expand_dims(init_x, 1), [1, test_config.n_mc_chain, 1, 1]), 3) init_u, _, _ = spt.ops.flatten_to_ndims(tf.tile(tf.expand_dims(input_u, 1), [1, test_config.n_mc_chain, 1, 1]), 3) init_mask, _, _ = spt.ops.flatten_to_ndims(tf.tile(tf.expand_dims(mask, 1), [1, test_config.n_mc_chain, 1, 1]), 3) x_mcmc = mcmc_reconstruct(model.reconstruct, init_x, init_u, init_mask, test_config.mcmc_iter, back_prop=False) x_mcmc = spt.ops.unflatten_from_ndims(x_mcmc, s1, s2) x_mcmc = tf.reduce_mean(x_mcmc, axis=1) else: init_x = tf.where(tf.cast(mask, dtype=tf.bool), rand_x, input_x) x_mcmc = mcmc_reconstruct(model.reconstruct, init_x, input_u, mask, test_config.mcmc_iter, back_prop=False) else: if test_config.n_mc_chain > 1: init_x, s1, s2 = spt.ops.flatten_to_ndims(tf.tile(tf.expand_dims(input_x, 1), [1, test_config.n_mc_chain, 1, 1]), 3) init_u, _, _ = spt.ops.flatten_to_ndims(tf.tile(tf.expand_dims(input_u, 1), [1, test_config.n_mc_chain, 1, 1]), 3) init_mask, _, _ = spt.ops.flatten_to_ndims(tf.tile(tf.expand_dims(mask, 1), [1, test_config.n_mc_chain, 1, 1]), 3) x_mcmc = mcmc_reconstruct(model.reconstruct, init_x, init_u, init_mask, test_config.mcmc_iter, back_prop=False) x_mcmc = spt.ops.unflatten_from_ndims(x_mcmc, s1, s2) x_mcmc = tf.reduce_mean(x_mcmc, axis=1) else: x_mcmc = mcmc_reconstruct(model.reconstruct, input_x, input_u, mask, test_config.mcmc_iter, back_prop=False) else: x_mcmc = input_x test_q_net = model.q_net(x_mcmc, u=input_u, n_z=test_config.test_n_z) test_chain = test_q_net.chain(model.p_net, observed={'x': input_x}, latent_axis=0, u=input_u) test_metrics = build_test_graph(test_chain, input_x) if test_config.plot_recons_results: recons_nodes = build_recons_graph(test_chain, train_config.model.window_length, train_config.model.x_dim, train_config.model.unified_px_logstd) # obtain params to restore variables_to_restore = tf.global_variables() restore_path = os.path.join(test_config.load_model_dir, 'result_params/restored_params.dat') # obtain the variables initializer var_initializer = tf.variables_initializer(tf.global_variables()) test_flow = test_flow.threaded(5) evaluate_score_train_flow = evaluate_score_train_flow.threaded(5) with spt.utils.create_session().as_default() as session: session.run(var_initializer) saver = tf.train.Saver(var_list=variables_to_restore) saver.restore(session, restore_path) print('Model params restored.') # Evaluate the whole network if test_config.use_mcmc: for batch_x, batch_u in test_flow: _ = session.run(tmp_out, feed_dict={input_x: batch_x, input_u: batch_u}) break # do evaluation print('') print('*************Evaluate score on testing set************') test_batch_count = (len(x_test) - train_config.model.window_length + test_config.test_batch_size) // test_config.test_batch_size test_stats, 
test_full_recons_probs, test_ll = final_testing(test_metrics, input_x, input_u, test_flow, test_batch_count, y_test, mask=mask if test_config.use_mcmc else None, rand_x=rand_x if test_config.mcmc_rand_mask else None) print('') print(mltk.format_key_values(test_stats, 'Final testing statistics')) exp.update_results(test_stats) test_score = get_score(test_full_recons_probs, preserve_feature_dim=test_config.preserve_feature_dim, score_avg_window_size=test_config.anomaly_score_calculate_latency) # evaluate score on train set print('') print('*************Evaluate score on training set************') if train_config.dataset != 'WADI': train_set_batch_count = (len(x_train) - train_config.model.window_length + test_config.test_batch_size) // test_config.test_batch_size train_stats, train_full_recons_probs, _ = final_testing(test_metrics, input_x, input_u, evaluate_score_train_flow, train_set_batch_count, mask=mask if test_config.use_mcmc else None, rand_x=rand_x if test_config.mcmc_rand_mask else None) print('') print(mltk.format_key_values(train_stats, 'Training set evaluation statistics')) train_score = get_score(train_full_recons_probs, preserve_feature_dim=test_config.preserve_feature_dim, score_avg_window_size=test_config.anomaly_score_calculate_latency) print('train_score shape: ', train_score.shape) if test_config.save_results: np.savez(os.path.join((exp.abspath(test_config.output_dirs)), 'full_recons_window_probs.npz'), full_full_test_recons_window_probs=test_full_recons_probs, full_train_recons_window_probs=get_score(train_full_recons_probs, preserve_feature_dim=True, score_avg_window_size=test_config.anomaly_score_calculate_latency), full_test_recons_window_probs=get_score(test_full_recons_probs, preserve_feature_dim=True, score_avg_window_size=test_config.anomaly_score_calculate_latency)) if not test_config.mcmc_track: del test_full_recons_probs del train_full_recons_probs # for reconstruct plotting import matplotlib.pyplot as plt exp.make_dirs('figures') if test_config.plot_recons_results: print('') print('*************Calculating and plotting reconstruction data************') all_test_recons, all_test_recons_std = get_recons_results(recons_nodes, input_x, input_u, test_flow, test_batch_count, train_config.dataset, mask=mask if test_config.use_mcmc else None, rand_x=rand_x if test_config.mcmc_rand_mask else None) all_train_recons, all_train_recons_std = get_recons_results(recons_nodes, input_x, input_u, evaluate_score_train_flow, train_set_batch_count, train_config.dataset, mask=mask if test_config.use_mcmc else None, rand_x=rand_x if test_config.mcmc_rand_mask else None) final_test_recons = get_avg_recons(all_test_recons, window_length=train_config.model.window_length, recons_avg_window_size=test_config.anomaly_score_calculate_latency) final_train_recons = get_avg_recons(all_train_recons, window_length=train_config.model.window_length, recons_avg_window_size=test_config.anomaly_score_calculate_latency) if not test_config.mcmc_track: del all_test_recons del all_train_recons if test_config.anomaly_score_calculate_latency == 1: final_test_recons_std = get_avg_recons(all_test_recons_std, window_length=train_config.model.window_length) final_train_recons_std = get_avg_recons(all_train_recons_std, window_length=train_config.model.window_length) np.savez(os.path.join(exp.abspath(test_config.output_dirs), 'recons_plotting_data.npz'), x_train=x_train, x_test=x_test, x_train_recons=final_train_recons, x_train_recons_std=final_train_recons_std, x_test_recons=final_test_recons, 
x_test_recons_std=final_test_recons_std, y_test=y_test) else: # average x_recons_std is meaningless if calculate latency > 1 np.savez(os.path.join(exp.abspath(test_config.output_dirs), 'recons_plotting_data.npz'), x_train=x_train, x_test=x_test, x_train_recons=final_train_recons, x_test_recons=final_test_recons, y_test=y_test) if not test_config.mcmc_track: del all_test_recons_std del all_train_recons_std if test_config.train_score_filename is not None and train_config.dataset != 'WADI': with open(os.path.join(exp.abspath(test_config.output_dirs), test_config.train_score_filename), 'wb') as file: pickle.dump(train_score, file) if test_config.test_score_filename is not None: with open(os.path.join(exp.abspath(test_config.output_dirs), test_config.test_score_filename), 'wb') as file: pickle.dump(test_score, file) print('') print('*************Calculating best F1-score*****************') y_test = y_test[-len(test_score):] # get best f1 t, th = get_best_f1(test_score, y_test) best_thresh = th # output the results exp.update_results({ 'best-f1': t[0], 'precision': t[1], 'recall': t[2], 'TP': t[3], 'TN': t[4], 'FP': t[5], 'FN': t[6], 'threshold': th }) auroc, ap, _, _, _, _, _ = get_adjusted_composite_metrics(test_score, y_test) exp.update_results({ 'auroc': auroc, 'ap': ap }) if test_config.mcmc_track: # find the TP points that need to be interpreted according to best_thresh best_idx = np.logical_and(test_score <= best_thresh, y_test > 0.5) best_idx = np.where(best_idx)[0] total_x, total_u, total_score, total_recons, total_std, total_idx = [], [], [], [], [], [] for i in best_idx: total_x.append(x_test[i:i+train_config.model.window_length, :]) total_u.append(u_test[i:i+train_config.model.window_length, :]) total_score.append(test_full_recons_probs[i]) total_recons.append(all_test_recons[i]) total_std.append(all_test_recons_std[i]) total_idx.append(i + train_config.model.window_length - 1) total_x = np.stack(total_x, axis=0) total_u = np.stack(total_u, axis=0) total_score = np.stack(total_score, axis=0) total_recons = np.stack(total_recons, axis=0) total_std = np.stack(total_std, axis=0) total_idx = np.stack(total_idx, axis=0) best_flow = spt.DataFlow.arrays([total_x, total_u, total_score, total_recons, total_std, total_idx], batch_size=1, shuffle=False, skip_incomplete=False) best_baseline = train_stats['test_recons'] best_window_num = total_x.shape[0] res = mcmc_tracker(best_flow, best_baseline, model, input_x, input_u, mask, test_config.mcmc_iter, best_window_num, train_config.model.window_length, train_config.model.x_dim, test_config.use_mcmc, test_config.pos_mask, test_config.mcmc_rand_mask, test_config.n_mc_chain) with open(os.path.join(exp.abspath(test_config.output_dirs), 'mcmc_tracker.pkl'), 'wb') as file: pickle.dump(res, file) del res res = cal_IPS(path=exp.abspath(test_config.output_dirs), dataset=train_config.dataset, mcmc=True, is_pretrain=False) exp.update_results({'IPS': res['mcmc_p=100: wd_last_itv_min_weight'], 'IPS@150%': res['mcmc_p=150: wd_last_itv_min_weight']}) print('') print(mltk.format_key_values(exp.results), 'Results') if __name__ == '__main__': with mltk.Experiment(PredictConfig()) as exp: main(exp, exp.config) ================================================ FILE: algorithm/stack_train.py ================================================ # -*- coding: utf-8 -*- import os import logging import time import numpy as np import tensorflow as tf import tfsnippet as spt from tfsnippet.scaffold import TrainLoop from tfsnippet.trainer import Trainer, Evaluator import mltk 
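# The script below trains the model in two stages: a pretraining stage that fits
# the time-VAE producing z2 (the `pretrain_q_net` / `pretrain_p_net` scopes
# together with the shared `h_for_qz` / `h_for_px` networks), followed by joint
# training of the full hierarchical model over z1 and z2 with the SGVB objective
# defined in `sgvb_loss()`.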
from algorithm.utils import get_data_dim, get_data, get_sliding_window_data_flow, time_generator, GraphNodes import random from algorithm.InterFusion import ModelConfig, MTSAD from algorithm.InterFusion_swat import MTSAD_SWAT from algorithm.stack_predict import PredictConfig class TrainConfig(mltk.Config): # training params batch_size = 100 pretrain_max_epoch = 20 max_epoch = 20 train_start = 0 max_train_size = None # `None` means full train set initial_lr = 0.001 lr_anneal_factor = 0.5 lr_anneal_epoch_freq = 10 lr_anneal_step_freq = None pretrain_lr_anneal_epoch_freq = 10 early_stopping = True valid_portion = 0.3 save_test_stats = True class ExpConfig(mltk.Config): seed = int(time.time()) dataset = 'omi-1' # model params model = ModelConfig() @mltk.root_checker() def _model_post_checker(self, v: 'ExpConfig'): if v.model.x_dim == -1: v.model.x_dim = get_data_dim(v.dataset) if v.dataset == 'SWaT': v.model.z_dim = 2 if v.dataset == 'WADI': v.model.z_dim = 4 use_time_info = False # whether to use time information (minute, hour, day) as input u. discarded. model_type = 'mtsad' # train params train = TrainConfig() @mltk.root_checker() def _train_post_checker(self, v: 'ExpConfig'): if v.dataset == 'SWaT' or v.dataset == 'WADI': v.train.max_epoch = 15 v.train.save_test_stats = False v.train.pretrain_max_epoch = 10 v.train.pretrain_lr_anneal_epoch_freq = 5 v.train.lr_anneal_epoch_freq = 5 if v.dataset == 'SWaT': v.train.initial_lr = 0.0005 if v.dataset == 'WADI': v.train.initial_lr = 0.0002 test = PredictConfig() # debugging params write_summary = False write_histogram_summary = False check_numerics = False save_results = True save_ckpt = True ckpt_epoch_freq = 10 ckpt_max_keep = 10 pretrain_ckpt_epoch_freq = 20 pretrain_ckpt_max_keep = 10 exp_dir_save_path = None # The file path to save the exp dirs for batch run training on different datasets. def get_lr_value(init_lr, anneal_factor, anneal_freq, loop: spt.TrainLoop, ) -> spt.DynamicValue: """ Get the learning rate scheduler for the specified experiment. Args: init_lr: The initial learning rate. anneal_factor: The factor by which the learning rate is multiplied at each annealing step. anneal_freq: The number of epochs between annealing steps. loop: The train loop object. Returns: A dynamic value, which returns the learning rate each time its `.get()` is called.
""" return spt.AnnealingScalar( loop=loop, initial_value=init_lr, ratio=anneal_factor, epochs=anneal_freq, ) def sgvb_loss(qnet, pnet, metrics_dict: GraphNodes, prefix='train_', name=None): with tf.name_scope(name, default_name='sgvb_loss'): logpx_z = pnet['x'].log_prob(name='logpx_z') logpz1_z2 = pnet['z1'].log_prob(name='logpz1_z2') logpz2 = pnet['z2'].log_prob(name='logpz2') logpz = logpz1_z2 + logpz2 logqz1_x = qnet['z1'].log_prob(name='logqz1_x') logqz2_x = qnet['z2'].log_prob(name='logqz2_x') logqz_x = logqz1_x + logqz2_x recons_term = tf.reduce_mean(logpx_z) kl_term = tf.reduce_mean(logqz_x - logpz) metrics_dict[prefix + 'recons'] = recons_term metrics_dict[prefix + 'kl'] = kl_term return -tf.reduce_mean(logpx_z + logpz - logqz_x) def main(exp: mltk.Experiment[ExpConfig], config: ExpConfig): logging.basicConfig( level='INFO', format='%(asctime)s [%(levelname)s] %(name)s: %(message)s' ) # print the current seed and generate three seeds logging.info('Current random seed: %s', config.seed) np.random.seed(config.seed) random.seed(np.random.randint(0xffffffff)) tf.set_random_seed(np.random.randint(0xffffffff)) np.random.seed(np.random.randint(0xffffffff)) spt.settings.check_numerics = config.check_numerics spt.settings.enable_assertions = False # print the config print(mltk.format_key_values(config, title='Configurations')) print('') # open the result object and prepare for result directories exp.make_dirs('train_summary') exp.make_dirs('result_params') exp.make_dirs('ckpt_params') exp.make_dirs(config.test.output_dirs) # prepare the data # simple data (x_train, _), (x_test, y_test) = \ get_data(config.dataset, config.train.max_train_size, config.test.max_test_size, train_start=config.train.train_start, test_start=config.test.test_start, valid_portion=config.train.valid_portion) if config.use_time_info: u_train = np.asarray([time_generator(_i) for _i in range(len(x_train))]) # (train_size, u_dim) u_test = np.asarray([time_generator(len(x_train) + _i) for _i in range(len(x_test))]) # (test_size, u_dim) else: u_train = np.zeros([len(x_train), config.model.u_dim]) # (train_size, u_dim) u_test = np.zeros([len(x_test), config.model.u_dim]) split_idx = int(len(x_train) * config.train.valid_portion) x_train, x_valid = x_train[:-split_idx], x_train[-split_idx:] u_train, u_valid = u_train[:-split_idx], u_train[-split_idx:] # prepare data_flow train_flow = get_sliding_window_data_flow(window_size=config.model.window_length, batch_size=config.train.batch_size, x=x_train, u=u_train, shuffle=True, skip_incomplete=True) valid_flow = get_sliding_window_data_flow(window_size=config.model.window_length, batch_size=config.train.batch_size, x=x_valid, u=u_valid, shuffle=False, skip_incomplete=False) # build computation graph if config.dataset == 'SWaT' or config.dataset == 'WADI': model = MTSAD_SWAT(config.model, scope='model') else: model = MTSAD(config.model, scope='model') # input placeholders input_x = tf.placeholder(dtype=tf.float32, shape=[None, config.model.window_length, config.model.x_dim], name='input_x') input_u = tf.placeholder(dtype=tf.float32, shape=[None, config.model.window_length, config.model.u_dim], name='input_u') learning_rate = tf.placeholder(dtype=tf.float32, shape=(), name='learning_rate') is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training') # derive training nodes with tf.name_scope('training'): # pretrain time-vae to get z2 pretrain_q_net = model.pretrain_q_net(input_x, is_training=is_training) pretrain_chain = pretrain_q_net.chain(model.pretrain_p_net, 
observed={'x': input_x}, is_training=is_training) pretrain_loss = tf.reduce_mean(pretrain_chain.vi.training.sgvb()) + tf.losses.get_regularization_loss() pretrain_train_recons = tf.reduce_mean(pretrain_chain.model['x'].log_prob()) # train the whole network with z1 and z2 train_q_net = model.q_net(input_x, u=input_u, is_training=is_training) train_chain = train_q_net.chain(model.p_net, observed={'x': input_x}, u=input_u, is_training=is_training) train_metrics = GraphNodes() vae_loss = sgvb_loss(train_chain.variational, train_chain.model, train_metrics, name='train_sgvb_loss') reg_loss = tf.losses.get_regularization_loss() loss = vae_loss + reg_loss train_metrics['loss'] = loss with tf.name_scope('validation'): # pretrain validation pretrain_valid_q_net = model.pretrain_q_net(input_x, n_z=config.test.test_n_z) pretrain_valid_chain = pretrain_valid_q_net.chain(model.pretrain_p_net, observed={'x': input_x}, latent_axis=0) pretrain_valid_loss = tf.reduce_mean(pretrain_valid_chain.vi.training.sgvb()) + tf.losses.get_regularization_loss() pretrain_valid_recons = tf.reduce_mean(pretrain_valid_chain.model['x'].log_prob()) # validation of the whole network valid_q_net = model.q_net(input_x, u=input_u, n_z=config.test.test_n_z) valid_chain = valid_q_net.chain(model.p_net, observed={'x': input_x}, latent_axis=0, u=input_u) valid_metrics = GraphNodes() valid_loss = sgvb_loss(valid_chain.variational, valid_chain.model, valid_metrics, prefix='valid_', name='valid_sgvb_loss') + tf.losses.get_regularization_loss() valid_metrics['valid_loss'] = valid_loss # pretrain pre_variables_to_save = sum( [tf.global_variables('model/pretrain_q_net'), tf.global_variables('model/pretrain_p_net'), tf.global_variables('model/h_for_qz'), tf.global_variables('model/h_for_px')], [] ) pre_train_params = sum( [tf.trainable_variables('model/pretrain_q_net'), tf.trainable_variables('model/pretrain_p_net'), tf.trainable_variables('model/h_for_qz'), tf.trainable_variables('model/h_for_px')], [] ) pre_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) pre_gradients = pre_optimizer.compute_gradients(pretrain_loss, var_list=pre_train_params) with tf.name_scope('PreClipGradients'): for i, (g, v) in enumerate(pre_gradients): if g is not None: pre_gradients[i] = (tf.clip_by_norm( spt.utils.maybe_check_numerics(g, message='gradient on %s exceed' % str(v.name)), 10), v) with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): pre_train_op = pre_optimizer.apply_gradients(pre_gradients) # obtain params and gradients (whole model) variables_to_save = tf.global_variables() train_params = tf.trainable_variables() # optimizer optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) gradients = optimizer.compute_gradients(loss, var_list=train_params) # clip gradient by norm with tf.name_scope('ClipGradients'): for i, (g, v) in enumerate(gradients): if g is not None: gradients[i] = (tf.clip_by_norm( spt.utils.maybe_check_numerics(g, message="gradient on %s exceed" % str(v.name)), 10), v) with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): train_op = optimizer.apply_gradients(gradients) pre_var_groups = [ model.variable_scope.name + '/pretrain_q_net', model.variable_scope.name + '/pretrain_p_net', model.variable_scope.name + '/h_for_qz', model.variable_scope.name + '/h_for_px' ] var_groups = [ # for q_net model.variable_scope.name + '/q_net', # for p_net model.variable_scope.name + '/p_net', # for flow model.variable_scope.name + '/posterior_flow' ] var_initializer = 
tf.variables_initializer(tf.global_variables()) train_flow = train_flow.threaded(5) valid_flow = valid_flow.threaded(5) pre_loop = TrainLoop(param_vars=pre_variables_to_save, var_groups=pre_var_groups, max_epoch=config.train.pretrain_max_epoch, summary_dir=(exp.abspath('pre_train_summary') if config.write_summary else None), summary_graph=tf.get_default_graph(), summary_commit_freqs={'pretrain_loss': 10}, early_stopping=config.train.early_stopping, valid_metric_name='pretrain_valid_loss', valid_metric_smaller_is_better=True, checkpoint_dir=(exp.abspath('pre_ckpt_params') if config.save_ckpt else None), checkpoint_epoch_freq=config.pretrain_ckpt_epoch_freq, checkpoint_max_to_keep=config.pretrain_ckpt_max_keep) loop = TrainLoop(param_vars=variables_to_save, var_groups=var_groups, max_epoch=config.train.max_epoch, summary_dir=(exp.abspath('train_summary') if config.write_summary else None), summary_graph=tf.get_default_graph(), summary_commit_freqs={'loss': 10}, early_stopping=config.train.early_stopping, valid_metric_name='valid_loss', valid_metric_smaller_is_better=True, checkpoint_dir=(exp.abspath('ckpt_params') if config.save_ckpt else None), checkpoint_epoch_freq=config.ckpt_epoch_freq, checkpoint_max_to_keep=config.ckpt_max_keep ) if config.write_histogram_summary: summary_op = tf.summary.merge_all() else: summary_op = None pre_lr_value = get_lr_value(config.train.initial_lr, config.train.lr_anneal_factor, config.train.pretrain_lr_anneal_epoch_freq, pre_loop) lr_value = get_lr_value(config.train.initial_lr, config.train.lr_anneal_factor, config.train.lr_anneal_epoch_freq, loop) pre_trainer = Trainer(loop=pre_loop, train_op=pre_train_op, inputs=[input_x, input_u], data_flow=train_flow, feed_dict={learning_rate: pre_lr_value, is_training: True}, metrics={'pretrain_loss': pretrain_loss, 'pretrain_train_recons': pretrain_train_recons}, summaries=summary_op) trainer = Trainer(loop=loop, train_op=train_op, inputs=[input_x, input_u], data_flow=train_flow, feed_dict={learning_rate: lr_value, is_training: True}, metrics=train_metrics, summaries=summary_op) pre_validator = Evaluator(loop=pre_loop, metrics={'pretrain_valid_loss': pretrain_valid_loss, 'pretrain_valid_recons': pretrain_valid_recons}, inputs=[input_x, input_u], data_flow=valid_flow, time_metric_name='pre_valid_time') pre_validator.events.on( spt.EventKeys.AFTER_EXECUTION, lambda e: exp.update_results(pre_validator.last_metrics_dict) ) validator = Evaluator(loop=loop, metrics=valid_metrics, inputs=[input_x, input_u], data_flow=valid_flow, time_metric_name='valid_time') validator.events.on( spt.EventKeys.AFTER_EXECUTION, lambda e: exp.update_results(validator.last_metrics_dict) ) train_losses = [] tmp_collector = [] valid_losses = [] def on_metrics_collected(loop: TrainLoop, metrics): if 'loss' in metrics: tmp_collector.append(metrics['loss']) if loop.epoch % 1 == 0: if 'valid_loss' in metrics: valid_losses.append(metrics['valid_loss']) train_losses.append(np.mean(tmp_collector)) tmp_collector.clear() loop.events.on(spt.EventKeys.METRICS_COLLECTED, on_metrics_collected) pre_trainer.evaluate_after_epochs(pre_validator, freq=1) pre_trainer.log_after_epochs(freq=1) trainer.evaluate_after_epochs(validator, freq=1) trainer.log_after_epochs(freq=1) with spt.utils.create_session().as_default() as session: session.run(var_initializer) with pre_loop: pre_trainer.run() print('') print('PreTraining Finished.') if config.save_results: saver = tf.train.Saver(var_list=pre_variables_to_save) saver.save(session, 
os.path.join(exp.abspath('result_params'), 'restored_pretrain_params.dat')) print('') print('Pretrain Model saved.') print('************Start train the whole network***********') with loop: trainer.run() print('') print('Training Finished.') if config.save_results: saver = tf.train.Saver(var_list=variables_to_save) saver.save(session, os.path.join(exp.abspath('result_params'), "restored_params.dat")) print('') print('Model saved.') if __name__ == '__main__': with mltk.Experiment(ExpConfig()) as exp: exp.save_config() main(exp, exp.config) if exp.config.exp_dir_save_path is not None: with open(exp.config.exp_dir_save_path, 'a') as f: f.write("'" + exp.config.dataset + ' ' + exp.output_dir + "'" + '\n') ================================================ FILE: algorithm/utils.py ================================================ import tfsnippet as spt import numpy as np import os import pickle from sklearn.preprocessing import MinMaxScaler from typing import * import tensorflow as tf from functools import partial # here, use 'min_max' or 'mean_std' for different method # method = 'min_max' or 'mean_std' method = 'min_max' alpha = 4.0 # mean +/- alpha * std def get_sliding_window_data_flow(window_size, batch_size, x, u=None, y=None, shuffle=False, skip_incomplete=False) -> spt.DataFlow: n = len(x) seq = np.arange(window_size - 1, n, dtype=np.int32).reshape([-1, 1]) seq_df: spt.DataFlow = spt.DataFlow.arrays( [seq], shuffle=shuffle, skip_incomplete=skip_incomplete, batch_size=batch_size) offset = np.arange(-window_size + 1, 1, dtype=np.int32) if y is not None: if u is not None: df = seq_df.map(lambda idx: (x[idx + offset], u[idx + offset], y[idx + offset])) else: df = seq_df.map(lambda idx: (x[idx + offset], y[idx + offset])) else: if u is not None: df = seq_df.map(lambda idx: (x[idx + offset], u[idx + offset])) else: df = seq_df.map(lambda idx: (x[idx + offset],)) return df def time_generator(timestamp): mins = 60 hours = 24 days = 7 timestamp %= (mins * hours * days) res = np.zeros([mins + hours + days]) res[int(timestamp / hours / mins)] = 1 # day res[days + int((timestamp % (mins * hours)) / mins)] = 1 # hours res[days + hours + int(timestamp % mins)] = 1 # min return res def get_data_dim(dataset): if dataset == 'SWaT': return 51 elif dataset == 'WADI': return 118 elif str(dataset).startswith('machine'): return 38 elif str(dataset).startswith('omi'): return 19 else: raise ValueError('unknown dataset '+str(dataset)) def get_data(dataset, max_train_size=None, max_test_size=None, print_log=True, do_preprocess=True, train_start=0, test_start=0, valid_portion=0.3, prefix="./data/processed"): """ get data from pkl files return shape: (([train_size, x_dim], [train_size] or None), ([test_size, x_dim], [test_size])) """ if max_train_size is None: train_end = None else: train_end = train_start + max_train_size if max_test_size is None: test_end = None else: test_end = test_start + max_test_size print('load data of:', dataset) print("train: ", train_start, train_end) print("test: ", test_start, test_end) x_dim = get_data_dim(dataset) f = open(os.path.join(prefix, dataset + '_train.pkl'), "rb") train_data = pickle.load(f).reshape((-1, x_dim))[train_start:train_end, :] f.close() try: f = open(os.path.join(prefix, dataset + '_test.pkl'), "rb") test_data = pickle.load(f).reshape((-1, x_dim))[test_start:test_end, :] f.close() except (KeyError, FileNotFoundError): test_data = None try: f = open(os.path.join(prefix, dataset + "_test_label.pkl"), "rb") test_label = 
pickle.load(f).reshape((-1))[test_start:test_end] f.close() except (KeyError, FileNotFoundError): test_label = None if do_preprocess: # train_data = preprocess(train_data) # test_data = preprocess(test_data) train_data, test_data = preprocess(train_data, test_data, valid_portion=valid_portion) print("train set shape: ", train_data.shape) print("test set shape: ", test_data.shape) print("test set label shape: ", test_label.shape) return (train_data, None), (test_data, test_label) def preprocess(train, test, valid_portion=0): train = np.asarray(train, dtype=np.float32) test = np.asarray(test, dtype=np.float32) if len(train.shape) == 1 or len(test.shape) == 1: raise ValueError('Data must be a 2-D array') if np.any(sum(np.isnan(train)) != 0): print('Train data contains null values. Will be replaced with 0') train = np.nan_to_num(train) if np.any(sum(np.isnan(test)) != 0): print('Test data contains null values. Will be replaced with 0') test = np.nan_to_num(test) # revise here for other preprocess methods if method == 'min_max': if valid_portion > 0: split_idx = int(len(train) * valid_portion) train, valid = train[:-split_idx], train[-split_idx:] scaler = MinMaxScaler().fit(train) train = scaler.transform(train) valid = scaler.transform(valid) valid = np.clip(valid, a_min=-3.0, a_max=3.0) test = scaler.transform(test) test = np.clip(test, a_min=-3.0, a_max=3.0) train = np.concatenate([train, valid], axis=0) print('Data normalized with min-max scaler') else: scaler = MinMaxScaler().fit(train) train = scaler.transform(train) test = scaler.transform(test) test = np.clip(test, a_min=-3.0, a_max=3.0) print('Data normalized with min-max scaler') elif method == 'mean_std': def my_transform(value, ret_all=True, mean=None, std=None): if mean is None: mean = np.mean(value, axis=0) if std is None: std = np.std(value, axis=0) for i in range(value.shape[0]): clip_value = mean + alpha * std # compute clip value: (mean - a * std, mean + a * std) temp = value[i] < clip_value value[i] = temp * value[i] + (1 - temp) * clip_value clip_value = mean - alpha * std temp = value[i] > clip_value value[i] = temp * value[i] + (1 - temp) * clip_value std = np.maximum(std, 1e-5) # to avoid std -> 0 value[i] = (value[i] - mean) / std # normalization return value, mean, std if ret_all else value train, _mean, _std = my_transform(train) test = my_transform(test, False, _mean, _std)[0] print('Data normalized with standard scaler method') elif method == 'none': print('No pre-processing') else: raise RuntimeError('unknown preprocess method') return train, test TensorLike = Union[tf.Tensor, spt.StochasticTensor] class GraphNodes(Dict[str, TensorLike]): """A dict that maps name to TensorFlow graph nodes.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) for k, v in self.items(): if not spt.utils.is_tensor_object(v): raise TypeError(f'The value of `{k}` is not a tensor: {v!r}.') def eval(self, session: tf.Session = None, feed_dict: Dict[tf.Tensor, Any] = None) -> Dict[str, Any]: """ Evaluate all the nodes with the specified `session`. Args: session: The TensorFlow session. feed_dict: The feed dict. Returns: The node evaluation outputs. """ if session is None: session = spt.utils.get_default_session_or_error() keys = list(self) tensors = [self[key] for key in keys] outputs = session.run(tensors, feed_dict=feed_dict) return dict(zip(keys, outputs)) def add_prefix(self, prefix: str) -> 'GraphNodes': """ Add a common prefix to all metrics in this collection. Args: prefix: The common prefix. 
""" return GraphNodes({f'{prefix}{k}': v for k, v in self.items()}) def get_score(recons_probs, preserve_feature_dim=False, score_avg_window_size=1): """ Evaluate the anomaly score at each timestamp according to the reconstruction probability obtained by model. :param recons_probs: (data_length-window_length+1, window_length, x_dim). The reconstruction probabilities correspond to each timestamp and each dimension of x, evaluated in sliding windows with length 'window_length'. The larger the reconstruction probability, the less likely a point is an anomaly. :param preserve_feature_dim: bool. Whether sum over the feature dimension. If True, preserve the anomaly score on each feature dimension. If False, sum over the anomaly scores along feature dimension and return a single score on each timestamp. :param score_avg_window_size: int. How many scores in different sliding windows are used to evaluate the anomaly score at a given timestamp. By default score_avg_window_size=1, only the score of last point are used in each sliding window, and this score is directly used as the final anomaly score at this timestamp. When score_avg_window_size > 1, then the last 'score_avg_window_size' scores are used in each sliding window. Then for timestamp t, if t is the last point of sliding window k, then the anomaly score of t is now evaluated as the average score_{t} in sliding windows [k, k+1, ..., k+score_avg_window_size-1]. :return: Anomaly scores (reconstruction probability) at each timestamps. With shape ``(data_length - window_size + score_avg_window_size,)`` if `preserve_feature_dim` is `False`, or ``(data_length - window_size + score_avg_window_size, x_dim)`` if `preserve_feature_dim` is `True`. The first `window_size - score_avg_window_size` points are discarded since there aren't enough previous values to evaluate the score. """ data_length = recons_probs.shape[0] + recons_probs.shape[1] - 1 window_length = recons_probs.shape[1] score_collector = [[] for i in range(data_length)] for i in range(recons_probs.shape[0]): for j in range(score_avg_window_size): score_collector[i + window_length - j - 1].append(recons_probs[i, -j-1]) score_collector = score_collector[window_length-score_avg_window_size:] scores = [] for i in range(len(score_collector)): scores.append(np.mean(score_collector[i], axis=0)) scores = np.array(scores) # average over the score_avg_window. (data_length-window_length+score_avg_window_size, x_dim) if not preserve_feature_dim: scores = np.sum(scores, axis=-1) return scores def get_avg_recons(recons_vals, window_length, recons_avg_window_size=1): """ Get the averaged reconstruction values for plotting. The last `recons_avg_window_size` points in each reconstruct sliding windows are used, the final reconstruction values at each timestamp is the mean of each value at this timestamp. :param recons_vals: original reconstruction values. shape: (data_length - window_length + 1, window_length, x_dim) :param recons_avg_window_size: int. How many points are used in each reconstruct sliding window. :return: final reconstruction curve. shape: (data_length, x_dim) The first `window_size - recons_avg_window_size` points use the reconstruction value of the first reconstruction window, others use the averaged values according to `recons_vals` and `recons_avg_window_size`. 
""" data_length = recons_vals.shape[0] + window_length - 1 recons_collector = [[] for i in range(data_length)] for i in range(recons_vals.shape[0]): for j in range(recons_avg_window_size): recons_collector[i + window_length - j - 1].append(recons_vals[i, -j-1, :]) if recons_vals.shape[1] < window_length: for i in range(window_length - recons_avg_window_size): recons_collector[i] = [recons_vals[0, -1, :]] else: for i in range(window_length - recons_avg_window_size): recons_collector[i] = [recons_vals[0, i, :]] final_recons = [] for i in range(len(recons_collector)): final_recons.append(np.mean(recons_collector[i], axis=0)) final_recons = np.array(final_recons) # average over the recons_avg_window. (data_length, x_dim) return final_recons ================================================ FILE: data/interpretation_label/anomaly_type.txt ================================================ dataset,anomaly_segment,anomaly_type omi-1,760-765,metric omi-1,1064-1298,metric-temporal omi-1,2758-2772,metric omi-1,2874-2885,temporal omi-1,3012-3025,metric-temporal omi-1,3160-3305,metric-temporal omi-1,3626-3638,metric-temporal omi-2,486-498,metric-temporal omi-2,740-753,temporal omi-2,766-783,temporal omi-2,1436-1445,temporal omi-3,760-820,metric-temporal omi-3,842-860,metric-temporal omi-3,1080-1120,temporal omi-3,2188-2201,temporal omi-3,2497-2500,temporal omi-3,3428-3434,temporal omi-4,727-782,metric-temporal omi-4,843-857,metric-temporal omi-4,2178-2238,metric-temporal omi-4,2494-2501,temporal omi-4,3425-3435,temporal omi-5,765-781,temporal omi-5,1589-1617,metric-temporal omi-5,2440-2446,metric omi-5,3356-3363,metric-temporal omi-5,3416-3423,temporal omi-5,3590-3594,metric-temporal omi-6,357-392,metric omi-6,750-788,metric-temporal omi-6,1278-1293,metric omi-6,1590-1615,metric-temporal omi-6,2592-2604,metric omi-6,2734-2765,metric-temporal omi-6,3066-3072,metric-temporal omi-6,3596-3605,metric-temporal omi-6,3610-3622,temporal omi-6,4159-4164,temporal omi-7,738-778,temporal omi-7,1625-1660,temporal omi-7,3311-3320,temporal omi-8,740-753,temporal omi-8,764-780,metric-temporal omi-8,2319-2325,temporal omi-8,2586-2675,metric-temporal omi-8,2897-2956,metric-temporal omi-9,738-756,metric-temporal omi-9,763-780,metric-temporal omi-9,2514-2524,temporal omi-9,2603-2668,metric-temporal omi-9,2918-2958,metric-temporal omi-9,3170-3245,metric-temporal omi-9,3508-3528,metric omi-9,4064-4108,metric-temporal omi-10,740-780,metric-temporal omi-10,1345-1406,temporal omi-10,2224-2272,metric-temporal omi-10,2514-2592,metric omi-10,2733-2778,metric omi-10,3401-3405,temporal omi-10,3414-3459,temporal omi-10,3700-3744,metric-temporal omi-11,738-755,temporal omi-11,757-781,metric-temporal omi-11,782-815,temporal omi-11,1366-1375,temporal omi-11,2301-2314,metric omi-11,2365-2381,metric-temporal omi-11,2464-2485,metric-temporal omi-11,2588-2610,metric-temporal omi-11,2650-2675,metric omi-11,3012-3072,temporal omi-12,740-813,temporal omi-12,2486-2493,temporal omi-12,2515-2525,metric-temporal omi-12,2721-2723,temporal omi-12,3500-3535,metric ================================================ FILE: data/interpretation_label/machine-1-1.txt ================================================ 15849-16395:1,9,10,12,13,14,15 16963-17517:1,2,3,4,6,7,9,10,11,12,13,14,15,16,19,20,21,22,24,25,26,27,28,29,30,31,32,33,34,35,36 18071-18528:1,2,9,10,12,13,14,15 19367-20088:1,2,3,4,9,10,11,12,13,14,15,16,25,28 20786-21195:1,9,10,12,13,14,15 24679-24682:9,13,14,15 26114-26116:9,13,14,15 27554-27556:9,13,14,15 
================================================ FILE: data/interpretation_label/machine-1-6.txt ================================================ 246-252:1,2,3,4,6,9,10,11,12,13,15 653-658:1,2,3,4,6,9,10,11,12,13,15,19,20,21,22,26,28,30,31,32 2092-2100:1,2,3,4,6,9,10,11,12,13,15,19,20,21,22,26,28,30,31,32 2884-2888:6 3534-3539:1,2,3,4,6,9,10,11,12,13,15,19,20,21,22 4647-5045:5,6,9,10,11,13,17,33,34 5167-5172:9,10,11,13,18 5708-5713:9,10,11,13 5873-5885:2,3,6,9,10,11,13,19,20,21,22,28,31,32 6022-6027:33,34 6412-6419:1,2,3,6,9,10,11,13,15,19,20,21,22,28,30,31,32,35,36 7851-7856:1,2,9,10,11,13,19,20,21,22,23,28,30,31,32 9291-9298:1,2,3,9,10,11,13,19,20,21,22,23,28,30,31,32 10731-10736:1,2,3,9,10,11,13,19,20,21,22,23,28,30,31,32 11467-11471:1,2,3,6,9,10,11,13,19,20,21,22,23,28,30,31,32 12171-12176:1,2,3,6,9,10,11,13,19,20,21,22,23,28,30,31,32 13069-13073:33,34 13277-13280:17 13613-13619:1,2,9,10,11,12,13,30 14603-14607:9,10,11,13,19,20,21,22,32,37 15052-15055:9,10,11,13 15397-15401:9,12,13 15802-15805:9,10,11,13 16491-16499:1,2,3,9,10,11,13,15 16718-16721:9,10,11,13 16972-16976:9,12,13,15 17931-17939:1,2,3,6,9,10,11,12,13,15,19,20,21,22,23,28,30,31,32 18600-21761:1,2,3,4,6,9,10,11,12,13,14,15,16,23,25,28,30,31,32,35,36 22252-22260:9,13,15 22417-22420:11,13 ================================================ FILE: data/interpretation_label/machine-1-7.txt ================================================ 837-858:1,6,9,12,13,14,15,16 2959-4174:6,9,12,13,14,15,16,33,34 5849-5940:1,2,3,4,6,7,9,10,11,13,15,19,20,21,22,23,24,25,26,28,30,31,32,35,36 6031-6034:33,34 7099-8000:1,9,12,13,14,15,17,18 8564-8580:1,9,10,11,13,19,20,21,22,23,24,25,26,28,31,32,35,36 12359-12374:1,9,12,13,14,15,16 13799-13825:9,12,13,14,15,16 15239-15270:1,6,9,12,13,14,15,16 15960-15963:17 18109-18150:1,2,3,4,6,7,9,12,13,14,15,16,17,19,20,21,22,23,24,25,26,28,29,30,31,32,5,36 19442-19446:33,34 19559-19590:1,6,9,12,13,14,15,16 ================================================ FILE: data/interpretation_label/machine-2-1.txt ================================================ 6506-6528:23,25,32,35,36 7907-7961:9,10,11,13,32 9340-9386:9,10,11,13,32,33,34 9779-9805:5,33,34 12173-12181:19,20,21,22,28 16136-16588:9,10,24,26 17365-17382:1,2,3,4,9,14,15,16,18,24,26 17995-18028:9,10,11,13,15 18575-18650:1,19,20,21,22 20020-20036:1,6,7 20891-20912:10,13 20340-20352:9,10,11,13,32 22907-23295:1,2,3,4,5,6,7,9,10,11,12,13,14,16,17,19,20,21,22,23,24,25,26,28,31,32,35,36 ================================================ FILE: data/interpretation_label/machine-2-2.txt ================================================ 12760-12771:1,2,3,4,6,7,23,24,25,26,31,32,33,34,35,36 14511-14521:1,2,3,4,6,7,23,24,25,26,31,32,35,36 15630-16502:10,11,12,13 17090-17942:10,11,12,13 18541-19382:10,11,12,13 20820-20823:10,11,12,13 21024-21027:11 21280-21295:1,2,3,4,6,7,32 21405-21428:19,20,21,22,28,31 22260-22263:6,10,11,12,13 23099-23299:9,15 ================================================ FILE: data/interpretation_label/machine-2-7.txt ================================================ 664-667:9,13,14 2104-2107:13,14 3544-3547:9,13,14,15 4984-4987:9,13,14,15 6030-6034:33,34 7665-7669:30 7863-7867:9,13,14,15 9304-9307:9,13,14,15 10744-10747:9,13,14,15 12099-12127:1,2,3,4,24,26 13623-13625:9,13,14,15 14934-14938:33,34 15063-15066:9,13,14,15 16534-16557:9,13,14,15 17978-17981:9,13,14,15 18639-18944:1,2,3,4,6,7,11,16,19,20,21,22,23,25,28,29,31,32,34,35,36 19411-19418:9,13,14,15 20853-20857:9,13,14,15 22291-22296:9,13,14,15 22924-22927:10 
================================================ FILE: data/interpretation_label/machine-2-8.txt ================================================ 17580-17741:1,2,3,4,5,6,7,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,28,29,30,31,32,33,34,35,36 ================================================ FILE: data/interpretation_label/machine-3-11.txt ================================================ 21230-21296:1,2,3,4,5,6,7,10,11,16,19,20,21,22,23,24,26,28,30,31,32,35,36 27825-27951:1,2,3,4,7,19,20,21,22,23,24,26,28,30,31,32,34,35,36,37 27995-28001:10,11 ================================================ FILE: data/interpretation_label/machine-3-3.txt ================================================ 739-742:10,11,12,13 1808-1814:18 2179-2182:10,11,12,13 3619-3622:10,11,12,13 4561-4578:1,6,7,9,10,11,12,13,14,15,19,20,21,22,23,24,25,26,28,31,35,36,37,38 4564-4567:1,6,7,9,10,11,12,13,14,15,19,20,21,22,23,24,25,26,28,31,35,36,37,38 5059-5062:10,11,12,13 5449-5453:12,13,15,16,17 6499-6502:10,11,12,13 7939-7942:10,11,12,13 8407-8414:12,13,15,16,17 9737-9740:15,16,17 10819-10822:10,11,12,13 12259-12262:10,11,12,13 13699-13702:10,11,12,13 13827-13830:17 14476-14486:14 15139-15142:10,11,12,13 16199-16212:10,11,12,13 16579-16582:1,2,3,4,19,21,22,25,28,31 18019-18022:10,11,12,13 19081-19088:10,11,12,13 19349-19830:1,2,3 19459-19462:1,2,3,4,10,11,12,19,20,21,22,23,25,28,31,35,36,37,38 20897-20903:10,11,12,13 21878-21893:18 22339-22342:10,11,12,13 23159-23180:1,2,3,4,19,21,22,28,31 ================================================ FILE: data/interpretation_label/machine-3-4.txt ================================================ 2734-3520:1,2,3,4,6,7,11,16 4474-4550:2,11 6013-6016:10,12,13,14 10963-10969:30,33,34 11565-11569:24,33,34 13699-13709:24,33,34 18589-18640:1,2,3,4 18784-18825:1,2,3 ================================================ FILE: data/interpretation_label/machine-3-6.txt ================================================ 1187-1221:1,2,3,4,19,20,21,22,23,24,25,28,29,31,36,35,34,32 8177-8211:1,2,3,4,34,33 10637-10743:1,33,34 15633-15863:1,9,10,12,13,14,15,16 16788-16995:1,10,12,13,14,15 18054-18134:1,10,12,13,14,15 19276-19500:1,10,12,13,14,15 20681-20865:1,9,10,12,13,14,15 24503-24506:1,9,10,12,13,14,15 26138-26149:1,12 27877-27958:1,2,3,4,6,7,11,16,19,20,21,22,23,24,25,26,28,30,31,32,33,35,36 ================================================ FILE: data/interpretation_label/machine-3-8.txt ================================================ 15144-15266:1,2,3,4,19,20,21,22,28,31,35,36 15941-16514:1,2,3,4,19,20,21,22,28,30,31 17063-17276:1,2,3,4,19,20,21,22,23,24,25,26,28,29,30,31,32,33,34,35,36 21405-21548:9,10,12,13 23478-23781:9,10,13,14,30 28253-28270:10,12,13 ================================================ FILE: data/interpretation_label/omi-1.txt ================================================ 760-765:4,5,7,8,9,10,11,12,13,14,15,16,17,18,19 1064-1298:6,7,9,10,11,12,13,14,15,16,17,18,19 2758-2772:5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 2874-2885:7,9,10,11,12,13,14,15,16,17,18 3012-3025:7,10,11,12,13,14,15,16,17,18,19 3160-3305:8,9,10,11,12,13,14,15,16,17,18,19 3626-3638:7,9,10,11,12,13,14,15,16,17,18,19 ================================================ FILE: data/interpretation_label/omi-10.txt ================================================ 740-780:2,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 1345-1406:7,9,10,11,12,13,14,15,16,17,18 2224-2272:4,5,6,7,9,10,11,12,13,14,15,16,17,18,19 2514-2592:4,16 2733-2778:11,14,15 3401-3405:1 3414-3459:8 3700-3744:7,8,9,10,11,12,13,14,15,16,17,18 
================================================ FILE: data/interpretation_label/omi-11.txt ================================================ 738-755:2 757-781:1,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 782-815:1,4,6,8,9,10,11,12,13,14,15,16,17,18,19 1366-1375:4,5,8,17,19 2301-2314:4,5,17,19 2365-2381:19 2464-2485:4,5,17,19 2588-2610:4,5,17,19 2650-2675:19 3012-3072:1,4,5,8,17,19 ================================================ FILE: data/interpretation_label/omi-12.txt ================================================ 740-813:1,2,6,7,8,9,10,11,12,13,14,15,16,17,18,19 2486-2493:4,5 2515-2525:1,4,5,7,8,9,17,18 2721-2723:4 3500-3535:5,6,7,8,11,12,13,14,15,16,17,18,19 ================================================ FILE: data/interpretation_label/omi-2.txt ================================================ 486-498:1,4,5 740-753:2 766-783:1,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 1436-1445:7,9,10,11,12,13,14,15,16,17,18 ================================================ FILE: data/interpretation_label/omi-3.txt ================================================ 760-820:1,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19 842-860:7,8,9,10,11,12,13,14,15,16,17,18,19 1080-1120:7,9,10,11,12,13,14,15,16,17,18 2188-2201:7,9,10,11,12,13,14,15,16,17,18,19 2497-2500:6,7,9,10,11,12,13,14,15,16,17,18,19 3428-3434:7,8,9,10,11,12,13,14,15,16,17,18,19 ================================================ FILE: data/interpretation_label/omi-4.txt ================================================ 727-782:1,2,6,7,8,9,10,11,12,13,14,15,16,17,18,19 843-857:6,7,8,9,10,11,12,13,14,15,16,17,18,19 2178-2238:8,9,10,11,12,13,14,15,16,17,18,19 2494-2501:7,9,10,11,12,13,14,15,16,17,18,19 3425-3435:7,8,9,10,11,12,13,14,15,16,17,18,19 ================================================ FILE: data/interpretation_label/omi-5.txt ================================================ 765-781:6,7,8,9,10,11,12,13,14,15,16,17,18,19 1589-1617:2,4,17 2440-2446:8,11,12,13,14,15,16,17,18,19 3356-3363:5,6,17 3416-3423:4,5,7,9,10,11,12,13,14,15,16,17,18,19 3590-3594:1,8,17,18,19 ================================================ FILE: data/interpretation_label/omi-6.txt ================================================ 357-392:4,19 750-788:2,6,7,8,9,10,11,12,13,14,15,16,17,18,19 1278-1293:4,5,6,8,17 1590-1615:1,9,18 2592-2604:4,6,9 2734-2765:4,6,8,19 3066-3072:4,6,19 3596-3605:4,5,6,19 3610-3622:1,4,5 4159-4164:4,5 ================================================ FILE: data/interpretation_label/omi-7.txt ================================================ 738-778:1,2,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19 1625-1660:7,10,11,12,13,14,15,16,17,18,19 3311-3320:7,10,11,12,13,14,15,16,17,18,19 ================================================ FILE: data/interpretation_label/omi-8.txt ================================================ 740-753:2,4 764-780:6,7,8,9,10,11,12,13,14,15,16,17,18,19 2319-2325:4,5,6 2586-2675:5,6,8,17,18,19 2897-2956:2,6,8,17,18,19 ================================================ FILE: data/interpretation_label/omi-9.txt ================================================ 738-756:2,9,10,11,12,13,14,15,16,17,18,19 763-780:1,6,7,8,9,10,11,12,13,14,15,16,17,18,19 2514-2524:1,4,5,7,8,9,17,18 2603-2668:1,2,5,6,8,17,18,19 2918-2958:1,2,5,6,8,17,18,19 3170-3245:1,2,6,8,17,18,19 3508-3528:2,6,8,19 4064-4108:1,2,6,7,8,9,17,18,19 ================================================ FILE: explib/__init__.py ================================================ from .utils import * from .eval_methods import * ================================================ FILE: 
explib/eval_methods.py ================================================ # -*- coding: utf-8 -*- import numpy as np import sklearn.metrics def calc_point2point(predict, actual): """ calculate f1 score by predict and actual. Args: predict (np.ndarray): the predict label actual (np.ndarray): np.ndarray """ TP = np.sum(predict * actual) TN = np.sum((1 - predict) * (1 - actual)) FP = np.sum(predict * (1 - actual)) FN = np.sum((1 - predict) * actual) precision = TP / (TP + FP + 0.00001) recall = TP / (TP + FN + 0.00001) f1 = 2 * precision * recall / (precision + recall + 0.00001) return f1, precision, recall, TP, TN, FP, FN def adjust_predicts(score, label, threshold=None, pred=None, calc_latency=False): """ Calculate adjusted predict labels using given `score`, `threshold` (or given `pred`) and `label`. Args: score (np.ndarray): The anomaly score label (np.ndarray): The ground-truth label threshold (float): The threshold of anomaly score. A point is labeled as "anomaly" if its score is lower than the threshold. pred (np.ndarray or None): if not None, adjust `pred` and ignore `score` and `threshold`, calc_latency (bool): Returns: np.ndarray: predict labels """ if len(score) != len(label): raise ValueError("score and label must have the same length") score = np.asarray(score) label = np.asarray(label) latency = 0 if pred is None: predict = score < threshold else: predict = pred actual = label > 0.1 anomaly_state = False anomaly_count = 0 for i in range(len(score)): if actual[i] and predict[i] and not anomaly_state: anomaly_state = True anomaly_count += 1 for j in range(i, 0, -1): if not actual[j]: break else: if not predict[j]: predict[j] = True latency += 1 elif not actual[i]: anomaly_state = False if anomaly_state: predict[i] = True if calc_latency: return predict, latency / (anomaly_count + 1e-4) else: return predict def calc_seq(score, label, threshold, calc_latency=False): """ Calculate f1 score for a score sequence """ if calc_latency: predict, latency = adjust_predicts(score, label, threshold, calc_latency=calc_latency) t = list(calc_point2point(predict, label)) t.append(latency) return t else: predict = adjust_predicts(score, label, threshold, calc_latency=calc_latency) return calc_point2point(predict, label) # here for our refined best-f1 search method def get_best_f1(score, label): ''' :param score: 1-D array, input score, tot_length :param label: 1-D array, standard label for anomaly :return: list for results, threshold ''' assert score.shape == label.shape print('***computing best f1***') search_set = [] tot_anomaly = 0 for i in range(label.shape[0]): tot_anomaly += (label[i] > 0.5) flag = 0 cur_anomaly_len = 0 cur_min_anomaly_score = 1e5 for i in range(label.shape[0]): if label[i] > 0.5: # here for an anomaly if flag == 1: cur_anomaly_len += 1 cur_min_anomaly_score = score[i] if score[i] < cur_min_anomaly_score else cur_min_anomaly_score else: flag = 1 cur_anomaly_len = 1 cur_min_anomaly_score = score[i] else: # here for normal points if flag == 1: flag = 0 search_set.append((cur_min_anomaly_score, cur_anomaly_len, True)) search_set.append((score[i], 1, False)) else: search_set.append((score[i], 1, False)) if flag == 1: search_set.append((cur_min_anomaly_score, cur_anomaly_len, True)) search_set.sort(key=lambda x: x[0]) best_f1_res = - 1 threshold = 1 P = 0 TP = 0 best_P = 0 best_TP = 0 for i in range(len(search_set)): P += search_set[i][1] if search_set[i][2]: # for an anomaly point TP += search_set[i][1] precision = TP / (P + 1e-5) recall = TP / (tot_anomaly + 1e-5) f1 = 2 * 
precision * recall / (precision + recall + 1e-5) if f1 > best_f1_res: best_f1_res = f1 threshold = search_set[i][0] best_P = P best_TP = TP print('*** best_f1 ***: ', best_f1_res) print('*** threshold ***: ', threshold) return (best_f1_res, best_TP / (best_P + 1e-5), best_TP / (tot_anomaly + 1e-5), best_TP, score.shape[0] - best_P - tot_anomaly + best_TP, best_P - best_TP, tot_anomaly - best_TP), threshold # calculate evaluation metrics (best-F1, AUROC, AP) under point-adjust approach. def get_adjusted_composite_metrics(score, label): score = -score # change the recons prob to anomaly score, higher anomaly score means more anomalous # adjust the score for segment detection. i.e., for each ground-truth anomaly segment, use the maximum score # as the score of all points in that segment. This corresponds to point-adjust f1-score. assert len(score) == len(label) splits = np.where(label[1:] != label[:-1])[0] + 1 is_anomaly = label[0] == 1 pos = 0 for sp in splits: if is_anomaly: score[pos:sp] = np.max(score[pos:sp]) is_anomaly = not is_anomaly pos = sp sp = len(label) if is_anomaly: score[pos:sp] = np.max(score[pos:sp]) # now get the adjust score for segment evaluation. fpr, tpr, _ = sklearn.metrics.roc_curve(y_true=label, y_score=score, drop_intermediate=False) auroc = sklearn.metrics.auc(fpr, tpr) precision, recall, _ = sklearn.metrics.precision_recall_curve(y_true=label, probas_pred=score) # validate best f1 f1 = np.max(2 * precision * recall / (precision + recall + 1e-5)) ap = sklearn.metrics.average_precision_score(y_true=label, y_score=score, average=None) return auroc, ap, f1, precision, recall, fpr, tpr ================================================ FILE: explib/raw_data_converter.py ================================================ import numpy as np import pandas as pd import pickle as pkl # preprocess for SWaT. SWaT.A2_Dec2015, version 0 df = pd.read_csv('SWaT_Dataset_Attack_v0.csv') y = df['Normal/Attack'].to_numpy() labels = [] for i in y: if i == 'Attack': labels.append(1) else: labels.append(0) labels = np.array(labels) assert len(labels) == 449919 # pkl.dump(labels, open('SWaT_test_label.pkl', 'wb')) print('SWaT_test_label saved') df = df.drop(columns=[' Timestamp', 'Normal/Attack']) test = df.to_numpy() assert test.shape == (449919, 51) # pkl.dump(test, open('SWaT_test.pkl', 'wb')) print('SWaT_test saved') df = pd.read_csv('SWaT_Dataset_Normal_v0.csv') df = df.drop(columns=['Unnamed: 0','Unnamed: 52']) train = df[1:].to_numpy() assert train.shape == (496800, 51) # pkl.dump(train, open('SWaT_train.pkl', 'wb')) print('SWaT_train saved') # preprocess for WADI. 
WADI.A1 a = str(open('WADI_14days.csv', 'rb').read(), encoding='utf8').split('\n')[5: -1] a = '\n'.join(a) with open('train1.csv', 'wb') as f: f.write(a.encode('utf8')) a = pd.read_csv('train1.csv', header=None) a = a.to_numpy()[:, 3:] nan_cols = [] for j in range(a.shape[1]): for i in range(a.shape[0]): if a[i][j] != a[i][j]: nan_cols.append(j) break # len(nan_cols) == 9 train = np.delete(a, nan_cols, axis=1) assert train.shape == (1209601, 118) # pkl.dump(train, open('WADI_train.pkl', 'wb')) print('WADI_train saved') df = pd.read_csv('WADI_attackdata.csv') test = df.to_numpy()[:, 3:] test = np.delete(test, nan_cols, axis=1) assert test.shape == (172801, 118) # pkl.dump(test, open('WADI_test.pkl', 'wb')) print('WADI_test saved') print('WADI_test_label saved') # WADI labels.pkl are created manually via the description file of the dataset ================================================ FILE: explib/utils.py ================================================ import codecs import json import os import yaml def parse_file(path): """ Parse configuration values form the given file. Args: path (str): Path of the file. It should be a JSON file or a YAML file, with corresponding file extension. """ _, ext = os.path.splitext(path) config_dict = None if ext == '.json': with codecs.open(path, 'rb', 'utf-8') as f: config_dict = dict(json.load(f)) elif ext in ('.yml', '.yaml'): with codecs.open(path, 'rb', 'utf-8') as f: config_dict = dict(yaml.load(f)) else: raise ValueError('Config file of this type is not supported: {}'. format(path)) return config_dict class Singleton(object): """ Base class for singleton classes. >>> class Parent(Singleton): ... pass >>> class Child(Parent): ... pass >>> Parent() is Parent() True >>> Child() is Child() True >>> Parent() is not Child() True """ __instances_dict = {} def __new__(cls, *args, **kwargs): if cls not in Singleton.__instances_dict: Singleton.__instances_dict[cls] = \ object.__new__(cls, *args, **kwargs) return Singleton.__instances_dict[cls] ================================================ FILE: requirements.txt ================================================ numpy==1.17.0 tensorflow-gpu==1.12.0 typing-extensions==3.7.4.1 typing-inspect==0.5.0 tqdm==4.31.1 pickleshare==0.7.5 scikit-learn==0.20.3 scipy==1.2.1 pandas==0.24.2 matplotlib==2.0.2 seaborn==0.9.0 dataclasses==0.7 dataclasses-json==0.3.5 Click==7.0 fs==2.4.4 six==1.11.0 git+https://github.com/thu-ml/zhusuan.git@48c0f4e git+https://github.com/haowen-xu/tfsnippet.git@v0.2.0-alpha4 git+https://github.com/haowen-xu/ml-essentials.git
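For reference, a minimal usage sketch of the scoring and evaluation utilities defined above: `algorithm.utils.get_score` collapses per-window reconstruction probabilities into one score per timestamp, and `explib.eval_methods.get_best_f1` / `get_adjusted_composite_metrics` then compute the best-F1 and the point-adjust AUROC/AP from those scores. This is not repository code; the random arrays and shape constants are stand-ins for real model outputs and test labels, and running it needs the packages from requirements.txt since `algorithm.utils` imports tensorflow and tfsnippet at module level.

# Illustrative sketch (not repository code): score aggregation + evaluation.
import numpy as np
from algorithm.utils import get_score
from explib.eval_methods import get_best_f1, get_adjusted_composite_metrics

window_length, x_dim, data_length = 100, 38, 5000   # assumed example sizes
rng = np.random.RandomState(0)

# Stand-in for per-window reconstruction probabilities produced by the model:
# shape (data_length - window_length + 1, window_length, x_dim).
recons_probs = rng.normal(size=(data_length - window_length + 1, window_length, x_dim))
# Stand-in for the ground-truth test labels over the full sequence.
test_label = (rng.uniform(size=data_length) < 0.05).astype(np.int32)

# One score per timestamp; with the default score_avg_window_size=1 the first
# window_length - 1 timestamps are discarded, so trim the labels to match.
scores = get_score(recons_probs, preserve_feature_dim=False, score_avg_window_size=1)
labels = test_label[window_length - 1:]
assert scores.shape == labels.shape

# Both evaluators take reconstruction probabilities directly
# (lower probability == more anomalous).
(best_f1, precision, recall, tp, tn, fp, fn), threshold = get_best_f1(scores, labels)
auroc, ap, pa_f1, *_ = get_adjusted_composite_metrics(scores, labels)
print('best f1:', best_f1, 'threshold:', threshold, 'auroc:', auroc, 'ap:', ap)

Both evaluators follow the convention used throughout the repository: the score passed in is a reconstruction probability, so smaller values indicate anomalies, and `get_adjusted_composite_metrics` negates it internally before applying the point-adjust step.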