Repository: mohaseeb/shaplets-python Branch: master Commit: 5a3ab5fd09db Files: 25 Total size: 39.8 KB Directory structure: gitextract_k9abr_mg/ ├── .gitignore ├── README.md ├── example.py ├── setup.py └── shapelets_lts/ ├── __init__.py ├── classification/ │ ├── __init__.py │ └── shapelet_models.py ├── network/ │ ├── __init__.py │ ├── aggregation_layer.py │ ├── cross_entropy_loss_layer.py │ ├── linear_layer.py │ ├── network.py │ ├── sigmoid_layer.py │ └── soft_min_layer.py ├── tests/ │ ├── test_aggregation_layer.py │ ├── test_cross_entropy_loss_layer.py │ ├── test_linear_layer.py │ ├── test_sigmoid_layer.py │ ├── test_soft_min_layer.py │ └── test_utils.py └── util/ ├── __init__.py ├── plotting.py ├── soft_min_layer_factory.py ├── ucr_dataset_loader.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .idea/ *.pyc .cache/ workspace/ data loss.png *.egg-info ================================================ FILE: README.md ================================================ # shaplets Python implementation of [the Learning Time-Series Shapelets method by Josif Grabocka et al.](http://www.ismll.uni-hildesheim.de/pub/pdfs/grabocka2014e-kdd.pdf), which learns a shapelet-based time-series classifier with gradient descent. This implementation views the model as a layered graph, where each layer implements forward, backward and parameter-update methods (see the diagram below). This abstraction simplifies thinking about the algorithm and implementing it. ![Network diagram](lts-diag.png) ## Differences from the paper ## * This implementation employs two (LinearLayer + SigmoidLayer) pairs instead of the single (LinearLayer + SigmoidLayer) pair used in the paper (and shown in the diagram above). Using two pairs has yielded improved results on some datasets. To reproduce the setup from the paper, update `shapelets_lts/classification/shapelet_models.py:LtsShapeletClassifier._init_network()` (a sketch of such a change is given after the Usage section below). * The loss in this implementation is an updated version of the one in the paper, allowing a single model to be trained for all the classes in the dataset (rather than one model per class). The impact on performance was not analysed. For details check `shapelets_lts/network/cross_entropy_loss_layer.py` (the exact form used is given after the Usage section below). ## Installation ## ```bash git clone git@github.com:mohaseeb/shaplets-python.git cd shaplets-python pip install . # or, for dev # pip install .[dev] ``` ## Usage ## ```python from shapelets_lts.classification import LtsShapeletClassifier # create an LtsShapeletClassifier instance classifier = LtsShapeletClassifier( K=20, R=3, L_min=30, epocs=50, lamda=0.01, eta=0.01, shapelet_initialization='segments_centroids', plot_loss=True ) # train the classifier. # train_data.shape -> (# train samples X time-series length) # train_label.shape -> (# train samples) classifier.fit(train_data, train_label) # evaluate on test data. # test_data.shape -> (# test samples X time-series length) prediction = classifier.predict(test_data) # retrieve the learnt shapelets shapelets = classifier.get_shapelets() # and plot sample shapelets from shapelets_lts.util import plot_sample_shapelets plot_sample_shapelets(shapelets=shapelets, sample_size=36) ``` Also have a look at example.py. For stable training, the samples might need to be scaled.
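To get a network closer to the one in the paper (a single LinearLayer + SigmoidLayer pair), one option is to subclass the classifier and override `_init_network()`. Below is a minimal, untested sketch; the subclass name `PaperStyleLtsClassifier` is illustrative and not part of the library:

```python
from shapelets_lts.classification import LtsShapeletClassifier
from shapelets_lts.network import (
    CrossEntropyLossLayer, LinearLayer, Network, SigmoidLayer
)


class PaperStyleLtsClassifier(LtsShapeletClassifier):
    """Hypothetical variant using a single (LinearLayer + SigmoidLayer) pair."""

    def _init_network(self):
        self.network = Network()
        # soft-min (shapelet) layers wrapped in an AggregationLayer
        self.network.add_layer(self._get_shapelets_layer())
        # single linear + sigmoid pair mapping shapelet distances to class scores
        self.network.add_layer(
            LinearLayer(self.n_shapelets, self.output_size, self.eta,
                        self.lamda, self.train_size),
            regularized=True
        )
        self.network.add_layer(SigmoidLayer(self.output_size))
        # cross-entropy loss with L2 regularization of the linear layer
        self.network.add_layer(
            CrossEntropyLossLayer(self.lamda, self.train_size)
        )
```

`fit`, `predict` and `get_shapelets` are inherited unchanged; only the network construction differs.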
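For reference, the per-sample loss computed in `CrossEntropyLossLayer` (a direct reading of `cross_entropy_loss_layer.py`) is a sum of binary cross-entropy terms over the classes plus an L2 penalty on the parameters of the layers added with `regularized=True`:

$$L(y, \hat{y}) = \sum_{c=1}^{C}\big[-y_c \ln \hat{y}_c - (1 - y_c)\ln(1 - \hat{y}_c)\big] + \frac{\lambda}{I \cdot C}\sum_{k} w_k^{2}$$

Here $y$ is the one-hot target, $\hat{y}$ the sigmoid outputs, $C$ the number of classes, $I$ the number of training samples, $\lambda$ the `lamda` parameter, and $w$ the concatenated weights and biases of the regularized linear layers.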
Example plot from plot_sample_shapelets. ![sample_shapelets](sample_shapelets.png) ================================================ FILE: example.py ================================================ from __future__ import division, print_function from os.path import expanduser from sklearn.metrics import classification_report from shapelets_lts.classification import LtsShapeletClassifier from shapelets_lts.util import ucr_dataset_loader, plot_sample_shapelets """ This example uses a dataset in the UCR "Time Series Classification Archive" format. - Follow the instructions on the UCR page (http://www.cs.ucr.edu/~eamonn/time_series_data/) to download the dataset. You need to be patient! :) - Update the vars below to point to the correct dataset location on your machine. Otherwise, update _load_train_test_datasets() below to return your own dataset. """ ucr_dataset_base_folder = expanduser('~/ws/data/UCR_TS_Archive_2015/') ucr_dataset_name = 'Gun_Point' def main(): # load the data print('\nLoading data...') x_train, y_train, x_test, y_test = _load_train_test_datasets() # create a classifier Q = x_train.shape[1] K = int(0.15 * Q) L_min = int(0.2 * Q) clf = LtsShapeletClassifier( K=K, R=3, L_min=L_min, epocs=30, lamda=0.01, eta=0.01, shapelet_initialization='segments_centroids', plot_loss=True ) # train the classifier print('\nTraining...') clf.fit(x_train, y_train) # evaluate on test data print('\nEvaluating...') y_pred = clf.predict(x_test) print( 'classification report...\n{}' ''.format(classification_report(y_true=y_test, y_pred=y_pred)) ) # plot sample shapelets print('\nPlotting sample shapelets...') plot_sample_shapelets(shapelets=clf.get_shapelets(), sample_size=36) def _load_train_test_datasets(): """ :return: numpy arrays, train_data, train_labels, test_data, test_labels train_data and test_data shape is: (n_samples, n_features) train_labels and test_labels shape is: (n_samples) """ return ucr_dataset_loader.load_dataset( dataset_name=ucr_dataset_name, dataset_folder=ucr_dataset_base_folder ) if __name__ == '__main__': main() ================================================ FILE: setup.py ================================================ from setuptools import setup, find_packages setup( name='shapelets-lts', version='0.3.1', install_requires=[ 'numpy>=1.15.4,<2.0.0', 'pandas>=0.23.4,<2.0.0', 'scipy>=1.2.0,<2.0.0', 'scikit-learn>=0.20.2,<2.0.0', 'matplotlib>=2.2.3,<3.0.0', 'seaborn>=0.9.0,<2.0.0' ], extras_require={'dev': ['nose>=1.3.7,<2.0.0', 'ipython>=5.8.0,<6.0.0']}, packages=find_packages() ) ================================================ FILE: shapelets_lts/__init__.py ================================================ ================================================ FILE: shapelets_lts/classification/__init__.py ================================================ from .shapelet_models import LtsShapeletClassifier ================================================ FILE: shapelets_lts/classification/shapelet_models.py ================================================ from __future__ import division import copy import matplotlib.pyplot as plt import numpy as np from sklearn.base import BaseEstimator from shapelets_lts.network import AggregationLayer from shapelets_lts.network import CrossEntropyLossLayer from shapelets_lts.network import LinearLayer from shapelets_lts.network import Network from shapelets_lts.network import SigmoidLayer from shapelets_lts.network import SoftMinLayer from shapelets_lts.util import utils """ This class implements the sklearn estimator interface, so sklearn tools like GridSearchCV can be
used """ class LtsShapeletClassifier(BaseEstimator): def __init__(self, K=20, R=3, L_min=30, alpha=-100, eta=0.01, lamda=0.01, epocs=10, shapelet_initialization='segments_centroids', plot_loss=False): """ :param K: number of shapelets :param R: scales of shapelet lengths :param L_min: minimum shapelet length """ # Shapelet related self.K = K self.R = R self.n_shapelets = None self.L_min = L_min self.alpha = alpha # Training data related self.train_data = None self.train_labels = None self._orig_labels = None self.output_size = None self.train_size = None # validation data self.valid_data = None self.valid_labels = None # Hyper parameters self.epocs = epocs self.eta = eta # learning rate self.lamda = lamda # regularization parameter # other self.network = None self.shapelet_initialization = shapelet_initialization self.plot_loss = plot_loss self.loss_ax = self.valid_ax = None def set_params(self, **parameters): for parameter, value in parameters.items(): setattr(self, parameter, value) return self def fit(self, X, y): self.n_shapelets = self.K * self.R self.train_data = X self._orig_labels = y self.train_labels = utils.get_one_active_representation(y) self.train_size, self.output_size = self.train_labels.shape self._init_network() self._train_network() return self def get_shapelets(self): """ Returns: list: List of 1-d numpy arrays, one array per shapelet. """ # Each shapelet is contained in SoftMinLayer. SoftMinLayers are # stored in AggregationLayer [aggregation_layer] = [ layer for layer in self.network.get_layers() if isinstance(layer, AggregationLayer) ] return aggregation_layer.get_shapelets() def predict(self, X): tmp_network = copy.deepcopy(self.network) tmp_network.remove_loss_layer() predicted_labels = np.zeros((X.shape[0], 1)) for i in range(X.shape[0]): predicted_probabilities = tmp_network.forward(np.array([X[i, :]]), None) predicted_labels[i, 0] = np.argmax(predicted_probabilities) del tmp_network return predicted_labels def _init_network(self): print('Network initialization ...') self.network = Network() # shapelets layer self.network.add_layer(self._get_shapelets_layer()) # linear layer self.network.add_layer(LinearLayer(self.n_shapelets, 24, self.eta, self.lamda, self.train_size), regularized=True) # sigmoid layer self.network.add_layer(SigmoidLayer(24)) # linear layer self.network.add_layer(LinearLayer(24, self.output_size, self.eta, self.lamda, self.train_size), regularized=True) # sigmoid layer self.network.add_layer(SigmoidLayer(self.output_size)) # loss layer self.network.add_layer(CrossEntropyLossLayer(self.lamda, self.train_size)) def _get_shapelets_layer(self): if self.shapelet_initialization == 'segments_centroids': print('Using training data to initialize shaplets') return self._create_shapelets_layer_segments_centroids() else: print('Randomly initialize shapelets') return self._create_shapelets_layer_random() def _create_shapelets_layer_segments_centroids(self): # Shapelets are included in SoftMinLayers min_soft_layers = [] for r in range(1, self.R + 1): L = r * self.L_min top_K_centroids_scale_r = utils.get_centroids_of_segments(self.train_data, L, self.K) for centroid in top_K_centroids_scale_r: min_soft_layers.append( SoftMinLayer(np.array([centroid]), self.eta, self.alpha)) # shapelets aggregation layer aggregator = AggregationLayer(min_soft_layers) return aggregator def _create_shapelets_layer_random(self): # Shapelets are included in SoftMinLayers min_soft_layers = [] for k in range(self.K): for r in range(1, self.R + 1): min_soft_layers.append( 
SoftMinLayer(np.random.normal(loc=0, scale=1, size=(1, r * self.L_min)), self.eta, self.alpha)) # shapelets aggregation layer aggregator = AggregationLayer(min_soft_layers) return aggregator def _train_network(self): print('Training ...') loss = np.zeros((1, self.epocs * self.train_size)) valid_accur = np.zeros((1, self.epocs * self.train_size)) if self.valid_data is None: print('Using training data for validation') self.valid_data = self.train_data self.valid_labels = self._orig_labels iteration = 0 for epoc in range(self.epocs): l = 10000 for sample_id in range(self.train_size): sample = np.array([self.train_data[sample_id]]) label = np.array([self.train_labels[sample_id]]) # perform a forward pass l = self.network.forward(sample, label) # perform a backward pass self.network.backward() # perform a parameter update self.network.update_params() iteration += 1 loss[0, epoc] = l # calculate accuracy in validation set valid_epoc_accur = np.sum( np.equal( self.predict(self.valid_data).ravel(), self.valid_labels ) ) / self.valid_labels.shape[0] valid_accur[0, epoc] = valid_epoc_accur # print current loss info print( 'epoch={}/{} (iteration={}) loss={} validation accuracy={}' ''.format(epoc + 1, self.epocs, iteration, l, valid_epoc_accur) ) # plot if needed if self.plot_loss: self._plot_loss(loss, valid_accur, epoc) if self.plot_loss: plt.savefig('loss.png') def _plot_loss(self, loss, validation_acc, epocs): if self.loss_ax is None: _, self.loss_ax = plt.subplots(figsize=(10, 10)) self.valid_ax = self.loss_ax.twinx() plt.xlabel("epoc") plt.ion() self.loss_ax.plot(range(epocs + 1), loss[0, 0:epocs + 1], color='red') self.loss_ax.set_ylabel('loss', color='red') self.loss_ax.set_xlabel('epoc') self.loss_ax.tick_params('y', colors='r') self.valid_ax.plot(range(epocs + 1), validation_acc[0, 0:epocs + 1], color='blue') self.valid_ax.set_ylabel('validation accuracy', color='blue') self.valid_ax.tick_params('y', colors='b') plt.pause(0.05) ================================================ FILE: shapelets_lts/network/__init__.py ================================================ from .linear_layer import LinearLayer from .aggregation_layer import AggregationLayer from .cross_entropy_loss_layer import CrossEntropyLossLayer from .sigmoid_layer import SigmoidLayer from .soft_min_layer import SoftMinLayer from .network import Network ================================================ FILE: shapelets_lts/network/aggregation_layer.py ================================================ from __future__ import print_function import numpy as np class AggregationLayer: def __init__(self, layers): self.layers = layers self.layers_number = len(layers) self.number_params = self._get_total_number_params() # layer input holder self.current_input = None # layer output holder self.current_output = None # derivative of Loss w.r.t. 
inputs self.dL_dinput = None def _get_total_number_params(self): total = 0 for layer_number in range(self.layers_number): total += self.layers[layer_number].get_size() return total def forward(self, layer_input): self.current_input = layer_input self.current_output = np.zeros((1, self.layers_number)) for layer_number in range(self.layers_number): self.current_output[0, layer_number] = self.layers[layer_number].forward(self.current_input) return self.current_output def backward(self, dL_dout): dL_dparams = np.zeros((1, self.number_params)) layer_segment_id = 0 for layer_number in range(self.layers_number): layer_size = self.layers[layer_number].get_size() dL_dparams[0, layer_segment_id:layer_segment_id + layer_size] = self.layers[layer_number].backward( dL_dout[0, layer_number])[:] layer_segment_id += layer_size return dL_dparams def get_params(self): """ :return: """ params = np.zeros((1, self.number_params)) layer_segment_id = 0 for layer_number in range(self.layers_number): layer_size = self.layers[layer_number].get_size() params[0, layer_segment_id:layer_segment_id + layer_size] = self.layers[layer_number].get_params()[:] layer_segment_id += layer_size return params def set_params(self, params): """ :param params: :return: """ layer_segment_id = 0 for layer_number in range(self.layers_number): layer_size = self.layers[layer_number].get_size() self.layers[layer_number].set_params(params[0, layer_segment_id:layer_segment_id + layer_size]) layer_segment_id += layer_size def update_params(self): for layer_number in range(self.layers_number): self.layers[layer_number].update_params() def get_shapelets(self): return [layer.get_shapelet() for layer in self.layers] ================================================ FILE: shapelets_lts/network/cross_entropy_loss_layer.py ================================================ from __future__ import print_function from __future__ import division import numpy as np class CrossEntropyLossLayer: def __init__(self, lamda, train_size): self.lamda = lamda self.I = train_size self.output_size = 1 # layer input holder self.current_input_probabilities = None # layer output holder self.current_output = None # derivative of Loss w.r.t. 
inputs self.dL_dinput = None # target probabilities self.current_target_probabilities = None # parameters to be penalized in the loss function self.regularized_params = None def set_current_target_probabilities(self, target_probabilities): self.current_target_probabilities = target_probabilities def set_regularized_params(self, regularized_params): self.regularized_params = regularized_params def forward(self, layer_input): self.current_input_probabilities = layer_input self.current_output = np.sum( -1 * self.current_target_probabilities * np.log(self.current_input_probabilities) + ( self.current_target_probabilities - 1) * np.log( 1 - self.current_input_probabilities)) # regularization part self.current_output += self.lamda * np.sum(self.regularized_params ** 2) / ( self.I * self.current_input_probabilities.size) return self.current_output def backward(self, dL_dout): self.dL_dinput = -self.current_target_probabilities / self.current_input_probabilities + \ (1 - self.current_target_probabilities) / (1 - self.current_input_probabilities) self.dL_dinput *= dL_dout return self.dL_dinput def update_params(self): pass ================================================ FILE: shapelets_lts/network/linear_layer.py ================================================ from __future__ import print_function from __future__ import print_function from __future__ import print_function import numpy as np class LinearLayer: def __init__(self, input_size, output_size, learning_rate, lamda, training_size): """ :param input_size: :param output_size: """ self.input_size = input_size self.output_size = output_size # learning rate self.eta = learning_rate # regularization factor self.lamda = lamda # training data size self.I = training_size # layer weights self.W = None # layer biases self.W_0 = None self.set_weights(np.random.normal(loc=0, scale=1, size=(1, output_size * input_size)), np.random.normal(loc=0, scale=1, size=(1, output_size))) # layer input holder self.current_input = None # layer output holder self.current_output = None # derivative of Loss w.r.t. inputs self.dL_dinput = None # derivative of Loss w.r.t. 
weights self.dL_dparams = None def set_weights(self, W, W_0): self.W = W self.W_0 = W_0 def forward(self, layer_input): """ :param layer_input: :return: """ self.current_input = layer_input self.current_output = np.dot(np.reshape(self.W, (self.output_size, self.input_size)), self.current_input.T) + np.reshape(self.W_0, (self.output_size, 1)) return self.current_output.T def backward(self, dL_dout): """ :param dL_dout: (1 X output_size) :return: dL_dinputs (1 X input_size), dL_dW (output_size X input_size), dL_dW_0 (output_size, 1) """ # dL_dW calculations dout_dparams = np.zeros((self.output_size, self.W.size + self.W_0.size)) output_W_index = 0 output_W_0_index = self.W.size for output_id in range(self.output_size): dout_dparams[output_id, output_W_index:output_W_index + self.input_size] = self.current_input dout_dparams[output_id, output_W_0_index] = 1 output_W_index += self.input_size output_W_0_index += 1 self.dL_dparams = np.dot(dL_dout, dout_dparams) # dL_dinputs calculations self.dL_dinput = np.dot(dL_dout, np.reshape(self.W, (self.output_size, self.input_size))) return self.dL_dinput def update_params(self): """ :param update_matrix: :return: """ self.W -= self.eta * ( self.dL_dparams[0, 0:self.W.size] + 2 * self.lamda * self.W / (self.I * self.output_size)) self.W_0 -= self.eta * self.dL_dparams[0, self.W.size:] def get_dL_dparams(self): return self.dL_dparams def get_params(self): """ :return: """ return np.concatenate((self.W.reshape((1, self.W.size)), self.W_0.reshape((1, self.W_0.size))), axis=1) def set_params(self, params): self.W = np.reshape(params[:, 0:params.size - self.W_0.size], (self.output_size, self.input_size)) self.W_0 = np.reshape(params[:, params.size - self.W_0.size:], (self.output_size, 1)) ================================================ FILE: shapelets_lts/network/network.py ================================================ from . import CrossEntropyLossLayer import numpy as np class Network: def __init__(self): self.layers = [] self.regularized = [] def add_layer(self, layer, regularized=False): self.layers.append(layer) self.regularized.append(regularized) def remove_loss_layer(self): if isinstance(self.layers[-1], CrossEntropyLossLayer): del self.layers[-1] def forward(self, sample, target): layer_input = sample for layer_id in range(len(self.layers)): if isinstance(self.layers[layer_id], CrossEntropyLossLayer): self.layers[layer_id].set_current_target_probabilities(target) self.layers[layer_id].set_regularized_params(self._get_regularized_params()) layer_input = self.layers[layer_id].forward(layer_input) return layer_input def backward(self): dL_dlayer_output = 1 for layer_id in range(len(self.layers) - 1, -1, -1): dL_dlayer_output = self.layers[layer_id].backward(dL_dlayer_output) def update_params(self): for layer_id in range(len(self.layers)): self.layers[layer_id].update_params() def get_layers(self): return self.layers def _get_regularized_params(self): regularized = [] for layer_id in range(len(self.layers)): if self.regularized[layer_id]: regularized.append(self.layers[layer_id].get_params()) return np.concatenate(regularized, axis=1) ================================================ FILE: shapelets_lts/network/sigmoid_layer.py ================================================ from shapelets_lts.util import utils class SigmoidLayer: def __init__(self, input_size): self.input_size = input_size self.output_size = input_size # layer input holder self.current_input = None # layer output holder self.current_output = None # derivative of Loss w.r.t. 
inputs self.dL_dinput = None def forward(self, layer_input): self.current_input = layer_input self.current_output = utils.sigmoid(self.current_input) return self.current_output def backward(self, dL_dout): dout_dinput = self.current_output * (1 - self.current_output) self.dL_dinput = dL_dout * dout_dinput return self.dL_dinput def update_params(self): pass ================================================ FILE: shapelets_lts/network/soft_min_layer.py ================================================ import numpy as np class SoftMinLayer: def __init__(self, sequence, learning_rate=0.01, alpha=-100): """ :type alpha: :param sequence: :param alpha: """ self.S = sequence self.L = np.size(sequence, 1) self.alpha = alpha # learning rate self.eta = learning_rate # layer input holder self.T = None # layer output holder self.current_output = None # derivative of Loss w.r.t. shapelet values self.dL_dS = None # holder of pre-calculated values to speed up the calculations self.J = None # number of segments in input time-series self.D = None # (1 X J) distances between shapelet and the current time-series segments self.xi = None # (1 X J) self.psi = None self.M = None # soft minimum distance def forward(self, layer_input): self.T = layer_input self.M = self.dist_soft_min() return self.M def backward(self, dL_dout): """ :param dL_dout: :return: dL_dS (1 X self.L) """ # (1 X J): derivative of M (soft minimum) w.r.t D_j (distance between shapelet and the segment j of the # time-series) dM_dD = self.xi * (1 + self.alpha * (self.D - self.M)) / self.psi # (J X L) : derivative of D_j w.r.t. S_l (shapelet value at position l) dD_dS = np.zeros((self.J, self.L)) for j in range(self.J): dD_dS[j, :] = 2 * (self.S - self.T[0, j:j + self.L]) / self.L # (1 X L) : derivative of M w.r.t. S_l dM_dS = np.dot(dM_dD, dD_dS) # (1 X L) : derivative of L w.r.t S_l. 
Note dL_dout is dL_dM self.dL_dS = dL_dout * dM_dS return self.dL_dS def dist_soft_min(self): Q = self.T.size self.J = Q - self.L + 1 M_numerator = 0 # for each segment of T self.D = np.zeros((1, self.J)) self.xi = np.zeros((1, self.J)) self.psi = 0 for j in range(self.J): self.D[0, j] = self.dist_sqr_error(self.T[0, j:j + self.L]) self.xi[0, j] = np.exp(self.alpha * self.D[0, j]) M_numerator += self.D[0, j] * self.xi[0, j] self.psi += self.xi[0, j] M = M_numerator / self.psi return M def dist_sqr_error(self, T_j): """ :param T_j: :return: """ dist = (T_j - self.S) ** 2 dist = np.sum(dist) / self.L return dist def get_params(self): """ :return: """ return self.S def set_params(self, param): """ :param param: :return: """ self.S = param def update_params(self): self.S -= self.eta * self.dL_dS def get_size(self): return self.L def get_shapelet(self): return self.S.ravel() ================================================ FILE: shapelets_lts/tests/test_aggregation_layer.py ================================================ import numpy as np from shapelets_lts.network import AggregationLayer from shapelets_lts.util import utils, soft_min_layer_factory def test_forward(): # create a set of soft_min_layers soft_min_layers = soft_min_layer_factory.create_soft_min_layers([3, 4, 5]) # create an aggregation aggregator = AggregationLayer(soft_min_layers) # create a layer input Q = 10 T = np.random.normal(loc=0, scale=1, size=(1, Q)) # do a forward pass output = aggregator.forward(T) # compare to the truth output output_truth = np.array([[layer.forward(T) for layer in soft_min_layers]]) assert (np.array_equal(output, output_truth)) def test_backward(): layer_sizes = [3, 4, 5] n_outputs = len(layer_sizes) # create a set of soft_min_layers soft_min_layers = soft_min_layer_factory.create_soft_min_layers(layer_sizes) # create an aggregation aggregator = AggregationLayer(soft_min_layers) # create a layer input Q = 10 T = np.random.normal(loc=0, scale=1, size=(1, Q)) # create a dL_dout dL_dout = np.random.normal(loc=0, scale=1, size=(1, n_outputs)) # do forward and backward passes aggregator.forward(T) dL_dparams = aggregator.backward(dL_dout) # verify dL_dparams ###### dout_dparams_truth = utils.approximate_derivative_wrt_params(aggregator, T, n_outputs, h=0.00001) dL_dparams_truth = np.dot(dL_dout, dout_dparams_truth) result = np.isclose(dL_dparams, dL_dparams_truth, rtol=1e-05, atol=1e-03) assert result.all() ================================================ FILE: shapelets_lts/tests/test_cross_entropy_loss_layer.py ================================================ import numpy as np from shapelets_lts.network import CrossEntropyLossLayer from shapelets_lts.util import utils def test_forward(): training_data_size = 10 lamda = 0.01 params = np.array([[1, 2, 3, 4]]) # create a cross entropy layer layer = CrossEntropyLossLayer(lamda, training_data_size) # create a layer input, input_probabilities = np.array([0.1, 0.4, 0.5]) target_probabilities = np.array([0, 0, 1]) # execute the layer layer.set_current_target_probabilities(target_probabilities) layer.set_regularized_params(params) layer_output = layer.forward(input_probabilities) # compare the layer output to the expected one output_truth = 1.31933331998 # calculated by hand: cross entropy (1.30933331998) + regularization term (0.01) assert np.isclose(layer_output, output_truth) def test_backward(): training_data_size = 10 lamda = 0.01 params = np.array([[1, 2, 3, 4]]) # create a layer layer = CrossEntropyLossLayer(lamda, training_data_size) # create a layer input, input_probabilities = np.array([[0.7, 0.4, 0.5, 0.1]])
target_probabilities = np.array([[0, 1, 0, 0]]) # create dL_layer_doutput dL_doutput = 1 # perform a forward and a backward pass layer.set_current_target_probabilities(target_probabilities) layer.set_regularized_params(params) layer.forward(input_probabilities) dL_dinput = layer.backward(dL_doutput) # verify dL_dinput ###### doutput_dinput_truth = utils.approximate_derivative_wrt_inputs(layer.forward, input_probabilities, 1, h=0.000001) # n_outputs X n_inputs dL_dinput_truth = np.dot(dL_doutput, doutput_dinput_truth) result = np.isclose(dL_dinput, dL_dinput_truth) assert result.all() ================================================ FILE: shapelets_lts/tests/test_linear_layer.py ================================================ from __future__ import division from __future__ import print_function import numpy as np from shapelets_lts.network import LinearLayer from shapelets_lts.util import utils def test_fc_layer_initialization(): n_inputs = 15 n_outputs = 4 learning_rate = 0.01 regularization_parameter = 0.1 training_size = 10 fc_layer = LinearLayer(n_inputs, n_outputs, learning_rate, regularization_parameter, training_size) W = fc_layer.get_params() assert (W.shape == (1, n_outputs * (n_inputs + 1))) def test_forward(): n_inputs = 15 n_outputs = 4 learning_rate = 0.01 regularization_parameter = 0.1 training_size = 10 fc_layer = LinearLayer(n_inputs, n_outputs, learning_rate, regularization_parameter, training_size) # initialize weights W = np.ones((n_outputs, n_inputs)) W_0 = np.ones((n_outputs, 1)) fc_layer.set_weights(W, W_0) # create a layer input layer_input = np.ones((1, n_inputs)) # execute the layer layer_output = fc_layer.forward(layer_input) # compare the layer output to the expected one expected_output = np.array([[16., 16., 16., 16.]]) assert (np.array_equal(layer_output, expected_output)) def test_backword(): """ :return: """ # create a layer n_inputs = 3 n_outputs = 2 learning_rate = 0.01 regularization_parameter = 0.1 training_size = 10 fc_layer = LinearLayer(n_inputs, n_outputs, learning_rate, regularization_parameter, training_size) # create a layer input layer_input = np.random.normal(loc=0, scale=1, size=(1, n_inputs)) # create dL_layer_doutput dL_layer_output = np.random.normal(loc=0, scale=1, size=(1, n_outputs)) # do a forward and a backward pass fc_layer.forward(layer_input) dL_input = fc_layer.backward(dL_layer_output) dL_dparams = fc_layer.get_dL_dparams() # verify dL_dinput ###### doutput_input_truth = utils.approximate_derivative_wrt_inputs(fc_layer.forward, layer_input, n_outputs, h=0.01) # n_outputs X n_inputs dL_input_truth = np.dot(dL_layer_output, doutput_input_truth) result = np.isclose(dL_input, dL_input_truth) assert result.all() # verify dL_dW dout_dparams_truth = utils.approximate_derivative_wrt_params(fc_layer, layer_input, n_outputs, h=0.0001) dL_dparams_truth = np.dot(dL_layer_output, dout_dparams_truth) result = np.isclose(dL_dparams, dL_dparams_truth, rtol=1e-05, atol=1e-03) assert result.all() ================================================ FILE: shapelets_lts/tests/test_sigmoid_layer.py ================================================ from scipy.special import expit import numpy as np from shapelets_lts.network import SigmoidLayer from shapelets_lts.util import utils def test_forward(): n_inputs = 5 # create a layer sig_layer = SigmoidLayer(n_inputs) # create an input layer_input = np.zeros((1, n_inputs)) + 0.458 output = sig_layer.forward(layer_input) # compare to expected output output_truth = np.zeros((1, n_inputs)) + expit(0.458) assert 
(np.isclose(output, output_truth).all()) def test_backward(): n_inputs = 5 # create a layer sig_layer = SigmoidLayer(n_inputs) # create a layer input layer_input = np.random.normal(loc=0, scale=1, size=(1, n_inputs)) # create dL_layer_doutput dL_doutput = np.random.normal(loc=0, scale=1, size=(1, n_inputs)) # perform a forward and a backward pass sig_layer.forward(layer_input) dL_dinput = sig_layer.backward(dL_doutput) # verify dL_dinput ###### doutput_dinput_truth = utils.approximate_derivative_wrt_inputs(sig_layer.forward, layer_input, n_inputs, h=0.00001) # n_outputs X n_inputs dL_dinput_truth = np.dot(dL_doutput, doutput_dinput_truth) result = np.isclose(dL_dinput, dL_dinput_truth) assert result.all() ================================================ FILE: shapelets_lts/tests/test_soft_min_layer.py ================================================ import numpy as np from shapelets_lts.network import SoftMinLayer from shapelets_lts.util import utils def test_forward(): soft_layer = SoftMinLayer(np.ones((1, 5))) T = np.ones((1, 10)) + 1 assert (soft_layer.forward(T) == 1) def test_backward(): # create the soft min layer L = 5 shapelet = np.random.normal(loc=0, scale=1, size=(1, L)) soft_layer = SoftMinLayer(shapelet) # create a layer input Q = 10 T = np.random.normal(loc=0, scale=1, size=(1, Q)) # create dL_dM dL_dM = np.random.normal() # do a forward and a backward pass soft_layer.forward(T) dL_dS = soft_layer.backward(dL_dM) # verify dL_dS ###### dM_dS_truth = utils.approximate_derivative_wrt_params(soft_layer, T, 1, h=0.00001) dL_dS_truth = dL_dM * dM_dS_truth result = np.isclose(dL_dS, dL_dS_truth, rtol=1e-05, atol=1e-04) assert result.all() def test_shapelet_dist_sqr_error(): soft_layer = SoftMinLayer(np.ones((1, 10))) assert (soft_layer.dist_sqr_error(np.zeros((1, 10))) == 1) ================================================ FILE: shapelets_lts/tests/test_utils.py ================================================ import numpy as np from shapelets_lts.util import utils def test_get_centroids(): cluster_size = 5 n_dims = 2 n_clusters = 3 cluster_1_data = np.random.normal(loc=0, scale=1, size=(cluster_size, n_dims)) cluster_2_data = np.random.normal(loc=5, scale=1, size=(cluster_size, n_dims)) cluster_3_data = np.random.normal(loc=9, scale=1, size=(cluster_size, n_dims)) data = np.concatenate((cluster_1_data, cluster_2_data, cluster_3_data), axis=0) centroids = utils.get_centroids(data, n_clusters) assert (centroids.shape == (n_clusters, n_dims)) def test_segment_dataset(): I = 2 # number of time-series Q = 4 # time series size L = 2 # segment length J = Q - L + 1 # segments per time-series data = np.random.normal(loc=5, scale=1, size=(I, Q)) S = utils.segment_dataset(data, 2) # segment assert (S.shape == (J * I, L)) assert (np.array_equal(S[I * J - 1], data[I - 1, Q - L:])) def test_get_centroids_of_segments(): n_samples = 50 n_dims = 2 n_clusters = 2 cluster_1_data = np.random.normal(loc=0, scale=0.01, size=(n_samples, n_dims)) cluster_2_data = np.random.normal(loc=5, scale=0.01, size=(n_samples, n_dims)) data = np.concatenate((cluster_1_data, cluster_2_data), axis=1) centroids = utils.get_centroids_of_segments(data, n_dims, n_clusters + 1) # centroids should be close to (0,0) (0,5) (5,5) centroid1_occur = 0 centroid2_occur = 0 centroid3_occur = 0 for centroid in centroids: if np.isclose(centroid, (0, 0), rtol=1e-05, atol=1e-02).all(): centroid1_occur += 1 if np.isclose(centroid, (0, 5), rtol=1e-05, atol=1e-02).all(): centroid2_occur += 1 if np.isclose(centroid, (5, 5), rtol=1e-05, 
atol=1e-02).all(): centroid3_occur += 1 assert (centroid1_occur == centroid2_occur == centroid3_occur == 1) ================================================ FILE: shapelets_lts/util/__init__.py ================================================ from __future__ import division, print_function from .plotting import plot_sample_shapelets ================================================ FILE: shapelets_lts/util/plotting.py ================================================ from __future__ import division, print_function from random import sample import pandas as pd import seaborn as sns def plot_sample_shapelets(shapelets, sample_size=1e6): """Plots a random sample from the passed shapelets. Args: shapelets (list): list of 1-d numpy arrays, one for each shapelet. sample_size: number of random shapelets to be selected for plotting. """ # select maximum of len(shapelets) sample shapelets some_shapelets = sample(shapelets, min(sample_size, len(shapelets))) # put the shapelets in a format suitable for plotting with seaborn def _shapelet_to_df(id_, shapelet): return pd.DataFrame( dict(shapelet=id_, X=range(len(shapelet)), Y=shapelet) ) shapelets_df = pd.concat( objs=[_shapelet_to_df(_id, s) for _id, s in enumerate(some_shapelets)], axis=0 ).reset_index(drop=True) # plot the shapelets grid = sns.FacetGrid(shapelets_df, col="shapelet", col_wrap=6) grid.map(sns.lineplot, "X", "Y") ================================================ FILE: shapelets_lts/util/soft_min_layer_factory.py ================================================ import numpy as np from shapelets_lts.network.soft_min_layer import SoftMinLayer def create_soft_min_layers(sizes): layers = [] for size in sizes: layers.append(SoftMinLayer(np.ones((1, size)))) return layers ================================================ FILE: shapelets_lts/util/ucr_dataset_loader.py ================================================ from os import path from numpy import genfromtxt def load_dataset(dataset_name, dataset_folder): dataset_path = path.join(dataset_folder, dataset_name) train_file_path = path.join(dataset_path, '{}_TRAIN'.format(dataset_name)) test_file_path = path.join(dataset_path, '{}_TEST'.format(dataset_name)) # training data train_raw_arr = genfromtxt(train_file_path, delimiter=',') train_data = train_raw_arr[:, 1:] train_labels = train_raw_arr[:, 0] - 1 # one was subtracted to change the labels to 0 and 1 instead of 1 and 2 # test_data test_raw_arr = genfromtxt(test_file_path, delimiter=',') test_data = test_raw_arr[:, 1:] test_labels = test_raw_arr[:, 0] - 1 return train_data, train_labels, test_data, test_labels ================================================ FILE: shapelets_lts/util/utils.py ================================================ from __future__ import print_function import numpy as np import math from sklearn.cluster import KMeans def approximate_derivative_wrt_inputs(function, inputs, n_outputs, h): """ https://en.wikipedia.org/wiki/Finite_difference#Relation_with_derivatives :param function: :param inputs: :param n_outputs: :param h: :return: """ n_inputs = inputs.size dFunction_dinputs = np.zeros((n_outputs, n_inputs)) for input_id in range(n_inputs): f1 = function(inputs) # 1 X n_outputs inputs[0, input_id] += h f2 = function(inputs) # 1 X n_outputs inputs[0, input_id] -= h dFunction_dinputs[:, input_id] = (f2 - f1) / h return dFunction_dinputs def approximate_derivative_wrt_params(layer, inputs, n_outputs, h): """ https://en.wikipedia.org/wiki/Finite_difference#Relation_with_derivatives :param n_outputs: :param layer: :param 
inputs: :param h: :return: """ params = layer.get_params() n_params = params.size dlayer_dparams = np.zeros((n_outputs, n_params)) for param_id in range(n_params): f1 = layer.forward(inputs) # 1 X n_outputs params[0, param_id] += h layer.set_params(params) f2 = layer.forward(inputs) # 1 X n_outputs params[0, param_id] -= h layer.set_params(params) dlayer_dparams[:, param_id] = (f2 - f1) / h return dlayer_dparams def sigmoid(X): return np.array([[(1 / (1 + math.exp(-x))) for x in X[0, :]]]) def get_one_active_representation(labels): classes = np.unique(labels) one_active_labels = np.zeros((labels.size, classes.size)) for label_id in range(labels.size): one_active_labels[label_id, np.where(classes == labels[label_id])] = 1 return one_active_labels def get_centroids_of_segments(data, L, K): """ :param data: the dataset :param L: segment length :param K: number of centroids :return: the top K centroids of the clustered segments """ data_segmented = segment_dataset(data, L) centroids = get_centroids(data_segmented, K) return centroids def segment_dataset(data, L): """ :param data: :param L: segment length :return: """ # number of time series, time series size I, Q = data.shape # number of segments in a time series J = Q - L + 1 S = np.zeros((J * I, L)) # create segments for i in range(I): for j in range(J): S[i * J + j, :] = data[i, j:j + L] return S def get_centroids(data, k): clusterer = KMeans(n_clusters=k) clusterer.fit(data) return clusterer.cluster_centers_
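The docstring of get_centroids_of_segments above only lists its parameters; for illustration, here is a minimal usage sketch (synthetic data, arbitrary L and K values, not a file from the repository) showing how these helpers implement the 'segments_centroids' shapelet initialization used by LtsShapeletClassifier: each series is cut into overlapping length-L segments, the segments are clustered with k-means, and the K cluster centroids become the initial shapelets.

```python
import numpy as np

from shapelets_lts.util import utils

# 5 synthetic time series of length 40 (stand-in for real training data)
data = np.random.normal(size=(5, 40))
L, K = 8, 3  # shapelet length and number of initial shapelets

# all overlapping length-L segments: shape (5 * (40 - 8 + 1), 8) = (165, 8)
segments = utils.segment_dataset(data, L)

# k-means centroids of those segments: shape (K, L) = (3, 8)
centroids = utils.get_centroids_of_segments(data, L, K)

print(segments.shape, centroids.shape)
```

This mirrors what `LtsShapeletClassifier._create_shapelets_layer_segments_centroids()` does for each scale r, with L = r * L_min.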