Repository: llp1992/MachineLearning Branch: master Commit: 315c00285b75 Files: 58 Total size: 120.4 KB Directory structure: gitextract_cc07p_s4/ ├── Adaboost/ │ ├── README.md │ ├── adaboost.py │ └── testAdaboost.py ├── Decision-Tree/ │ ├── README.md │ ├── TestTree.py │ └── Tree.py ├── DeepLearning/ │ ├── CNN_cifar-10/ │ │ └── cifar.py │ ├── CNN_mnist/ │ │ ├── cnn.py │ │ ├── data.py │ │ └── trainCNN.py │ └── UFLDL/ │ ├── Vectorization_sparseae_exercise/ │ │ ├── checkNumericalGradient.m │ │ ├── computeNumericalGradient.m │ │ ├── display_network.m │ │ └── initializeParameters.m │ └── stl_exercise/ │ ├── display_network.m │ ├── feedForwardAutoencoder.m │ ├── initializeParameters.m │ ├── loadMNISTImages.m │ ├── loadMNISTLabels.m │ ├── softmaxCost.m │ ├── softmaxPredict.m │ ├── softmaxTrain.m │ ├── sparseAutoencoderCost.m │ └── stlExercise.m ├── GMM/ │ ├── README.md │ ├── gmm.m │ ├── gmm.py │ ├── testGMM.m │ └── testSet.txt ├── KNN/ │ ├── KNN.m │ ├── KNN.py │ ├── KNNdatgingTest.m │ ├── README.md │ ├── datingTestSet2.txt │ └── handWritingTest.m ├── Kmeans/ │ ├── README.md │ ├── distEclud.m │ ├── kMeans.m │ ├── testSet.txt │ └── testkMeans.m ├── Logistic-regression/ │ ├── ImproveStocGradAscent.m │ ├── README.md │ ├── gradAscent.m │ ├── stocGradAscent.m │ └── testSet.txt ├── MLP/ │ ├── dualperceptron.py │ ├── perceptron.py │ └── testSet.txt ├── PCA/ │ ├── PCA.m │ ├── README.md │ ├── testPCA.m │ └── testSet.txt ├── README.md ├── bikMeans/ │ ├── README.md │ ├── bikMeans.m │ └── testSet.txt └── kalmanFilter/ ├── KF.m └── kalmanFiltering.m ================================================ FILE CONTENTS ================================================ ================================================ FILE: Adaboost/README.md ================================================ Adaboost 算法实现。 更多机器学习深度学习博客请关注CSDN博客:[LiuLongpo](http://blog.csdn.net/llp1992) ================================================ FILE: Adaboost/adaboost.py ================================================ # -*- coding: utf-8 -*- """ Created on Thu Jun 11 12:42:48 2015 @author: liu """ from numpy import * def loadSimpData(): dataMat = array([[1.,2.1],[1.5,1.6],[1.3,1.],[1.,1.],[2.,1.],[1.2,1.1],\ [1.1,0.4],[0.9,1.3],[0.86,1.2],[1.8,1.8],[1.7,1.5],[1.9,1.8]]) classLabels = array([1.0,1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,\ -1.0,1.0,1.0,1.0]) return dataMat,classLabels # 单树桩分类器,也就是简单的单层决策树弱分类器 # 该函数根据某个最好的特征的最好划分点对数据进行分类 # demen就是特征,threshVal就是划分点,retArray就是返回的分类结果 def stumpClassify(dataMatrix,dimen,threshVal,threshIneq): retArray = ones((shape(dataMatrix)[0],1)) if threshIneq == 'lt': retArray[dataMatrix[:,dimen]<=threshVal] = -1.0 else: retArray[dataMatrix[:,dimen]>threshVal] = -1.0 return retArray # 创建树桩,返回最好的树桩和该树桩的分类最小误差以及分类结果 # 该函数用于从数据集中找到最好的划分特征以及该特征的最好划分点 def buildStump(dataArr,classLabels,D): # 建立备份的数据 dataMatrix = mat(dataArr); labelMat = mat(classLabels).T # m 是行,也就是每个样本,n是列,也就是每个特征 m,n = shape(dataMatrix) # 步数的设置,也就是在每个特征的最大值和最小值中分几次设置阈值进行数据划分 # 也就是获取这个特征的最佳划分点,步数越高,特征点寻找得越精细,但耗时更多 # 用字典来存储bestStump的数据 numSteps = 10.0;bestStump = {};bestClassEst = mat(zeros((m,1))) minError = inf # 对每个特征 for i in range(n): # 获取每个特征的最小值和最大值 rangeMin = dataMatrix[:,i].min(); rangeMax = dataMatrix[:,i].max(); stepSize = (rangeMax-rangeMin)/numSteps for j in range(-1,int(numSteps)+1): for inequal in ['lt','gt']: threshVal = (rangeMin + float(j) * stepSize) # 预测值 predictedVals = stumpClassify(dataMatrix,i,threshVal,inequal) errArr = mat(ones((m,1))) errArr[predictedVals == labelMat] = 0 # 样本权重乘以样本误差 weightedError = D.T * errArr # 下面的 .2f 表示浮点数小数殿后两位, .3f 表示小数点后3位 print "split:dim %d,thresh %.2f, thresh ineqal: %s,the weighted eror is %.3f" %\ (i,threshVal,inequal,weightedError) if weightedError < minError: minError = weightedError # 最好的分类结果 bestClassEst = predictedVals.copy() bestStump['dim'] = i bestStump['thresh'] = threshVal bestStump['ineq'] = inequal return bestStump,minError,bestClassEst def adaBoostTrainDS(dataArr,classLabels,numIt = 40): weakClassArr = [] m = shape(dataArr)[0] D = mat(ones((m,1))/m) # 创建矩阵 mat aggClassEst = mat(zeros((m,1))) for i in range(numIt): bestStump,error,classEst = buildStump(dataArr,classLabels,D) print 'D:',D.T # log 就是 ln 公式: a = 0.5*ln((1-e)/e) alpha = float(0.5*log((1.0-error)/max(error,1e-16))) bestStump['alpha'] = alpha # 将当前的最好的树桩添加到弱分类其数组中 weakClassArr.append(bestStump) print 'classEst:',classEst.T # D权重的更新公式,利用原本的类别classLabels与划分的类别classEst做乘积 # 用来同时计算正确划分和错误划分的公式,也就是自动确定正负号 expon = multiply(-1*alpha*mat(classLabels).T,classEst) # 更新权重D D = multiply(D,exp(expon)) D = D/D.sum() aggClassEst += alpha*classEst print 'aggClassEst:',aggClassEst.T aggErrors = multiply(sign(aggClassEst)!=mat(classLabels).T,ones((m,1))) errorRate = aggErrors.sum()/m print 'total error:',errorRate,"\n" if errorRate == 0.0:break; return weakClassArr # 利用学习得到的多个级联弱分类器进行数据分类 def adaClassify(datToClass,classifierArr): dataMatrix = mat(datToClass) m = shape(dataMatrix)[0] aggClassEst = mat(zeros((m,1))) for i in range(len(classifierArr)): classEst = stumpClassify(dataMatrix,classifierArr[i]['dim'],\ classifierArr[i]['thresh'],classifierArr[i]['ineq']) aggClassEst += classifierArr[i]['alpha'] * classEst print aggClassEst return sign(aggClassEst) ================================================ FILE: Adaboost/testAdaboost.py ================================================ # -*- coding: utf-8 -*- """ Created on Thu Jun 11 12:57:27 2015 @author: LiuLongpo """ import optunity import adaboost import matplotlib.pyplot as plt from numpy import * dataMat,classLabels = adaboost.loadSimpData() #plt.scatter(dataMat[:,0],dataMat[:,1]) # D是样本的权重矩阵 D = mat(ones((5,1))/5) #adaboost.buildStump(dataMat,classLabels,D) print 'data train...' classifierArr = adaboost.adaBoostTrainDS(dataMat,classLabels,30) print 'getClassifier:',classifierArr print 'data predict...' # 学习得到3个分类器,predict时,每一个分类器级联分类得到的预测累加值 # aggClassEst越来越远离0,也就是正越大或负越大,也就是分类结果越来越强 adaboost.adaClassify([[1,0.8],[1.8,2]],classifierArr) # 0,lt,1.3 1,lt,1.0 0,lt,0.9 plt.figure() I = nonzero(classLabels>0)[0] plt.scatter(dataMat[I,0],dataMat[I,1],s=60,c=u'r',marker=u'o') I = nonzero(classLabels<0)[0] plt.scatter(dataMat[I,0],dataMat[I,1],s=60,c=u'b',marker=u'o') plt.plot([1.32,1.32],[0.5,2.5]) plt.plot([0.5,2.5],[1.42,1.42]) plt.plot([0.97,0.97],[0.5,2.5]) ''' plt.figure() I = nonzero(classLabels>0)[0] plt.scatter(dataMat[I,0],dataMat[I,1],s=60,c=u'r',marker=u'o') I = nonzero(classLabels<0)[0] plt.scatter(dataMat[I,0],dataMat[I,1],s=60,c=u'b',marker=u'o') plt.plot([1.32*1.19,1.32*1.19],[0.5,2.5]) plt.plot([0.5,2.5],[1.42*1.52,1.42*1.52]) plt.plot([0.97*1.13,0.97*1.13],[0.5,2.5]) #plt.scatter([0,5],[0,5]) ''' ================================================ FILE: Decision-Tree/README.md ================================================ 决策树算法实现。 更多机器学习深度学习博客请关注CSDN博客:[LiuLongpo](http://blog.csdn.net/llp1992) ================================================ FILE: Decision-Tree/TestTree.py ================================================ import Tree dataSet,label = Tree.createDataSet() print 'dataSet:' , dataSet #Tree.createTree(dataSet,label) ================================================ FILE: Decision-Tree/Tree.py ================================================ from math import log # 计算熵 def calcShannonEnt(dataSet): numEntries = len(dataSet) labelCounts = {} for featVec in dataSet: currentLabel = featVec[-1] if currentLabel not in labelCounts.keys(): labelCounts[currentLabel] = 0 labelCounts[currentLabel] += 1 shannoEnt = 0.0 for key in labelCounts: prob = float(labelCounts[key])/numEntries shannoEnt -= prob * log(prob,2) return shannoEnt def createDataSet(): dataSet = [[1,1,0,'yes'],[1,1,1,'yes'],[1,0,1,'no'],[0,1,0,'no'],[0,1,0,'no']] label = ['no surfacing','flippers'] return dataSet,label # 划分数据集 # axis为特征,也就是对某一个特征进行判定,比如身高 # value为特征的值,也就是说,某一个特征的不同值, # 比如身高这个特征有高和矮之分,高跟矮就是这个value # 数据集为 [ [1,1,0,'yes'],[1,1,1,'yes'],[1,0,1,'no'],[0,1,0,'no'],[0,1,0,'no'] ] # 每一个 feaVec 就是一行,也就是一个样本数据 def spiltDataSet(dataSet,axis,value): retDataSet = [] for featVec in dataSet: # 如果当前样本数据的特征 axis 的值 featVec[axis] 与我们要求的value相等 # 也就是我们认为当前的样本符合我们的要求 if featVec[axis] == value: # 对于符合要求的样本数据 # 我们将这个特征剪掉,剩下的数据组成一个新的子样本并返回 redecedFeatVec = featVec[:axis] redecedFeatVec.extend(featVec[axis+1:]) retDataSet.append(redecedFeatVec) return retDataSet # 寻找最优的划分特征 def chooseBestFeatureToSplit(dataSet): # 为什么减一? numFeatures = len(dataSet[0])-1 # 计算源数据的熵 baseEntropy = calcShannonEnt(dataSet) bestInfoGain = 0.0 bestFeature = -1 for i in xrange(numFeatures): # 获取每一列,因为每一列都属于一个特征的不同value featList = [example[i] for example in dataSet] # 确保每个值都是唯一的 uniqueVals = set(featList) newEntorpy = 0.0 print 'spilt feature ', i # 对每个value进行划分 for value in uniqueVals: subDataSet = spiltDataSet(dataSet,i,value) print 'subDataSet:' , subDataSet prob = len(subDataSet)/float(len(dataSet)) # 计算该特征进行划分后的信息 newEntorpy += prob * calcShannonEnt(subDataSet) # 计算该特征进行划分后的信息增益 infoGain = baseEntropy - newEntorpy # 寻找信息增益最大的特征就是最好划分的特征 if (infoGain > bestInfoGain): bestInfoGain = infoGain bestFeature = i print 'BestFeatureToSplit' print bestFeature return bestFeature # 投票表决,获取概率最大的那个分类 def majorityCnt(classList): classCount = {} for vote in classList: if vote not in classCount.keys():classCount[vote] = 0 classCount[vote] += 1 # 反向排序,也就是从大到小 sortedClassCount = sorted(classCount.iteritems(),key = operator.itemgetter(1),reverse=True) return sortedClassCount[0][0] def createTree(dataSet,labels): # 获取每个样本数据的分类 classList = [example[-1] for example in dataSet] # 第一个迭代终止条件:选取第一个classList中的类,判断它出现的次数是否与 # classList的长度相等,如果相等,证明classList中的所有类已经被分为同一种类 # 则返回该类 if classList.count(classList[0]) == len(classList): return classList[0] #如果所有特征都迭代完了,此时还没返回, # 说明当前仍然不能讲数据集划分为仅包含唯一类别的分组 # 此时则返回次数出现最多的那个类别最为当前数据集的类别 if len(dataSet[0]) == 1: return majorityCnt(classList) # 寻找最优的划分特征 bestFeat = chooseBestFeatureToSplit(dataSet) bestFeatLabel = labels[bestFeat] myTree = {bestFeatLabel:{}} del(labels[bestFeat]) # 获取最优特征的各个属性 featValues = [example[bestFeat] for example in dataSet] uniqueVals = set(featValues) for value in uniqueVals: subLabels = labels[:] # 迭代创建决策树 myTree[bestFeatLabel][value] = createTree(spiltDataSet(dataSet,bestFeat,value),subLabels) return myTree ================================================ FILE: DeepLearning/CNN_cifar-10/cifar.py ================================================ # -*- coding: utf-8 -*- """ Created on Thu Aug 27 11:27:34 2015 @author: lab-liu.longpo """ from __future__ import absolute_import from __future__ import print_function from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation, Flatten from keras.layers.convolutional import Convolution2D, MaxPooling2D from keras.optimizers import SGD, Adadelta, Adagrad from keras.utils import np_utils, generic_utils import matplotlib.pyplot as plt import numpy as np import scipy.io as sio d = sio.loadmat('data.mat') data = d['d'] label = d['l'] data = np.reshape(data,(50000,3,32,32)) label = np_utils.to_categorical(label, 10) print ('finish loading data') model = Sequential() model.add(Convolution2D(32, 3, 5, 5, border_mode='valid')) model.add(Activation('relu')) #model.add(MaxPooling2D(poolsize=(2, 2))) model.add(Dropout(0.25)) model.add(Convolution2D(32, 32, 5, 5, border_mode='valid')) model.add(Activation('relu')) model.add(MaxPooling2D(poolsize=(2, 2))) model.add(Dropout(0.25)) model.add(Convolution2D(64, 32, 3, 3, border_mode='valid')) model.add(Activation('relu')) model.add(MaxPooling2D(poolsize=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(64*5*5, 512, init='normal')) model.add(Activation('tanh')) model.add(Dense(512, 10, init='normal')) model.add(Activation('softmax')) sgd = SGD(l2=0.001,lr=0.0065, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd,class_mode="categorical") #checkpointer = ModelCheckpoint(filepath="weight.hdf5",verbose=1,save_best_only=True) #model.fit(data, label, batch_size=100,nb_epoch=10,shuffle=True,verbose=1,show_accuracy=True,validation_split=0.2,callbacks=[checkpointer]) result = model.fit(data, label, batch_size=50,nb_epoch=35,shuffle=True,verbose=1,show_accuracy=True,validation_split=0.2) #model.save_weights(weights,accuracy=False) # plot the result plt.figure plt.plot(result.epoch,result.history['acc'],label="acc") plt.plot(result.epoch,result.history['val_acc'],label="val_acc") plt.scatter(result.epoch,result.history['acc'],marker='*') plt.scatter(result.epoch,result.history['val_acc']) plt.legend(loc='under right') plt.show() plt.figure plt.plot(result.epoch,result.history['loss'],label="loss") plt.plot(result.epoch,result.history['val_loss'],label="val_loss") plt.scatter(result.epoch,result.history['loss'],marker='*') plt.scatter(result.epoch,result.history['val_loss'],marker='*') plt.legend(loc='upper right') plt.show() ================================================ FILE: DeepLearning/CNN_mnist/cnn.py ================================================ # -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import print_function from keras.preprocessing.image import ImageDataGenerator from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation, Flatten from keras.layers.advanced_activations import PReLU from keras.layers.convolutional import Convolution2D, MaxPooling2D from keras.optimizers import SGD, Adadelta, Adagrad from keras.utils import np_utils, generic_utils from data import load_data import matplotlib.pyplot as plt ''' train the dataset of face 42000 image learning rate : 0.0015 pathc size:100 dropout :0.25 l2 = 0.01 ''' def funcnn(LR,BS): data, label = load_data() label = np_utils.to_categorical(label, 10) model = Sequential() model.add(Convolution2D(4, 1, 5, 5, border_mode='valid')) model.add(Activation('relu')) model.add(Dropout(0.25)) model.add(Convolution2D(8,4, 3, 3, border_mode='valid')) model.add(Activation('relu')) model.add(MaxPooling2D(poolsize=(2, 2))) model.add(Dropout(0.25)) model.add(Convolution2D(16, 8, 3, 3, border_mode='valid')) model.add(Activation('relu')) model.add(MaxPooling2D(poolsize=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(16*4*4, 256, init='normal')) model.add(Activation('tanh')) model.add(Dense(256, 10, init='normal')) model.add(Activation('softmax')) sgd = SGD(l2=0.001,lr=LR, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd,class_mode="categorical") #checkpointer = ModelCheckpoint(filepath="weight.hdf5",verbose=1,save_best_only=True) #model.fit(data, label, batch_size=100,nb_epoch=10,shuffle=True,verbose=1,show_accuracy=True,validation_split=0.2,callbacks=[checkpointer]) result = model.fit(data, label, batch_size=BS,nb_epoch=20,shuffle=True,verbose=1,show_accuracy=True,validation_split=0.2) #model.save_weights(weights,accuracy=False) # plot the result plt.figure plt.plot(result.epoch,result.history['acc'],label="acc") plt.plot(result.epoch,result.history['val_acc'],label="val_acc") plt.scatter(result.epoch,result.history['acc'],marker='*') plt.scatter(result.epoch,result.history['val_acc']) plt.legend(loc='under right') plt.show() plt.figure plt.plot(result.epoch,result.history['loss'],label="loss") plt.plot(result.epoch,result.history['val_loss'],label="val_loss") plt.scatter(result.epoch,result.history['loss'],marker='*') plt.scatter(result.epoch,result.history['val_loss'],marker='*') plt.legend(loc='upper right') plt.show() ================================================ FILE: DeepLearning/CNN_mnist/data.py ================================================ # -*- coding: utf-8 -*- import os from PIL import Image import numpy as np def load_data(): print 'start loading data...' data = np.empty((42000,1,28,28),dtype="float32") label = np.empty((42000,),dtype="uint8") imgs = os.listdir("./mnist") num = len(imgs) for i in range(num): img = Image.open("./mnist/"+imgs[i]) arr = np.asarray(img,dtype="float32") data[i,:,:,:] = arr label[i] = int(imgs[i].split('.')[0]) return data,label ================================================ FILE: DeepLearning/CNN_mnist/trainCNN.py ================================================ # -*- coding: utf-8 -*- """ Created on Tue Aug 25 10:59:38 2015 @author: lab-liu.longpo """ from cnn import funcnn import numpy as np def floatrange(start,stop,steps): return [start+float(i) * (stop-start)/(float(steps)-1) for i in range(steps)] #LR = [0.03] #LR = floatrange(0.033,0.036,10) LR = [0.0015] BS = [100] #result = np.empty((3,8,10),dtype="float32") k = 0; for i in range(1): for j in range(1): print 'test',k k = k+1 tmp = funcnn(LR[i],BS[j]) #result[i,j,:] = tmp.history['val_acc'] print 'learning rate:',LR[i],'batch size:',BS[j] ================================================ FILE: DeepLearning/UFLDL/Vectorization_sparseae_exercise/checkNumericalGradient.m ================================================ function [] = checkNumericalGradient() % This code can be used to check your numerical gradient implementation % in computeNumericalGradient.m % It analytically evaluates the gradient of a very simple function called % simpleQuadraticFunction (see below) and compares the result with your numerical % solution. Your numerical gradient implementation is incorrect if % your numerical solution deviates too much from the analytical solution. % Evaluate the function and gradient at x = [4; 10]; (Here, x is a 2d vector.) x = [4; 10]; [value, grad] = simpleQuadraticFunction(x); % Use your code to numerically compute the gradient of simpleQuadraticFunction at x. % (The notation "@simpleQuadraticFunction" denotes a pointer to a function.) numgrad = computeNumericalGradient(@simpleQuadraticFunction, x); % Visually examine the two gradient computations. The two columns % you get should be very similar. disp([numgrad grad]); fprintf('The above two columns you get should be very similar.\n(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n'); % Evaluate the norm of the difference between two solutions. % If you have a correct implementation, and assuming you used EPSILON = 0.0001 % in computeNumericalGradient.m, then diff below should be 2.1452e-12 diff = norm(numgrad-grad)/norm(numgrad+grad); disp(diff); fprintf('Norm of the difference between numerical and analytical gradient (should be < 1e-9)\n\n'); end function [value,grad] = simpleQuadraticFunction(x) % this function accepts a 2D vector as input. % Its outputs are: % value: h(x1, x2) = x1^2 + 3*x1*x2 % grad: A 2x1 vector that gives the partial derivatives of h with respect to x1 and x2 % Note that when we pass @simpleQuadraticFunction(x) to computeNumericalGradients, we're assuming % that computeNumericalGradients will use only the first returned value of this function. value = x(1)^2 + 3*x(1)*x(2); grad = zeros(2, 1); % 对X1 求偏导 grad(1) = 2*x(1) + 3*x(2); % 对X2 求偏导 grad(2) = 3*x(1); end ================================================ FILE: DeepLearning/UFLDL/Vectorization_sparseae_exercise/computeNumericalGradient.m ================================================ function numgrad = computeNumericalGradient(J, theta) % numgrad = computeNumericalGradient(J, theta) % theta: a vector of parameters % J: a function that outputs a real-number. Calling y = J(theta) will return the % function value at theta. % Initialize numgrad with zeros 2x1 这是输入X的行数,其实也就是参数W的个数 numgrad = zeros(size(theta)); %% ---------- YOUR CODE HERE -------------------------------------- % Instructions: % Implement numerical gradient checking, and return the result in numgrad. % (See Section 2.3 of the lecture notes.) % You should write code so that numgrad(i) is (the numerical approximation to) the % partial derivative of J with respect to the i-th input argument, evaluated at theta. % I.e., numgrad(i) should be the (approximately) the partial derivative of J with % respect to theta(i). % % Hint: You will probably want to compute the elements of numgrad one at a time. epsilon = 0.000001; % 2 x2 得到参数的个数 numW = size(theta,1); I = eye(numW); I = I * epsilon; for j = 1:numW numgrad(j) = (J(theta+I(:,j)) - J(theta-I(:,j))) / (2 * epsilon); end %% --------------------------------------------------------------- end ================================================ FILE: DeepLearning/UFLDL/Vectorization_sparseae_exercise/display_network.m ================================================ function [h, array] = display_network(A, opt_normalize, opt_graycolor, cols, opt_colmajor) % This function visualizes filters in matrix A. Each column of A is a % filter. We will reshape each column into a square image and visualizes % on each cell of the visualization panel. % All other parameters are optional, usually you do not need to worry % about it. % opt_normalize: whether we need to normalize the filter so that all of % them can have similar contrast. Default value is true. % opt_graycolor: whether we use gray as the heat map. Default is true. % cols: how many columns are there in the display. Default value is the % squareroot of the number of columns in A. % opt_colmajor: you can switch convention to row major for A. In that % case, each row of A is a filter. Default value is false. warning off all if ~exist('opt_normalize', 'var') || isempty(opt_normalize) opt_normalize= true; end if ~exist('opt_graycolor', 'var') || isempty(opt_graycolor) opt_graycolor= true; end if ~exist('opt_colmajor', 'var') || isempty(opt_colmajor) opt_colmajor = false; end % rescale A = A - mean(A(:)); if opt_graycolor, colormap(gray); end % compute rows, cols [L M]=size(A); sz=sqrt(L); buf=1; if ~exist('cols', 'var') if floor(sqrt(M))^2 ~= M n=ceil(sqrt(M)); while mod(M, n)~=0 && n<1.2*sqrt(M), n=n+1; end m=ceil(M/n); else n=sqrt(M); m=n; end else n = cols; m = ceil(M/n); end array=-ones(buf+m*(sz+buf),buf+n*(sz+buf)); if ~opt_graycolor array = 0.1.* array; end if ~opt_colmajor k=1; for i=1:m for j=1:n if k>M, continue; end clim=max(abs(A(:,k))); if opt_normalize array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/clim; else array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/max(abs(A(:))); end k=k+1; end end else k=1; for j=1:n for i=1:m if k>M, continue; end clim=max(abs(A(:,k))); if opt_normalize array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/clim; else array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz); end k=k+1; end end end if opt_graycolor h=imagesc(array,'EraseMode','none',[-1 1]); else h=imagesc(array,'EraseMode','none',[-1 1]); end axis image off drawnow; warning on all ================================================ FILE: DeepLearning/UFLDL/Vectorization_sparseae_exercise/initializeParameters.m ================================================ function theta = initializeParameters(hiddenSize, visibleSize) %% Initialize parameters randomly based on layer sizes. r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] W1 = rand(hiddenSize, visibleSize) * 2 * r - r; W2 = rand(visibleSize, hiddenSize) * 2 * r - r; b1 = zeros(hiddenSize, 1); b2 = zeros(visibleSize, 1); % Convert weights and bias gradients to the vector form. % This step will "unroll" (flatten and concatenate together) all % your parameters into a vector, which can then be used with minFunc. theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/display_network.m ================================================ function [h, array] = display_network(A, opt_normalize, opt_graycolor, cols, opt_colmajor) % This function visualizes filters in matrix A. Each column of A is a % filter. We will reshape each column into a square image and visualizes % on each cell of the visualization panel. % All other parameters are optional, usually you do not need to worry % about it. % opt_normalize: whether we need to normalize the filter so that all of % them can have similar contrast. Default value is true. % opt_graycolor: whether we use gray as the heat map. Default is true. % cols: how many columns are there in the display. Default value is the % squareroot of the number of columns in A. % opt_colmajor: you can switch convention to row major for A. In that % case, each row of A is a filter. Default value is false. warning off all if ~exist('opt_normalize', 'var') || isempty(opt_normalize) opt_normalize= true; end if ~exist('opt_graycolor', 'var') || isempty(opt_graycolor) opt_graycolor= true; end if ~exist('opt_colmajor', 'var') || isempty(opt_colmajor) opt_colmajor = false; end % rescale A = A - mean(A(:)); if opt_graycolor, colormap(gray); end % compute rows, cols [L M]=size(A); sz=sqrt(L); buf=1; if ~exist('cols', 'var') if floor(sqrt(M))^2 ~= M n=ceil(sqrt(M)); while mod(M, n)~=0 && n<1.2*sqrt(M), n=n+1; end m=ceil(M/n); else n=sqrt(M); m=n; end else n = cols; m = ceil(M/n); end array=-ones(buf+m*(sz+buf),buf+n*(sz+buf)); if ~opt_graycolor array = 0.1.* array; end if ~opt_colmajor k=1; for i=1:m for j=1:n if k>M, continue; end clim=max(abs(A(:,k))); if opt_normalize array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/clim; else array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/max(abs(A(:))); end k=k+1; end end else k=1; for j=1:n for i=1:m if k>M, continue; end clim=max(abs(A(:,k))); if opt_normalize array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz)/clim; else array(buf+(i-1)*(sz+buf)+(1:sz),buf+(j-1)*(sz+buf)+(1:sz))=reshape(A(:,k),sz,sz); end k=k+1; end end end if opt_graycolor h=imagesc(array,'EraseMode','none',[-1 1]); else h=imagesc(array,'EraseMode','none',[-1 1]); end axis image off drawnow; warning on all ================================================ FILE: DeepLearning/UFLDL/stl_exercise/feedForwardAutoencoder.m ================================================ function [activation] = feedForwardAutoencoder(theta, hiddenSize, visibleSize, data) % theta: trained weights from the autoencoder % visibleSize: the number of input units (probably 64) % hiddenSize: the number of hidden units (probably 25) % data: Our matrix containing the training data as columns. So, data(:,i) is the i-th training example. % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this % follows the notation convention of the lecture notes. %200 x 784 W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); % 200 x 1 b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); %% ---------- YOUR CODE HERE -------------------------------------- % Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder. activation = sigmoid(bsxfun(@plus,W1 * data,b1)) ; %------------------------------------------------------------------- end %------------------------------------------------------------------- % Here's an implementation of the sigmoid function, which you may find useful % in your computation of the costs and the gradients. This inputs a (row or % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). function sigm = sigmoid(x) sigm = 1 ./ (1 + exp(-x)); end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/initializeParameters.m ================================================ function theta = initializeParameters(hiddenSize, visibleSize) %% Initialize parameters randomly based on layer sizes. r = sqrt(6) / sqrt(hiddenSize+visibleSize+1); % we'll choose weights uniformly from the interval [-r, r] W1 = rand(hiddenSize, visibleSize) * 2 * r - r; W2 = rand(visibleSize, hiddenSize) * 2 * r - r; b1 = zeros(hiddenSize, 1); b2 = zeros(visibleSize, 1); % Convert weights and bias gradients to the vector form. % This step will "unroll" (flatten and concatenate together) all % your parameters into a vector, which can then be used with minFunc. theta = [W1(:) ; W2(:) ; b1(:) ; b2(:)]; end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/loadMNISTImages.m ================================================ function images = loadMNISTImages(filename) %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing %the raw MNIST images fp = fopen(filename, 'rb'); assert(fp ~= -1, ['Could not open ', filename, '']); magic = fread(fp, 1, 'int32', 0, 'ieee-be'); assert(magic == 2051, ['Bad magic number in ', filename, '']); numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); images = fread(fp, inf, 'unsigned char'); images = reshape(images, numCols, numRows, numImages); images = permute(images,[2 1 3]); fclose(fp); % Reshape to #pixels x #examples images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); % Convert to double and rescale to [0,1] images = double(images) / 255; end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/loadMNISTLabels.m ================================================ function labels = loadMNISTLabels(filename) %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing %the labels for the MNIST images fp = fopen(filename, 'rb'); assert(fp ~= -1, ['Could not open ', filename, '']); magic = fread(fp, 1, 'int32', 0, 'ieee-be'); assert(magic == 2049, ['Bad magic number in ', filename, '']); numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); labels = fread(fp, inf, 'unsigned char'); assert(size(labels,1) == numLabels, 'Mismatch in label count'); fclose(fp); end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/softmaxCost.m ================================================ function [cost, grad] = softmaxCost(theta, numClasses, inputSize, lambda, data, labels) % numClasses - the number of classes % inputSize - the size N of the input vector % lambda - weight decay parameter % data - the N x M input matrix, where each column data(:, i) corresponds to % a single test set % labels - an M x 1 matrix containing the labels corresponding for the input data % % Unroll the parameters from theta % 10x8 theta = reshape(theta, numClasses, inputSize); %100 data 8x100 numCases = size(data, 2); %sparse(r,c,v) = [r(i),c(i)] = v(i) % 每一列的第 label 行 设置为i,也就是,每个样本属于哪个label,该位置就是1,其他的均为0 % 10 x 100 groundTruth = full(sparse(labels, 1:numCases, 1)); cost = 0; %10 x 8 thetagrad = zeros(numClasses, inputSize); %% ---------- YOUR CODE HERE -------------------------------------- % Instructions: Compute the cost and gradient for softmax regression. % You need to compute thetagrad and cost. % The groundTruth matrix might come in handy. % 10x100 10x8 8x100 100个样本,每个样本属于每个label的概率 M = theta * data; M = exp(bsxfun(@minus,M,max(M,[],1))); H = bsxfun(@rdivide,M,sum(M)); cost = - sum(sum((groundTruth .* log(H)))) / size(data,2) + lambda * sum(sum(theta.^2)) / 2 ; % 10x8 10x100 100x8 thetagrad = - ((groundTruth - H)) * data' /size(data,2) +lambda * theta; % ------------------------------------------------------------------ % Unroll the gradient matrices into a vector for minFunc grad = [thetagrad(:)]; end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/softmaxPredict.m ================================================ function [pred] = softmaxPredict(softmaxModel, data) % softmaxModel - model trained using softmaxTrain % data - the N x M input matrix, where each column data(:, i) corresponds to % a single test set % % Your code should produce the prediction matrix % pred, where pred(i) is argmax_c P(y(c) | x(i)). % Unroll the parameters from theta theta = softmaxModel.optTheta; % this provides a numClasses x inputSize matrix pred = zeros(1, size(data, 2)); %% ---------- YOUR CODE HERE -------------------------------------- % Instructions: Compute pred using theta assuming that the labels start % from 1. [Val , pred] = max(theta*data); % --------------------------------------------------------------------- end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/softmaxTrain.m ================================================ function [softmaxModel] = softmaxTrain(inputSize, numClasses, lambda, inputData, labels, options) %softmaxTrain Train a softmax model with the given parameters on the given % data. Returns softmaxOptTheta, a vector containing the trained parameters % for the model. % % inputSize: the size of an input vector x^(i) % numClasses: the number of classes % lambda: weight decay parameter % inputData: an N by M matrix containing the input data, such that % inputData(:, c) is the cth input % labels: M by 1 matrix containing the class labels for the % corresponding inputs. labels(c) is the class label for % the cth input % options (optional): options % options.maxIter: number of iterations to train for if ~exist('options', 'var') options = struct; end if ~isfield(options, 'maxIter') options.maxIter = 400; end % initialize parameters theta = 0.005 * randn(numClasses * inputSize, 1); % Use minFunc to minimize the function addpath minFunc/ options.Method = 'lbfgs'; % Here, we use L-BFGS to optimize our cost % function. Generally, for minFunc to work, you % need a function pointer with two outputs: the % function value and the gradient. In our problem, % softmaxCost.m satisfies this. minFuncOptions.display = 'on'; [softmaxOptTheta, cost] = minFunc( @(p) softmaxCost(p, ... numClasses, inputSize, lambda, ... inputData, labels), ... theta, options); % Fold softmaxOptTheta into a nicer format softmaxModel.optTheta = reshape(softmaxOptTheta, numClasses, inputSize); softmaxModel.inputSize = inputSize; softmaxModel.numClasses = numClasses; end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/sparseAutoencoderCost.m ================================================ function [cost,grad] = sparseAutoencoderCost(theta, visibleSize, hiddenSize, ... lambda, sparsityParam, beta, data) %lambda = 0; %beta = 0; % visibleSize: the number of input units (probably 64) % hiddenSize: the number of hidden units (probably 25) % lambda: weight decay parameter % sparsityParam: The desired average activation for the hidden units (denoted in the lecture % notes by the greek alphabet rho, which looks like a lower-case "p"). % beta: weight of sparsity penalty term % data: Our 64x10000 matrix containing the training data. So, data(:,i) is the i-th training example. % The input theta is a vector (because minFunc expects the parameters to be a vector). % We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this % follows the notation convention of the lecture notes. % 学习率 自己定义的 alpha = 0.03; % 计算隐藏层神经元的激活度 p = zeros(hiddenSize,1); W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize); W2 = reshape(theta(hiddenSize*visibleSize+1:2*hiddenSize*visibleSize), visibleSize, hiddenSize); b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize); b2 = theta(2*hiddenSize*visibleSize+hiddenSize+1:end); % Cost and gradient variables (your code needs to compute these values). % Here, we initialize them to zeros. %% ---------- YOUR CODE HERE -------------------------------------- % Instructions: Compute the cost/optimization objective J_sparse(W,b) for the Sparse Autoencoder, % and the corresponding gradients W1grad, W2grad, b1grad, b2grad. % % W1grad, W2grad, b1grad and b2grad should be computed using backpropagation. % Note that W1grad has the same dimensions as W1, b1grad has the same dimensions % as b1, etc. Your code should set W1grad to be the partial derivative of J_sparse(W,b) with % respect to W1. I.e., W1grad(i,j) should be the partial derivative of J_sparse(W,b) % with respect to the input parameter W1(i,j). Thus, W1grad should be equal to the term % [(1/m) \Delta W^{(1)} + \lambda W^{(1)}] in the last block of pseudo-code in Section 2.2 % of the lecture notes (and similarly for W2grad, b1grad, b2grad). % % Stated differently, if we were using batch gradient descent to optimize the parameters, % the gradient descent update to W1 would be W1 := W1 - alpha * W1grad, and similarly for W2, b1, b2. % numPatches = size(data,2); KLdist = 0; %% 向前传输 a2 = sigmoid(W1*data+repmat(b1,[1,numPatches])); p = sum(a2,2); a3 = sigmoid(W2 * a2 + repmat(b2,[1,numPatches])); J_sparse = 0.5 * sum(sum((a3-data).^2)); %% 计算 隐藏层的平均激活度 p = p / numPatches ; %% 向后传输 residual3 = -(data-a3).*a3.*(1-a3); tmp = beta * ( - sparsityParam ./ p + (1-sparsityParam) ./ (1-p)); residual2 = (W2' * residual3 + repmat(tmp,[1,numPatches])) .* a2.*(1-a2); W2grad = residual3 * a2' / numPatches + lambda * W2 ; W1grad = residual2 * data' / numPatches + lambda * W1 ; b2grad = sum(residual3,2) / numPatches; b1grad = sum(residual2,2) / numPatches; %% 更新权重参数 加上 lambda 权重衰减 W2 = W2 - alpha * ( W2grad ); W1 = W1 - alpha * ( W1grad ); b2 = b2 - alpha * (b2grad ); b1 = b1 - alpha * (b1grad ); %% 计算KL相对熵 for j = 1:hiddenSize KLdist = KLdist + sparsityParam *log( sparsityParam / p(j) ) + (1 - sparsityParam) * log((1-sparsityParam) / (1 - p(j))); end %% costFunction 加上 lambda 权重衰减 cost = J_sparse / numPatches + (sum(sum(W1.^2)) + sum(sum(W2.^2))) * lambda / 2 + beta * KLdist; %------------------------------------------------------------------- % After computing the cost and gradient, we will convert the gradients back % to a vector format (suitable for minFunc). Specifically, we will unroll % your gradient matrices into a vector. grad = [W1grad(:) ; W2grad(:) ; b1grad(:) ; b2grad(:)]; end %------------------------------------------------------------------- % Here's an implementation of the sigmoid function, which you may find useful % in your computation of the costs and the gradients. This inputs a (row or % column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)). function sigm = sigmoid(x) sigm = 1 ./ (1 + exp(-x)); end ================================================ FILE: DeepLearning/UFLDL/stl_exercise/stlExercise.m ================================================ %% CS294A/CS294W Self-taught Learning Exercise % Instructions % ------------ % % This file contains code that helps you get started on the % self-taught learning. You will need to complete code in feedForwardAutoencoder.m % You will also need to have implemented sparseAutoencoderCost.m and % softmaxCost.m from previous exercises. % %% ====================================================================== % STEP 0: Here we provide the relevant parameters values that will % allow your sparse autoencoder to get good filters; you do not need to % change the parameters below. clc clear inputSize = 28 * 28; numLabels = 5; hiddenSize = 200; sparsityParam = 0.1; % desired average activation of the hidden units. % (This was denoted by the Greek alphabet rho, which looks like a lower-case "p", % in the lecture notes). lambda = 3e-3; % weight decay parameter beta = 3; % weight of sparsity penalty term maxIter = 400; %% ====================================================================== % STEP 1: Load data from the MNIST database % % This loads our training and test data from the MNIST database files. % We have sorted the data for you in this so that you will not have to % change it. % Load MNIST database files mnistData = loadMNISTImages('train-images.idx3-ubyte'); mnistLabels = loadMNISTLabels('train-labels.idx1-ubyte'); % Set Unlabeled Set (All Images) % Simulate a Labeled and Unlabeled set labeledSet = find(mnistLabels >= 0 & mnistLabels <= 4); unlabeledSet = find(mnistLabels >= 5); numTrain = round(numel(labeledSet)/2); trainSet = labeledSet(1:numTrain); testSet = labeledSet(numTrain+1:end); unlabeledData = mnistData(:, unlabeledSet); trainData = mnistData(:, trainSet); trainLabels = mnistLabels(trainSet)' + 1; % Shift Labels to the Range 1-5 testData = mnistData(:, testSet); testLabels = mnistLabels(testSet)' + 1; % Shift Labels to the Range 1-5 % Output Some Statistics fprintf('# examples in unlabeled set: %d\n', size(unlabeledData, 2)); fprintf('# examples in supervised training set: %d\n\n', size(trainData, 2)); fprintf('# examples in supervised testing set: %d\n\n', size(testData, 2)); %% ====================================================================== % STEP 2: Train the sparse autoencoder % This trains the sparse autoencoder on the unlabeled training % images. % Randomly initialize the parameters theta = initializeParameters(hiddenSize, inputSize); %% ----------------- YOUR CODE HERE ---------------------- % Find opttheta by running the sparse autoencoder on % unlabeledTrainingImages addpath minFunc/ options.Method = 'lbfgs'; options.maxIter = 400; options.display = 'on'; visibleSize = inputSize; [opttheta, cost] = minFunc( @(p) sparseAutoencoderCost(p, ... visibleSize, hiddenSize, ... lambda, sparsityParam, ... beta, unlabeledData), ... theta, options); %% ----------------------------------------------------- % Visualize weights W1 = reshape(opttheta(1:hiddenSize * inputSize), hiddenSize, inputSize); display_network(W1'); %%====================================================================== %% STEP 3: Extract Features from the Supervised Dataset % % You need to complete the code in feedForwardAutoencoder.m so that the % following command will extract features from the data. trainFeatures = feedForwardAutoencoder(opttheta, hiddenSize, inputSize, ... trainData); testFeatures = feedForwardAutoencoder(opttheta, hiddenSize, inputSize, ... testData); %%====================================================================== %% STEP 4: Train the softmax classifier softmaxModel = struct; %% ----------------- YOUR CODE HERE ---------------------- % Use softmaxTrain.m from the previous exercise to train a multi-class % classifier. options.maxIter = 100; lambda = 1e-4; numClasses = 5; inputData = trainFeatures; labels = trainLabels ; inputSize = hiddenSize; softmaxModel = softmaxTrain(inputSize, numClasses, lambda, inputData, labels, options); % Use lambda = 1e-4 for the weight regularization for softmax % You need to compute softmaxModel using softmaxTrain on trainFeatures and % trainLabels %% ----------------------------------------------------- %%====================================================================== %% STEP 5: Testing %% ----------------- YOUR CODE HERE ---------------------- % Compute Predictions on the test set (testFeatures) using softmaxPredict % and softmaxModel [pred] = softmaxPredict(softmaxModel, testFeatures); %% ----------------------------------------------------- % Classification Score fprintf('Test Accuracy: %f%%\n', 100*mean(pred(:) == testLabels(:))); % (note that we shift the labels by 1, so that digit 0 now corresponds to % label 1) % % Accuracy is the proportion of correctly classified images % The results for our implementation was: % % Accuracy: 98.3% % % ================================================ FILE: GMM/README.md ================================================ 博客链接:http://blog.csdn.net/llp1992/article/details/47058109 更多机器学习深度学习博客请关注CSDN博客:[LiuLongpo](http://blog.csdn.net/llp1992) ================================================ FILE: GMM/gmm.m ================================================ function varargout = gmm(X, K_or_centroids) % ============================================================ % Expectation-Maximization iteration implementation of % Gaussian Mixture Model. % % PX = GMM(X, K_OR_CENTROIDS) % [PX MODEL] = GMM(X, K_OR_CENTROIDS) % % - X: N-by-D data matrix. % - K_OR_CENTROIDS: either K indicating the number of % components or a K-by-D matrix indicating the % choosing of the initial K centroids. % % - PX: N-by-K matrix indicating the probability of each % component generating each point. % - MODEL: a structure containing the parameters for a GMM: % MODEL.Miu: a K-by-D matrix. % MODEL.Sigma: a D-by-D-by-K matrix. % MODEL.Pi: a 1-by-K vector. % ============================================================ threshold = 1e-15; [N, D] = size(X); % isscalar 判断是否为标量 if isscalar(K_or_centroids) K = K_or_centroids; % randomly pick centroids rndp = randperm(N); centroids = X(rndp(1:K), :); else % 矩阵,给出每一类的初始化 K = size(K_or_centroids, 1); centroids = K_or_centroids; end % initial values [pMiu pPi pSigma] = init_params(); Lprev = -inf; while true %% Estiamtion Step Px = calc_prob(); % new value for pGamma pGamma = Px .* repmat(pPi, N, 1); pGamma = pGamma ./ repmat(sum(pGamma, 2), 1, K); %% Maximization Step % new value for parameters of each Component Nk = sum(pGamma, 1); pMiu = diag(1./Nk) * pGamma' * X; pPi = Nk/N; for kk = 1:K Xshift = X-repmat(pMiu(kk, :), N, 1); pSigma(:, :, kk) = (Xshift' * (diag(pGamma(:, kk)) * Xshift)) / Nk(kk); end %% check for convergence L = sum(log(Px*pPi')); if L-Lprev < threshold break; end Lprev = L; end % 输出参数判定 if nargout == 1 varargout = {Px}; else model = []; model.Miu = pMiu; model.Sigma = pSigma; model.Pi = pPi; varargout = {Px, model}; end function [pMiu pPi pSigma] = init_params() pMiu = centroids; % 均值,也就是K类的中心 pPi = zeros(1, K); % 概率 pSigma = zeros(D, D, K); %协方差矩阵,每个都是 D*D % hard assign x to each centroids % (X - pMiu)^2 = X^2 + pMiu^2 - 2*X*pMiu distmat = repmat(sum(X.*X, 2), 1, K) + repmat(sum(pMiu.*pMiu, 2)', N, 1) - 2*X*pMiu'; [dummy labels] = min(distmat, [], 2); for k=1:K %初始化参数 Xk = X(labels == k, :); pPi(k) = size(Xk, 1)/N; pSigma(:, :, k) = cov(Xk); end end % 计算概率 function Px = calc_prob() Px = zeros(N, K); for k = 1:K Xshift = X-repmat(pMiu(k, :), N, 1); inv_pSigma = inv(pSigma(:, :, k)+diag(repmat(threshold,1,size(pSigma(:, :, k),1)))); % 方差矩阵求逆 tmp = sum((Xshift*inv_pSigma) .* Xshift, 2); coef = (2*pi)^(-D/2) * sqrt(det(inv_pSigma)); % det 求方差矩阵的行列式 Px(:, k) = coef * exp(-0.5*tmp); end end end ================================================ FILE: GMM/gmm.py ================================================ # -*- coding: utf-8 -*- ''' Description :GMM in Python Author : LiuLongpo Time : 2015年7月26日16:54:48 Source :From pluskid ''' import sys; sys.path.append("E:\Python\MachineLearning\PythonTools") import PythonUtils as pu import matplotlib.pyplot as plt import numpy as np '矩阵的逆矩阵需要的库' from numpy.linalg import * def gmm(X,K): threshold = 1e-15 N,D = np.shape(X) randV = pu.randIntList(1,N,K) centroids = X[randV] pMiu,pPi,pSigma = inti_params(centroids,K,X,N,D); Lprev = -np.inf while True: 'Estiamtion Step' Px = calc_prop(X,N,K,pMiu,pSigma,threshold,D) pGamma = Px * np.tile(pPi,(N,1)) pGamma = pGamma / np.tile((np.sum(pGamma,axis=1)),(K,1)).T 'Maximization Step' Nk = np.sum(pGamma,axis=0) pMiu = np.dot(np.dot(np.diag(1 / Nk),pGamma.T),X) pPi = Nk / N for kk in range(K): Xshift = X - np.tile(pMiu[kk],(N,1)) pSigma[:,:,kk] = (np.dot(np.dot(Xshift.T,np.diag(pGamma[:,kk])),Xshift)) / Nk[kk] 'check for convergence' L = np.sum(np.log(np.dot(Px,pPi.T))) if L-Lprev