Repository: sammy-suyama/BayesBook
Branch: master
Commit: 61cb7ee0f1df
Files: 24
Total size: 66.9 KB

Directory structure:
gitextract_c_lvl76e/
├── LICENSE
├── README.md
├── data/
│   └── timeseries.jld
├── docker/
│   ├── Dockerfile
│   ├── README.md
│   └── add_packages.jl
└── src/
    ├── BayesNeuralNet.jl
    ├── DimensionalityReduction.jl
    ├── GaussianMixtureModel.jl
    ├── LogisticRegression.jl
    ├── NMF.jl
    ├── PoissonHMM.jl
    ├── PoissonMixtureModel.jl
    ├── demo_BayesNeuralNet.jl
    ├── demo_DimensionalityReduction.jl
    ├── demo_GaussianMixtureModel.jl
    ├── demo_LogisticRegression.jl
    ├── demo_NMF.jl
    ├── demo_PoissonHMM.jl
    ├── demo_PoissonMixtureModel.jl
    ├── demo_PolynomialRegression.jl
    ├── demo_Simple2DGauss.jl
    ├── demo_SimpleFitting.jl
    └── demo_nonconjugate.jl

================================================
FILE CONTENTS
================================================

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2020 Sammy

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================================
FILE: README.md
================================================
# BayesBook

This repository hosts the source code for the book 「機械学習スタートアップシリーズ ベイズ推論による機械学習入門」 (Machine Learning Startup Series: An Introduction to Machine Learning by Bayesian Inference).

* http://www.kspub.co.jp/book/detail/1538320.html
* Errata (printings 1-3): https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo.pdf
* Errata (up to printing 4): https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo_v4.pdf

The source code is written in Julia (recommended version: 0.6.0).

* The Julia Language: http://julialang.org/
* Julia Documentation: http://docs.julialang.org/

Some Python libraries are used for plotting and for downloading test data.

* Python: https://www.python.org/
* Matplotlib: https://matplotlib.org/
* scikit-learn: http://scikit-learn.org/

If setting up the environment above is a hassle, a Dockerfile is provided as well.

* Docker: https://docs.docker.com/

================================================
FILE: docker/Dockerfile
================================================
FROM python:latest

# Update
RUN apt-get update

# Install Python libraries
RUN pip3 install matplotlib scipy scikit-learn notebook

# Install system libraries
RUN apt-get install -y sudo hdf5-tools libzmq3

# Install julia 0.6.0
RUN wget https://julialang-s3.julialang.org/bin/linux/x64/0.6/julia-0.6.0-linux-x86_64.tar.gz && \
    tar -xzf julia-0.6.0-linux-x86_64.tar.gz && \
    ln -s /julia-903644385b/bin/julia /usr/local/bin/julia

# Set the working directory to /work
WORKDIR /work

# Add julia packages
ADD add_packages.jl /work
RUN julia add_packages.jl

# Download source codes
RUN git clone https://github.com/sammy-suyama/BayesBook.git

# Make port 8888 available to the world outside this container
EXPOSE 8888

# Start jupyter notebook
CMD jupyter notebook --allow-root --port=8888 --ip=0.0.0.0

================================================
FILE: docker/README.md
================================================
# Running Jupyter notebook from Docker

If setting up a Julia/Python environment is a hassle, you can use Docker to run the demo scripts in a Jupyter notebook.

For installing Docker itself, see the official site:

* https://docs.docker.com/engine/installation/

Build and run the image in the directory containing the `Dockerfile`:

    $ docker build -t bayesbook .
    $ docker run -p 8888:8888 bayesbook

================================================
FILE: docker/add_packages.jl
================================================
Pkg.update()
Pkg.add("PyPlot")
Pkg.add("StatsFuns")
Pkg.add("SpecialFunctions")
Pkg.add("Distributions")
Pkg.add("PDMats")
Pkg.add("ProgressMeter")
Pkg.add("DataFrames")
Pkg.add("HDF5")
Pkg.add("JLD")
Pkg.add("IJulia")

================================================
FILE: src/BayesNeuralNet.jl
================================================
"""
Variational inference for Bayesian neural network
"""
module BayesNeuralNet
using Distributions
export sample_data_from_prior, sample_data_from_posterior
export VI

function sigmoid(x)
    return 1.0 / (1.0 + exp.(-x[1]))
end

function rho2sig(rho)
    return log.(1 + exp.(rho))
end

function compute_df_dmu(mu, rho, W)
    return (W - mu) ./ rho2sig(rho).^2
end

function compute_df_drho(Y, X, mu, rho, W)
    return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho)
end

function compute_dprec_drho(rho)
    return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho)))
end

function compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1, mu2, rho2, W2)
    M, N = size(X)
    Y_err1 = zeros(size(W1)) # MxK
    Y_err2 = zeros(size(W2)) # KxD
    for n in 1 : N
        Z = tanh.(W1'*X[:,n])    # Kx1
        Y_est = W2'*Z            # 2nd unit, Dx1
        delta2 = Y_est - Y[n]
        # 1st unit, KxD
        delta1 = diagm(1 - Z.^2) * W2 * delta2
        Y_err1 += X[:,n] * delta1'
        Y_err2 += Z * delta2'
    end
    df_dw1 = W1/sigma2_w + (mu1 - W1) ./ rho2sig(rho1).^2 + Y_err1 / sigma2_y
    df_dw2 = W2/sigma2_w + (mu2 - W2) ./ rho2sig(rho2).^2 + Y_err2 / sigma2_y
    return df_dw1, df_dw2
end

"""
Sample data given prior and inputs.
"""
function sample_data_from_prior(X, sigma2_w, sigma2_y, D, K)
    M, N = size(X)
    W1 = sqrt(sigma2_w) * randn(M, K)
    W2 = sqrt(sigma2_w) * randn(K, D)

    # sample function
    Y = [W2'* tanh.(W1'X[:,n]) for n in 1 : N]

    # sample data
    Y_obs = [W2'* tanh.(W1'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N]
    return Y_obs, Y, W1, W2
end

"""
Sample data given posterior and inputs.
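
The weights are drawn with the reparameterization `W = mu + log.(1 + exp.(rho)) .* eps`,
`eps ~ N(0, 1)`.

# Example
A minimal sketch; here the variational parameters are random placeholders, whereas
normally they come from `VI`, and all concrete sizes are illustrative:

```julia
M, K, D, N = 2, 5, 1, 100                # input / hidden / output / data size
X = ones(M, N)
X[1,:] = linspace(-2, 4, N)              # inputs; the second row is a bias
mu1, rho1 = randn(M, K), randn(M, K)     # e.g. mu1, rho1, mu2, rho2 = VI(...)
mu2, rho2 = randn(K, D), randn(K, D)
Y_est, Y_obs = sample_data_from_posterior(X, mu1, rho1, mu2, rho2, 0.01, D)
```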
""" function sample_data_from_posterior(X, mu1, rho1, mu2, rho2, sigma2_y, D) N = size(X, 2) ep1 = randn(size(mu1)) W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1 ep2 = randn(size(mu2)) W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2 Y_est = [W2_tmp'* tanh.(W1_tmp'X[:,n]) for n in 1 : N] Y_obs = [W2_tmp'* tanh.(W1_tmp'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N] return Y_est, Y_obs end """ Compute variational parameters. """ function VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter) M, N = size(X) D = length(Y[1]) # initialize mu1 = randn(M, K) rho1 = randn(M, K) mu2 = randn(K, D) rho2 = randn(K, D) for i in 1 : max_iter # sample ep1 = randn(size(mu1)) W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1 ep2 = randn(size(mu2)) W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2 # calc error df_dw1, df_dw2 = compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1_tmp, mu2, rho2, W2_tmp) # 1st unit df_dmu1 = compute_df_dmu(mu1, rho1, W1_tmp) df_drho1 = compute_df_drho(Y, X, mu1, rho1, W1_tmp) d_mu1 = df_dw1 + df_dmu1 d_rho1 = df_dw1 .* (ep1 ./ (1+exp.(-rho1))) + df_drho1 mu1 = mu1 - alpha * d_mu1 rho1 = rho1 - alpha * d_rho1 # 2nd unit df_dmu2 = compute_df_dmu(mu2, rho2, W2_tmp) df_drho2 = compute_df_drho(Y, X, mu2, rho2, W2_tmp) d_mu2 = df_dw2 + df_dmu2 d_rho2 = df_dw2 .* (ep2 ./ (1+exp.(-rho2))) + df_drho2 mu2 = mu2 - alpha * d_mu2 rho2 = rho2 - alpha * d_rho2 end return mu1, rho1, mu2, rho2 end end ================================================ FILE: src/DimensionalityReduction.jl ================================================ """ Variational inference for Bayesian DimensionalityReduction """ module DimensionalityReduction using Distributions #using ProgressMeter export DRModel export sample_data, VI #################### ## Types struct DRModel D::Int M::Int sigma2_y::Float64 m_W::Array{Float64, 2} # MxD Sigma_W::Array{Float64, 3} # MxMxD m_mu::Array{Float64, 1} # D Sigma_mu::Array{Float64, 2} # DxD end #################### ## functions function sqsum(mat::Array{Float64}, idx::Int) return squeeze(sum(mat, idx), idx) end """ Sample data given hyperparameters. 
""" function sample_data(N::Int, model::DRModel) D = model.D M = model.M W = zeros(M, D) mu = zeros(D) for d in 1 : D W[:,d] = rand(MvNormal(model.m_W[:,d], model.Sigma_W[:,:,d])) end mu = rand(MvNormal(model.m_mu, model.Sigma_mu)) Y = zeros(D, N) X = randn(M, N) for n in 1 : N Y[:,n] = rand(MvNormal(W'*X[:,n] + mu, model.sigma2_y*eye(D))) end return Y, X, W, mu end function init(Y::Array{Float64, 2}, prior::DRModel) M = prior.M D, N = size(Y) X = randn(M, N) XX = zeros(M, M, N) for n in 1 : N XX[:,:,n] = X[:,n]*X[:,n]' + eye(M) end return X, XX end function update_W(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel, X::Array{Float64, 2}, XX::Array{Float64, 3}) D = prior.D M = prior.M N = size(Y, 2) m_W = zeros(M, D) Sigma_W = zeros(M, M, D) mu = posterior.m_mu for d in 1 : D Sigma_W[:,:,d] = inv(inv(prior.sigma2_y)*sqsum(XX, 3) + inv(prior.Sigma_W[:,:,d])) m_W[:,d] = Sigma_W[:,:,d]*(inv(prior.sigma2_y)*X*(Y[[d],:] - mu[d]*ones(1, N))' + inv(prior.Sigma_W[:,:,d])*prior.m_W[:,d]) end return DRModel(D, M, prior.sigma2_y, m_W, Sigma_W, posterior.m_mu, posterior.Sigma_mu) end function update_mu(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel, X::Array{Float64, 2}, XX::Array{Float64, 3}) N = size(Y, 2) D = prior.D M = prior.M W = posterior.m_W Sigma_mu = inv(N*inv(prior.sigma2_y)*eye(D) + inv(prior.Sigma_mu)) m_mu = Sigma_mu*(inv(prior.sigma2_y)*sqsum(Y - W'*X, 2) + inv(prior.Sigma_mu)*prior.m_mu) return DRModel(D, M, prior.sigma2_y, posterior.m_W, posterior.Sigma_W, m_mu, Sigma_mu) end function update_X(Y::Array{Float64, 2}, posterior::DRModel) D, N = size(Y) M = posterior.M W = posterior.m_W WW = zeros(M, M, D) for d in 1 : D WW[:,:,d] = W[:,d]*W[:,d]' + posterior.Sigma_W[:,:,d] end mu = posterior.m_mu X = zeros(M, N) XX = zeros(M, M, N) for n in 1 : N Sigma = inv(inv(posterior.sigma2_y)*sqsum(WW, 3) + eye(M)) X[:,n] = inv(posterior.sigma2_y)*Sigma*W*(Y[:,n] - mu) XX[:,:,n] = X[:,n] * X[:,n]' + Sigma end return X, XX end function interpolate(mask::BitArray{2}, X::Array{Float64, 2}, posterior::DRModel) Y_est = posterior.m_W'*X + repmat(posterior.m_mu, 1, size(X, 2)) return return Y_est[mask] end """ Compute variational posterior distributions. 
""" function VI(Y::Array{Float64, 2}, prior::DRModel, max_iter::Int) X, XX = init(Y, prior) mask = isnan.(Y) sum_nan = sum(mask) posterior = deepcopy(prior) #progress = Progress(max_iter) for iter in 1 : max_iter # progress #next!(progress) # Interpolate if sum_nan > 0 Y[mask] = interpolate(mask, X, posterior) end # M-step posterior = update_W(Y, prior, posterior, X, XX) posterior = update_mu(Y, prior, posterior, X, XX) # E-step X, XX = update_X(Y, posterior) end return posterior, X end end ================================================ FILE: src/GaussianMixtureModel.jl ================================================ """ Bayesian Gaussian Mixture Model """ module GaussianMixtureModel using StatsFuns.logsumexp, SpecialFunctions.digamma using Distributions using PDMats export GW, BGMM, Gauss, GMM export sample_GMM, sample_data, winner_takes_all export learn_GS, learn_CGS, learn_VI #################### ## Types struct GW # Parameters of Gauss Wisahrt distribution beta::Float64 m::Vector{Float64} nu::Float64 W::Matrix{Float64} end struct BGMM # Parameters of Bayesian Gaussian Mixture Model D::Int K::Int alpha::Vector{Float64} cmp::Vector{GW} end struct Gauss # Parameters of Gauss Distribution mu::Vector{Float64} Lambda::Matrix{Float64} end struct GMM # Parameters of Gauss Mixture Model D::Int K::Int phi::Vector{Float64} cmp::Vector{Gauss} end #################### ## Common functions """ Sample a GMM given hyperparameters. """ function sample_GMM(bgmm::BGMM) cmp = Vector{Gauss}() for c in bgmm.cmp Lambda = rand(Wishart(c.nu, PDMats.PDMat(Symmetric(c.W)))) mu = rand(MvNormal(c.m, PDMats.PDMat(Symmetric(inv(c.beta*Lambda))))) push!(cmp, Gauss(mu, Lambda)) end phi = rand(Dirichlet(bgmm.alpha)) return GMM(bgmm.D, bgmm.K, phi, cmp) end """ Sample data from a specific GMM model. 
""" function sample_data(gmm::GMM, N::Int) X = zeros(gmm.D, N) S = categorical_sample(gmm.phi, N) for n in 1 : N k = indmax(S[:, n]) X[:,n] = rand(MvNormal(gmm.cmp[k].mu, PDMats.PDMat(Symmetric(inv(gmm.cmp[k].Lambda))))) end return X, S end categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] function categorical_sample(p::Vector{Float64}, N::Int) K = length(p) S = zeros(K, N) S_tmp = rand(Categorical(p), N) for k in 1 : K S[k,find(S_tmp.==k)] = 1 end return S end function sumdigamma(nu, D) ret = 0.0 for d in 1 : D ret += digamma.(0.5*(nu + 1 - d)) end return ret end function init_S(X::Matrix{Float64}, bgmm::BGMM) N = size(X, 2) K = bgmm.K S = categorical_sample(ones(K)/K, N) return S end function calc_ELBO(X::Array{Float64, 2}, pri::BGMM, pos::BGMM) function logCw(nu, W) D = size(W, 1) return -0.5*nu*logdet(W) - 0.5*nu*D*log.(2) - 0.25*D*(D-1)*log.(pi) - sum([lgamma.(0.5*(nu+1-d)) for d in 1 : D]) end ln_expt_S = update_S(pos, X) expt_S = exp.(ln_expt_S) K, N = size(expt_S) D = size(X, 1) expt_ln_lkh = 0 for k in 1 : K expt_Lambda = pos.cmp[k].nu * pos.cmp[k].W expt_Lambda_mu = pos.cmp[k].nu * pos.cmp[k].W * pos.cmp[k].m expt_mu_Lambda_mu = (pos.cmp[k].nu * pos.cmp[k].m' * pos.cmp[k].W * pos.cmp[k].m)[1] + D/pos.cmp[k].beta expt_ln_Lambda = sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W) expt_ln_pi = digamma.(pos.alpha) - digamma.(sum(pos.alpha)) for n in 1 : N # expt_ln_lkh += -0.5 * expt_S[k,n]*(trace(X[:,n]*X[:,n]'*expt_Lambda) - 2*(X[:,n]'*expt_Lambda_mu)[1] + expt_mu_Lambda_mu - expt_ln_Lambda + D * log.(2*pi) ) # expt_ln_lkh += expt_S[k,n]*expt_ln_pi[k] end end # - expt_ln_lkh -= sum(expt_S.*ln_expt_S) KL_mu_Lambda = [(0.5*D*(log.(pos.cmp[k].beta) - log.(pri.cmp[k].beta) + pri.cmp[k].beta/pos.cmp[k].beta - pos.cmp[k].nu - 1) + 0.5*(pos.cmp[k].nu-pri.cmp[k].nu)*(sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W)) + logCw(pos.cmp[k].nu, pos.cmp[k].W) - logCw(pri.cmp[k].nu, pri.cmp[k].W) + 0.5*pos.cmp[k].nu*trace((pri.cmp[k].beta*(pos.cmp[k].m-pri.cmp[k].m)*(pos.cmp[k].m-pri.cmp[k].m)' +inv(pri.cmp[k].W))*pos.cmp[k].W)) for k in 1 : K] KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha)) - sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha)) + (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha))) )[1] VB = expt_ln_lkh - (sum(KL_mu_Lambda) + KL_pi) return VB end function add_stats(bgmm::BGMM, X::Matrix{Float64}, S::Matrix{Float64}) D = bgmm.D K = bgmm.K sum_S = sum(S, 2) alpha = [bgmm.alpha[k] + sum_S[k] for k in 1 : K] cmp = Vector{GW}() XS = X*S'; for k in 1 : K beta = bgmm.cmp[k].beta + sum_S[k] m = (1.0/beta)*(vec(X*S[[k],:]') + bgmm.cmp[k].beta*bgmm.cmp[k].m) nu = bgmm.cmp[k].nu + sum_S[k] W = inv(X*diagm(S[k,:])*X' - beta*m*m' + bgmm.cmp[k].beta*bgmm.cmp[k].m*bgmm.cmp[k].m' + inv(bgmm.cmp[k].W)) push!(cmp, GW(beta, m, nu, W)) end return BGMM(D, K, alpha, cmp) end remove_stats(bgmm::BGMM, X::Matrix{Float64}, S::Matrix{Float64}) = add_stats(bgmm, X, -S) #################### ## used for Variational Inference function update_S(bgmm::BGMM, X::Matrix{Float64}) D, N = size(X) K = bgmm.K ln_S = zeros(K, N) tmp = zeros(K) tmp = NaN * zeros(K) sum_digamma_tmp = digamma.(sum(bgmm.alpha)) for k in 1 : K tmp[k] = -0.5*(bgmm.cmp[k].nu*trace(bgmm.cmp[k].m*bgmm.cmp[k].m'*bgmm.cmp[k].W) + D*(1.0/bgmm.cmp[k].beta) - (sumdigamma(bgmm.cmp[k].nu, D) + logdet(bgmm.cmp[k].W))) tmp[k] += digamma.(bgmm.alpha[k]) - sum_digamma_tmp end for n in 1 : N tmp_ln_pi = NaN * zeros(K) for k in 1 : K tmp_ln_pi[k] = tmp[k] 
-0.5*bgmm.cmp[k].nu*trace((X[:,n]*X[:,n]' - 2*bgmm.cmp[k].m*X[:,n]')*bgmm.cmp[k].W) end ln_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi) end return ln_S end """ Pick single states having a max probability. """ function winner_takes_all(S::Matrix{Float64}) S_ret = zeros(size(S)) for n in 1 : size(S_ret, 2) idx = indmax(S[:,n]) S_ret[idx,n] = 1 end return S_ret end #################### ## used for Gibbs Sampling function sample_S_GS(gmm::GMM, X::Matrix{Float64}) D, N = size(X) K = gmm.K S = zeros(K, N) tmp = [0.5*logdet(gmm.cmp[k].Lambda) + log.(gmm.phi[k]) for k in 1 : K] for n in 1 : N tmp_ln_phi = [-0.5*trace(gmm.cmp[k].Lambda*(X[:,n] - gmm.cmp[k].mu)*(X[:,n] - gmm.cmp[k].mu)') + tmp[k] for k in 1 : K] tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi) S[:,n] = categorical_sample(exp.(tmp_ln_phi)) end return S end #################### ## used for Collapsed Gibbs Sampling function calc_ln_ST(Xn::Vector{Float64}, gw::GW) # TODO; need to check value? D = size(Xn, 1) W = ((1 - D + gw.nu)*gw.beta / (1 + gw.beta)) * gw.W #ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, (gw.nu/(gw.nu - 2))*inv(W)), Xn) ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, PDMats.PDMat(Symmetric(inv(W)))), Xn) return sum(ln_lkh) end function sample_Sn(Xn::Vector{Float64}, bgmm::BGMM) ln_tmp = [(calc_ln_ST(Xn, bgmm.cmp[k]) + log.(bgmm.alpha[k])) for k in 1 : bgmm.K] ln_tmp = ln_tmp - logsumexp(ln_tmp) Sn = categorical_sample(exp.(ln_tmp)) return Sn end function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bgmm::BGMM) D, N = size(X) K = size(S, 1) for n in randperm(N) # remove bgmm = remove_stats(bgmm, X[:,[n]], S[:,[n]]) # sample S[:,n] = sample_Sn(X[:,n], bgmm) # insert bgmm = add_stats(bgmm, X[:,[n]], S[:,[n]]) end return S, bgmm end #################### ## Algorithm main """ Compute posterior distributions via variational inference. """ function learn_VI(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) # initialisation expt_S = init_S(X, prior_bgmm) bgmm = add_stats(prior_bgmm, X, expt_S) VB = NaN * zeros(max_iter) # inference for i in 1 : max_iter # E-step expt_S = exp.(update_S(bgmm, X)) # M-step bgmm = add_stats(prior_bgmm, X, expt_S) # calc VB VB[i] = calc_ELBO(X, prior_bgmm, bgmm) end # assign binary values S = winner_takes_all(expt_S) return S, bgmm, VB end """ Compute posterior distributions via Gibbs sampling. """ function learn_GS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) # initialisation S = init_S(X, prior_bgmm) bgmm = add_stats(prior_bgmm, X, S) VB = NaN * zeros(max_iter) # inference for i in 1 : max_iter # sample parameters gmm = sample_GMM(bgmm) # sample latent variables S = sample_S_GS(gmm, X) # update current model bgmm = add_stats(prior_bgmm, X, S) # calc VB VB[i] = calc_ELBO(X, prior_bgmm, bgmm) end return S, bgmm, VB end """ Compute posterior distributions via collapsed Gibbs sampling. """ function learn_CGS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int) # initialisation S = init_S(X, prior_bgmm) bgmm = add_stats(prior_bgmm, X, S) VB = NaN * zeros(max_iter) # inference for i in 1 : max_iter # directly sample S S, bgmm = sample_S_CGS(S, X, bgmm) # calc VB VB[i] = calc_ELBO(X, prior_bgmm, bgmm) end return S, bgmm, VB end end ================================================ FILE: src/LogisticRegression.jl ================================================ """ Variational inference for Bayesian logistic regression. 
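
# Example
A usage sketch mirroring `demo_LogisticRegression.jl` (the learning rate and
iteration count are the ones used there):

```julia
M, N = 2, 50
Sigma_w = 100.0 * eye(M)                 # prior covariance of the weights
X = 2 * rand(M, N) - 1.0                 # random 2D inputs
Y, W_true = sample_data(X, Sigma_w)      # labels from a sampled weight vector
mu, rho = VI(Y, X, M, Sigma_w, 1.0e-4, 100000)
sigma = log.(1 + exp.(rho))              # posterior std dev of each weight
```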
""" module LogisticRegression using Distributions export sigmoid, sample_data, VI function sigmoid(x) return 1.0 / (1.0 + exp.(-x[1])) end function bern_sample(mu) i = rand(Bernoulli(mu)) val = zeros(2) val[i+1] = 1 return val end """ Sample data & parameter given covariance Sigma_w and inputs X. """ function sample_data(X, Sigma_w) N = size(X, 2) M = size(Sigma_w, 1) # sample parameters W = rand(MvNormal(zeros(M), Sigma_w)) # sample data Y = [rand(Bernoulli(sigmoid(W'*X[:, n]))) for n in 1 : N] return Y, W end """ Compute variational parameters. """ function VI(Y, X, M, Sigma_w, alpha, max_iter) function rho2sig(rho) return log.(1 + exp.(rho)) end function compute_df_dw(Y, X, Sigma_w, mu, rho, W) M, N = size(X) term1 = (mu - W) ./ rho2sig(rho).^2 term2 = inv(Sigma_w)*W term3 = 0 for n in 1 : N term3 += -(Y[n] - sigmoid(W'*X[:,n])) * X[:,n] end return term1 + term2 + term3 end function compute_df_dmu(mu, rho, W) return (W - mu) ./ rho2sig(rho).^2 end function compute_df_drho(Y, X, Sigma_w, mu, rho, W) return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho) end function compute_dprec_drho(rho) return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho))) end # diag gaussian for approximate posterior mu = randn(M) rho = randn(M) # sigma = log.(1 + exp.(rho)) for i in 1 : max_iter # sample epsilon ep = rand(M) W_tmp = mu + log.(1 + exp.(rho)) .* ep # calculate gradient df_dw = compute_df_dw(Y, X, Sigma_w, mu, rho, W_tmp) df_dmu = compute_df_dmu(mu, rho, W_tmp) df_drho = compute_df_drho(Y, X, Sigma_w, mu, rho, W_tmp) d_mu = df_dw + df_dmu d_rho = df_dw .* (ep ./ (1+exp.(-rho))) + df_drho # update variational parameters mu = mu - alpha * d_mu rho = rho - alpha * d_rho end return mu, rho end end ================================================ FILE: src/NMF.jl ================================================ """ Variational inference for Bayesian NMF """ module NMF using Distributions using StatsFuns.logsumexp, SpecialFunctions.digamma export NMFModel export sample_data, VI #################### ## Types struct NMFModel a_t::Array{Float64, 2} # D x K b_t::Array{Float64, 2} # D x L a_v::Float64 # 1 dim b_v::Float64 # 1 dim end function sqsum(mat::Array, idx) return squeeze(sum(mat, idx), idx) end #################### ## functions function init(X::Array{Int64, 2}, model::NMFModel) D, N = size(X) K = size(model.a_t, 2) S = zeros(D, K, N) A_t = rand(D, K) B_t = rand(D, K) A_v = rand(K, N) B_v = rand(K, N) for d in 1 : D for k in 1 : K for n in 1 : N S[d,k,n] = X[d,n] * A_t[d,k] * B_t[d,k] * A_v[k,n] * B_v[k,n] end end end return S, A_t, B_t, A_v, B_v end function update_S(X::Array{Int64, 2}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2}) D, K = size(A_t) N = size(A_v, 2) S = zeros(D, K, N) for d in 1 : D for n in 1 : N # K dim ln_P = (digamma.(A_t[d,:]) + log.(B_t[d,:]) + digamma.(A_v[:,n]) + log.(B_v[:,n]) ) ln_P = ln_P - logsumexp(ln_P) S[d,:,n] = X[d,n] * exp.(ln_P) end end return S end function update_T(S::Array{Float64, 3}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2}, model::NMFModel) D, K, N = size(S) a_t = model.a_t # DxK b_t = model.b_t # DxK A_t = a_t + sqsum(S, 3) B_t = (a_t ./ b_t + repmat(sqsum(A_v.*B_v, 2)', D, 1)).^(-1) return A_t, B_t end function update_V(S::Array{Float64, 3}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel) a_v = model.a_v b_v = model.b_v D, K, N = size(S) A_v = a_v + sqsum(S, 1) B_v = (a_v / b_v + repmat(sqsum(A_t.*B_t, 1), 1, N)).^(-1) return A_v, B_v end """ 
Sample data given hyperparameters.
"""
function sample_data(N::Int, model::NMFModel)
    # TODO: check b or 1/b ?
    D, K = size(model.a_t)
    T = zeros(D, K)
    for d in 1 : D
        for k in 1 : K
            T[d,k] = rand(Gamma(model.a_t[d,k], 1.0/model.b_t[d,k])) # TODO: check
        end
    end
    V = reshape(rand(Gamma(model.a_v, 1.0/model.b_v), K*N), K, N) # TODO: check
    S = zeros(D, K, N)
    for d in 1 : D
        for k in 1 : K
            for n in 1 : N
                S[d,k,n] = T[d,k] * V[k,n]
            end
        end
    end
    #X = sqsum(S, 2) + 0.0 # zero noise
    X = sqsum(S, 2)
    return X, T, S, V
end

function update_model(A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel)
    return NMFModel(A_t, B_t, model.a_v, model.b_v)
end

"""
Compute variational posterior distributions.
"""
function VI(X::Array{Int64, 2}, model::NMFModel, max_iter::Int)
    K = size(model.a_t, 2)
    D, N = size(X)
    S, A_t, B_t, A_v, B_v = init(X, model)
    for iter in 1 : max_iter
        # latent
        S = update_S(X, A_t, B_t, A_v, B_v)
        A_v, B_v = update_V(S, A_t, B_t, model)
        # param
        A_t, B_t = update_T(S, A_v, B_v, model)
    end
    return update_model(A_t, B_t, model), S, A_t.*B_t, A_v.*B_v
end
end


================================================
FILE: src/PoissonHMM.jl
================================================
"""
Bayesian 1dim Poisson Hidden Markov Model
"""
module PoissonHMM
using StatsFuns.logsumexp, SpecialFunctions.digamma
using Distributions
export Gam, BHMM, Poi, HMM
export sample_HMM, sample_data, winner_takes_all
export learn_VI

####################
## Types
struct Gam
    # Parameters of Gamma distribution
    # 1dim
    a::Float64
    b::Float64
end

struct BHMM
    # Parameters of Bayesian Poisson HMM
    K::Int
    alpha_phi::Vector{Float64}
    alpha_A::Matrix{Float64}
    cmp::Vector{Gam}
end

struct Poi
    # Parameters of Poisson Distribution
    # 1 dim
    lambda::Float64
end

struct HMM
    # Parameters of Poisson HMM
    K::Int
    phi::Vector{Float64}
    A::Matrix{Float64}
    cmp::Vector{Poi}
end

####################
## Common functions
"""
Sample an HMM from prior.
"""
function sample_HMM(bhmm::BHMM)
    cmp = Vector{Poi}()
    for c in bhmm.cmp
        lambda = rand(Gamma(c.a, 1.0/c.b))
        push!(cmp, Poi(lambda))
    end
    phi = rand(Dirichlet(bhmm.alpha_phi))
    A = zeros(size(bhmm.alpha_A))
    for k in 1 : bhmm.K
        A[:,k] = rand(Dirichlet(bhmm.alpha_A[:,k]))
    end
    return HMM(bhmm.K, phi, A, cmp)
end

"""
Sample data from a specific Poisson HMM.
"""
function sample_data(hmm::HMM, N::Int)
    X = zeros(N)
    Z = zeros(hmm.K, N)
    # sample (n=1)
    Z[:,1] = categorical_sample(hmm.phi)
    k = indmax(Z[:, 1])
    X[1] = rand(Poisson(hmm.cmp[k].lambda))
    # sample (n>1)
    for n in 2 : N
        Z[:,n] = categorical_sample(hmm.A[:,k])
        k = indmax(Z[:, n])
        X[n] = rand(Poisson(hmm.cmp[k].lambda))
    end
    return X, Z
end

categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1]
function categorical_sample(p::Vector{Float64}, N::Int)
    K = length(p)
    S = zeros(K, N)
    S_tmp = rand(Categorical(p), N)
    for k in 1 : K
        S[k,find(S_tmp.==k)] = 1
    end
    return S
end

function init_Z(X::Vector{Float64}, bhmm::BHMM)
    N = size(X, 1)
    K = bhmm.K
    Z = rand(Dirichlet(ones(K)/K), N)
    ZZ = [zeros(K,K) for _ in 1 : N - 1]
    for n in 1 : N - 1
        ZZ[n] = Z[:,n+1] * Z[:,n]'
    end
    return Z, ZZ
end

"""
Not implemented yet.
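Accordingly, `learn_VI` below keeps no ELBO trace and returns only the expected
states and the updated model.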
""" function calc_ELBO(X::Matrix{Float64}, pri::BHMM, pos::BHMM) end function add_stats(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}, ZZ::Vector{Matrix{Float64}}) K = bhmm.K sum_Z = sum(Z, 2) alpha_phi = [bhmm.alpha_phi[k] + Z[k,1] for k in 1 : K] alpha_A = bhmm.alpha_A + sum(ZZ) cmp = Vector{Gam}() ZX = Z*X # (KxN) x (Nx1) = Kx1 for k in 1 : K a = bhmm.cmp[k].a + ZX[k] b = bhmm.cmp[k].b + sum_Z[k] push!(cmp, Gam(a, b)) end return BHMM(K, alpha_phi, alpha_A, cmp) end remove_stats(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}) = add_stats(bhmm, X, -Z) #################### ## used for Variational Inference function update_Z(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}) N = size(X, 1) K = bhmm.K ln_expt_Z = zeros(K, N) ln_lkh = zeros(K, N) for k in 1 : K ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b) lambda = bhmm.cmp[k].a / bhmm.cmp[k].b for n in 1 : N ln_lkh[k,n] = X[n]'*(ln_lambda) - lambda end end expt_ln_A = zeros(size(bhmm.alpha_A)) for k in 1 : K expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k])) end # copy ln_expt_Z = log.(Z) # n = 1 ln_expt_Z[:,1] = (digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi)) + expt_ln_A' * exp.(ln_expt_Z[:,2]) + ln_lkh[:,1] ) ln_expt_Z[:,1] = ln_expt_Z[:,1] - logsumexp(ln_expt_Z[:,1]) # 2 <= n <= N - 1 for n in 2 : N - 1 ln_expt_Z[:,n] =( expt_ln_A * exp.(ln_expt_Z[:,n-1]) + expt_ln_A' * exp.(ln_expt_Z[:,n+1]) + ln_lkh[:,n] ) ln_expt_Z[:,n] = ln_expt_Z[:,n] - logsumexp(ln_expt_Z[:,n]) end # n = N ln_expt_Z[:,N] =( expt_ln_A * exp.(ln_expt_Z[:,N-1]) + ln_lkh[:,N] ) ln_expt_Z[:,N] = ln_expt_Z[:,N] - logsumexp(ln_expt_Z[:,N]) # calc output Z_ret = exp.(ln_expt_Z) ZZ_ret = [zeros(K,K) for _ in 1 : N - 1] for n in 1 : N - 1 ZZ_ret[n] = Z_ret[:,n+1] * Z_ret[:,n]' end return Z_ret, ZZ_ret end """ Pick single states having a max probability. 
""" function winner_takes_all(Z::Matrix{Float64}) Z_ret = zeros(size(Z)) for n in 1 : size(Z_ret, 2) idx = indmax(Z[:,n]) Z_ret[idx,n] = 1 end return Z_ret end function logmatprod(ln_A::Array{Float64}, ln_B::Array{Float64}) I = size(ln_A, 1) J = size(ln_B, 2) ln_C = zeros(I, J) for i in 1 : I for j in 1 : J ln_C[i, j] = logsumexp(ln_A[i, :] + ln_B[:, j]) end end return ln_C end function update_Z_fb(bhmm::BHMM, X::Vector{Float64}) K = bhmm.K N = length(X) # calc likelihood ln_lik = zeros(K, N) for k in 1 : K ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b) lambda = bhmm.cmp[k].a / bhmm.cmp[k].b for n in 1 : N ln_lik[k,n] =X[n]'*(ln_lambda) - lambda end end expt_ln_phi = digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi)) expt_ln_A = zeros(K,K) for k in 1 : K expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k])) end Z, ZZ = fb_alg(ln_lik, expt_ln_phi, expt_ln_A) # different notation ZZ_ret = [ZZ[:,:,n] for n in 1:size(ZZ, 3)] return Z, ZZ_ret end function fb_alg(ln_lik::Matrix{Float64}, ln_phi::Vector{Float64}, ln_A::Matrix{Float64}) K, T = size(ln_lik) ln_Z = zeros(K, T) ln_ZZ = zeros(K, K, T) ln_alpha = zeros(K, T) ln_beta = zeros(K, T) ln_st = zeros(T) for t in 1 : T if t == 1 ln_alpha[:, 1] = ln_phi + ln_lik[:, 1] else ln_alpha[:, t] = logmatprod(ln_A, ln_alpha[:, t-1]) + ln_lik[:, t] end ln_st[t] = logsumexp(ln_alpha[:, t]) ln_alpha[:,t] = ln_alpha[:,t] - ln_st[t] end for t in T-1 : -1 : 1 ln_beta[:, t] = logmatprod(ln_A', ln_beta[:, t+1] + ln_lik[:,t+1]) ln_beta[:, t] = ln_beta[:, t] - ln_st[t+1] end ln_Z = ln_alpha + ln_beta for t in 1 : T if t < T ln_ZZ[:,:,t] = (repmat(ln_alpha[:, t]', K, 1) + ln_A + repmat(ln_lik[:, t+1] + ln_beta[:,t+1], 1, K)) ln_ZZ[:,:,t] = ln_ZZ[:,:,t] - ln_st[t+1] end end return exp.(ln_Z), exp.(ln_ZZ) end """ Compute approximate posterior distributions via variational inference. """ function learn_VI(X::Vector{Float64}, prior_bhmm::BHMM, max_iter::Int) # initialisation expt_Z, expt_ZZ = init_Z(X, prior_bhmm) bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ) VB = NaN * zeros(max_iter) # inference for i in 1 : max_iter # E-step #expt_Z, expt_ZZ = update_Z(bhmm, X, expt_Z) expt_Z, expt_ZZ = update_Z_fb(bhmm, X) # M-step bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ) end return expt_Z, bhmm end end ================================================ FILE: src/PoissonMixtureModel.jl ================================================ """ Bayesian Poisson Mixture Model """ module PoissonMixtureModel using StatsFuns.logsumexp, SpecialFunctions.digamma using Distributions export Gam, BPMM, Poi, PMM export sample_PMM, sample_data, winner_takes_all export learn_GS, learn_CGS, learn_VI #################### ## Types struct Gam # Parameters of Gamma distribution a::Vector{Float64} b::Float64 end struct BPMM # Parameters of Bayesian Poisson Mixture Model D::Int K::Int alpha::Vector{Float64} cmp::Vector{Gam} end struct Poi # Parameters of Poisson Distribution lambda::Vector{Float64} end struct PMM # Parameters of Poisson Mixture Model D::Int K::Int phi::Vector{Float64} cmp::Vector{Poi} end #################### ## Common functions """ Sample a PMM given hyperparameters. """ function sample_PMM(bpmm::BPMM) cmp = Vector{Poi}() for c in bpmm.cmp lambda = Vector{Float64}() for d in 1 : bpmm.D push!(lambda, rand(Gamma(c.a[d], 1.0/c.b))) end push!(cmp, Poi(lambda)) end phi = rand(Dirichlet(bpmm.alpha)) return PMM(bpmm.D, bpmm.K, phi, cmp) end """ Sample data from a specific PMM model. 
""" function sample_data(pmm::PMM, N::Int) X = zeros(pmm.D, N) S = categorical_sample(pmm.phi, N) for n in 1 : N k = indmax(S[:, n]) for d in 1 : pmm.D X[d,n] = rand(Poisson(pmm.cmp[k].lambda[d])) end end return X, S end categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1] function categorical_sample(p::Vector{Float64}, N::Int) K = length(p) S = zeros(K, N) S_tmp = rand(Categorical(p), N) for k in 1 : K S[k,find(S_tmp.==k)] = 1 end return S end function init_S(X::Matrix{Float64}, bpmm::BPMM) N = size(X, 2) K = bpmm.K S = categorical_sample(ones(K)/K, N) return S end function calc_ELBO(X::Matrix{Float64}, pri::BPMM, pos::BPMM) ln_expt_S = update_S(pos, X) expt_S = exp.(ln_expt_S) K, N = size(expt_S) D = size(X, 1) expt_ln_lambda = zeros(D, K) expt_lambda = zeros(D, K) expt_ln_lkh = 0 for k in 1 : K expt_ln_lambda[:,k] = digamma.(pos.cmp[k].a) - log.(pos.cmp[k].b) expt_lambda[:,k] = pos.cmp[k].a / pos.cmp[k].b for n in 1 : N expt_ln_lkh += expt_S[k,n] * (X[:, n]' * expt_ln_lambda[:,k] - sum(expt_lambda[:,k]) - sum(lgamma.(X[:,n]+1)))[1] end end expt_ln_pS = sum(expt_S' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha)))) expt_ln_qS = sum(expt_S .* ln_expt_S) KL_lambda = 0 for k in 1 : K KL_lambda += (sum(pos.cmp[k].a)*log.(pos.cmp[k].b) - sum(pri.cmp[k].a)*log.(pri.cmp[k].b) - sum(lgamma.(pos.cmp[k].a)) + sum(lgamma.(pri.cmp[k].a)) + (pos.cmp[k].a - pri.cmp[k].a)' * expt_ln_lambda[:,k] + (pri.cmp[k].b - pos.cmp[k].b) * sum(expt_lambda[:,k]) )[1] end KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha)) - sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha)) + (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha))) )[1] VB = expt_ln_lkh + expt_ln_pS - expt_ln_qS - (KL_lambda + KL_pi) return VB end function add_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64}) D = bpmm.D K = bpmm.K sum_S = sum(S, 2) alpha = [bpmm.alpha[k] + sum_S[k] for k in 1 : K] cmp = Vector{Gam}() XS = X*S'; for k in 1 : K a = [(bpmm.cmp[k].a[d] + XS[d,k])::Float64 for d in 1 : D] b = bpmm.cmp[k].b + sum_S[k] push!(cmp, Gam(a, b)) end return BPMM(D, K, alpha, cmp) end remove_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64}) = add_stats(bpmm, X, -S) #################### ## used for Variational Inference function update_S(bpmm::BPMM, X::Matrix{Float64}) D, N = size(X) K = bpmm.K ln_expt_S = zeros(K, N) tmp = zeros(K) sum_digamma_tmp = digamma.(sum(bpmm.alpha)) for k in 1 : K tmp[k] = - sum(bpmm.cmp[k].a) / bpmm.cmp[k].b tmp[k] += digamma.(bpmm.alpha[k]) - sum_digamma_tmp end ln_lambda_X = [X'*(digamma.(bpmm.cmp[k].a) - log.(bpmm.cmp[k].b)) for k in 1 : K] for n in 1 : N tmp_ln_pi = [tmp[k] + ln_lambda_X[k][n] for k in 1 : K] ln_expt_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi) end return ln_expt_S end """ Pick single states having a max probability. 
""" function winner_takes_all(S::Matrix{Float64}) S_ret = zeros(size(S)) for n in 1 : size(S_ret, 2) idx = indmax(S[:,n]) S_ret[idx,n] = 1 end return S_ret end #################### ## used for Gibbs Sampling function sample_S_GS(pmm::PMM, X::Matrix{Float64}) D, N = size(X) K = pmm.K S = zeros(K, N) tmp = [-sum(pmm.cmp[k].lambda) + log.(pmm.phi[k]) for k in 1 : K] ln_lambda_X = [X'*log.(pmm.cmp[k].lambda) for k in 1 : K] for n in 1 : N tmp_ln_phi = [(tmp[k] + ln_lambda_X[k][n])::Float64 for k in 1 : K] tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi) S[:,n] = categorical_sample(exp.(tmp_ln_phi)) end return S end #################### ## used for Collapsed Gibbs Sampling function calc_ln_NB(Xn::Vector{Float64}, gam::Gam) ln_lkh = [(gam.a[d]*log.(gam.b) - lgamma.(gam.a[d]) + lgamma.(Xn[d] + gam.a[d]) - (Xn[d] + gam.a[d])*log.(gam.b + 1) )::Float64 for d in 1 : size(Xn, 1)] return sum(ln_lkh) end function sample_Sn(Xn::Vector{Float64}, bpmm::BPMM) ln_tmp = [(calc_ln_NB(Xn, bpmm.cmp[k]) + log.(bpmm.alpha[k])) for k in 1 : bpmm.K] ln_tmp = ln_tmp - logsumexp(ln_tmp) Sn = categorical_sample(exp.(ln_tmp)) return Sn end function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bpmm::BPMM) D, N = size(X) K = size(S, 1) for n in randperm(N) # remove bpmm = remove_stats(bpmm, X[:,[n]], S[:,[n]]) # sample S[:,n] = sample_Sn(X[:,n], bpmm) # insert bpmm = add_stats(bpmm, X[:,[n]], S[:,[n]]) end return S, bpmm end #################### ## Algorithm main """ Compute posterior distribution via variational inference. """ function learn_VI(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) # initialisation expt_S = init_S(X, prior_bpmm) bpmm = add_stats(prior_bpmm, X, expt_S) VB = NaN * zeros(max_iter) # inference for i in 1 : max_iter # E-step expt_S = exp.(update_S(bpmm, X)) # M-step bpmm = add_stats(prior_bpmm, X, expt_S) # calc VB VB[i] = calc_ELBO(X, prior_bpmm, bpmm) end return expt_S, bpmm, VB end """ Compute posterior distribution via Gibbs sampling. """ function learn_GS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) # initialisation S = init_S(X, prior_bpmm) bpmm = add_stats(prior_bpmm, X, S) VB = NaN * zeros(max_iter) # inference for i in 1 : max_iter # sample parameters pmm = sample_PMM(bpmm) # sample latent variables S = sample_S_GS(pmm, X) # update current model bpmm = add_stats(prior_bpmm, X, S) # calc VB VB[i] = calc_ELBO(X, prior_bpmm, bpmm) end return S, bpmm, VB end """ Compute posterior distribution via collapsed Gibbs sampling. """ function learn_CGS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int) # initialisation S = init_S(X, prior_bpmm) bpmm = add_stats(prior_bpmm, X, S) VB = NaN * zeros(max_iter) # inference for i in 1 : max_iter # directly sample S S, bpmm = sample_S_CGS(S, X, bpmm) # calc VB VB[i] = calc_ELBO(X, prior_bpmm, bpmm) end return S, bpmm, VB end end ================================================ FILE: src/demo_BayesNeuralNet.jl ================================================ #################################### ## Demo script for Bayesian neural network. using PyPlot, PyCall push!(LOAD_PATH, ".") import BayesNeuralNet """ Sample neural nets from prior. 
""" function sample_test() # model parameters D = 1 # output K = 3 # hidden M = 2 # input sigma2_w = 10.0 sigma2_y = 0.1 xmin = -5 xmax = 5 N_lin = 1000 X_lin = ones(M, N_lin) X_lin[1,:] = linspace(xmin, xmax, N_lin) X_lin[2,:] = 1 # bias # visualize num_samples = 5 figure("Function samples") clf() for i in 1 : num_samples _, Y_true, _, _ = BayesNeuralNet.sample_data_from_prior(X_lin, sigma2_w, sigma2_y, D, K) plot(X_lin[1,:], Y_true) xlim([xmin, xmax]) end ratey = (ylim()[2] - ylim()[1]) * 0.1 ratex = (xlim()[2] - xlim()[1]) * 0.1 text(xlim()[1] + ratex, ylim()[2] - ratey, @sprintf("K=%d", K), fontsize=18) show() end """ Run a test script of variational inference for Bayesian neural net. """ function test() ################# # prepara data # data size D = 1 # output M = 2 # input # function setting xmin = -2 xmax = 4 N_lin = 1000 X_lin = ones(M, N_lin) X_lin[1,:] = linspace(xmin, xmax, N_lin) X_lin[2,:] = 1 # bias # training data N = 50 # data size X = 2*rand(M, N) - 0.0 # input X[2,:] = 1.0 # bias Y = 0.5*sin.(2*pi * X[1,:]/3) + 0.05 * randn(N) # model parameters K = 5 sigma2_w = 10.0 sigma2_y = 0.01 ################ # inference alpha = 1.0e-5 max_iter = 100000 mu1, rho1, mu2, rho2 = BayesNeuralNet.VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter) Y_mean = [mu2'* tanh.(mu1'X_lin[:,n]) for n in 1 : N_lin] ################ # visualize figure("result") clf() Y_list = [] num_samples = 100 for i in 1 : num_samples Y_est, _ = BayesNeuralNet.sample_data_from_posterior(X_lin, mu1, rho1, mu2, rho2, sigma2_y, D) push!(Y_list, Y_est) plot(X_lin[1,:], Y_est, "-c", alpha=0.25) end plot(X[1,:], Y, "ok") plot(X_lin[1,:], Y_mean, "b-") xlim([xmin, xmax]) xlabel("x") ylabel("y") show() end #sample_test() test() ================================================ FILE: src/demo_DimensionalityReduction.jl ================================================ ################################### ## Demo script for Bayesian Dimensionality Reduction using PyPlot, PyCall @pyimport sklearn.datasets as datasets push!(LOAD_PATH,".") import DimensionalityReduction function load_facedata(skip::Int) face = datasets.fetch_olivetti_faces() Y_raw = face["images"] N, S_raw, _ = size(Y_raw) L = round(Int, S_raw / skip) Y_tmp = Y_raw[:,1:skip:end, 1:skip:end] Y = convert(Array{Float64, 2}, reshape(Y_tmp, N, size(Y_tmp,2)*size(Y_tmp,3))') D = size(Y, 1) return Y, D, L end function visualize(Y::Array{Float64,2}, L::Int) D, N = size(Y) base = round(Int, sqrt(N)) v = round(Int, (L*ceil(N / base))) h = L * base pic = zeros(v, h) for n in 1 : N i = round(Int, (L*ceil(n / base))) idx1 = i - L + 1 : i idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1) pic[idx1,idx2] = reshape(Y[:,n], L, L) end imshow(pic, cmap=ColorMap("gray")) end function visualize(Y::Array{Float64,2}, L::Int, mask::BitArray{2}) # for missing D, N = size(Y) base = round(Int, sqrt(N)) v = round(Int, (L*ceil(N / base))) h = L * base pic = zeros(v, h, 3) Y_3dim = zeros(D, N, 3) for i in 1 : 3 if i == 2 Y_tmp = deepcopy(Y) Y_tmp[mask] = 1 Y_3dim[:,:,i] = Y_tmp else Y_tmp = deepcopy(Y) Y_tmp[mask] = 0 Y_3dim[:,:,i] = Y_tmp end end for n in 1 : N i = round(Int, (L*ceil(n / base))) idx1 = i - L + 1 : i idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1) for i in 1 : 3 pic[idx1,idx2,i] = reshape(Y_3dim[:,n,i], L, L) end end imshow(pic, cmap=ColorMap("gray")) end """ Run a demo script of missing data interpolation for face dataset. 
""" function test_face_missing() # load data skip = 2 Y, D, L = load_facedata(skip) # mask missing_rate = 0.50 mask = rand(size(Y)) .< missing_rate Y_obs = deepcopy(Y) Y_obs[mask] = NaN # known parames M = 16 sigma2_y = 0.001 Sigma_W = zeros(M,M,D) Sigma_mu = 1.0 * eye(D) for d in 1 : D Sigma_W[:,:,d] = 0.1 * eye(M) end prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) # learn & generate max_iter = 100 posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) Y_est = posterior.m_W'*X_est + repmat(posterior.m_mu, 1, size(X_est, 2)) Y_itp = deepcopy(Y_obs) Y_itp[mask] = Y_est[mask] #visualize N_show = 4^2 figure("Observation") clf() visualize(Y_obs[:,1:N_show], L, mask[:,1:N_show]) title("Observation") #figure("Estimation") #clf() #visualize(Y_est[:,1:N_show], L) #title("Estimation") figure("Interpolation") clf() visualize(Y_itp[:,1:N_show], L) title("Interpolation") figure("Truth") clf() visualize(Y[:,1:N_show], L) title("Truth") show() end """ Run a dimensionality reduction demo using Iris dataset. """ function test_iris() ################## # load data iris = datasets.load_iris() Y_obs = iris["data"]' label_list = [iris["target_names"][elem+1] for elem in iris["target"]] D, N = size(Y_obs) ################## # 2D compression # model M = 2 sigma2_y = 0.001 Sigma_W = zeros(M,M,D) Sigma_mu = 1.0 * eye(D) for d in 1 : D Sigma_W[:,:,d] = 0.1 * eye(M) end prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) # learn & generate max_iter = 100 posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) # visualize figure("2D plot") clf() scatter(X_est[1,1:50], X_est[2,1:50], color="r") scatter(X_est[1,51:100], X_est[2,51:100], color="g") scatter(X_est[1,101:end], X_est[2,101:end], color="b") xlabel("\$x_1\$", fontsize=20) ylabel("\$x_2\$", fontsize=20) legend([label_list[1], label_list[51], label_list[101]], fontsize=16) ################## # 3D compression # model M = 3 sigma2_y = 0.001 Sigma_W = zeros(M,M,D) Sigma_mu = 1.0 * eye(D) for d in 1 : D Sigma_W[:,:,d] = 0.1 * eye(M) end prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu) # learn & generate max_iter = 100 posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter) # visualize figure("3D plot") clf() scatter3D(X_est[1,1:50], X_est[2,1:50], X_est[3,1:50], c="r") scatter3D(X_est[1,51:100], X_est[2,51:100], X_est[3,51:100], c="g") scatter3D(X_est[1,101:end], X_est[2,101:end], X_est[3,101:end], c="b") legend([label_list[1], label_list[51], label_list[101]], fontsize=16) xlabel("\$x_1\$", fontsize=20) ylabel("\$x_2\$", fontsize=20) zlabel("\$x_3\$", fontsize=20) show() end #test_face_missing() test_iris() ================================================ FILE: src/demo_GaussianMixtureModel.jl ================================================ ################################### ## Example code ## for Bayesian Gaussin Mixture Model using PyPlot, PyCall push!(LOAD_PATH,".") import GaussianMixtureModel """ Visualize data & estimation in 2D space. 
""" function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text) cmp = get_cmap("jet") K1 = size(S, 1) K2 = size(S_est, 1) col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)] col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)] f, (ax1, ax2) = subplots(1,2,num=text) f[:clf]() f, (ax1, ax2) = subplots(1,2,num=text) for k in 1 : K1 ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k]) end ax1[:set_title]("truth") for k in 1 : K2 ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k]) end ax2[:set_title]("estimation") end """ Run a test script for 2D data clustering. """ function test_2D() ## set model D = 2 # data dimension K = 4 # number of mixture components alpha = 100.0 * ones(K) beta = 0.1 m = zeros(D) nu = D + 1.0 W = eye(D) cmp = [GaussianMixtureModel.GW(beta, m, nu, W) for _ in 1 : K] bgmm = GaussianMixtureModel.BGMM(D, K, alpha, cmp) ## generate data N = 300 gmm = GaussianMixtureModel.sample_GMM(bgmm) X, S = GaussianMixtureModel.sample_data(gmm, N) ## inference max_iter = 100 tic() S_est, post_bgmm, VB = GaussianMixtureModel.learn_VI(X, bgmm, max_iter) #S_est, post_bgmm, VB = GaussianMixtureModel.learn_GS(X, bgmm, max_iter) #S_est, post_bgmm, VB = GaussianMixtureModel.learn_CGS(X, bgmm, max_iter) toc() ## plot visualize_2D(X, S, GaussianMixtureModel.winner_takes_all(S_est), "2D plot") # VB check figure("ELBO") clf() plot(VB) ylabel("ELBO") xlabel("iterations") show() end test_2D() ================================================ FILE: src/demo_LogisticRegression.jl ================================================ ##################################### ## Bayesian logistic regression demo using PyPlot, PyCall using Distributions push!(LOAD_PATH, ".") import LogisticRegression """ Visualize prediction via surface (only for 2D inputs.) """ function visualize_surface(mu, rho, X, Y, text) N = 100 R = 100 xmin = minimum(X[1,:]) xmax = maximum(X[1,:]) ymin = minimum(X[2,:]) ymax = maximum(X[2,:]) lx = xmax - xmin ly = ymax - ymin xmin = xmin - 0.25 * lx xmax = xmax + 0.25 * lx ymin = ymin - 0.25 * ly ymax = ymax + 0.25 * ly x1 = linspace(xmin,xmax,R) x2 = linspace(ymin,ymax,R) x1grid = repmat(x1, 1, R) x2grid = repmat(x2', R, 1) val = [x1grid[:] x2grid[:]]' z_list = [] sigma = log.(1 + exp.(rho)) for n in 1 : N W = rand(MvNormal(mu, diagm(sigma.^2))) z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)] push!(z_list, z_tmp) end z = mean(z_list) zgrid = reshape(z, R, R) # 3D plot figure("surface") clf() plot_surface(x1grid, x2grid, zgrid, alpha=0.5) scatter3D(X[1,Y.==1], X[2,Y.==1], Y[Y.==1]+0.01, c="r", depthshade=true) scatter3D(X[1,Y.==0], X[2,Y.==0], Y[Y.==0], c="b", depthshade=true) xlim([xmin, xmax]) ylim([ymin, ymax]) zlim([0, 1]) title(text) end """ Visualize prediction via contour (only for 2D inputs.) 
""" function visualize_contour(mu, rho, X, Y) N = 100 R = 100 xmin = 2*minimum(X[1,:]) xmax = 2*maximum(X[1,:]) ymin = minimum(X[2,:]) ymax = maximum(X[2,:]) x1 = linspace(xmin,xmax,R) x2 = linspace(ymin,ymax,R) x1grid = repmat(x1, 1, R) x2grid = repmat(x2', R, 1) val = [x1grid[:] x2grid[:]]' z_list = [] W_list = [] sigma = log.(1 + exp.(rho)) for n in 1 : N W = rand(MvNormal(mu, diagm(sigma.^2))) z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)] push!(W_list, W) push!(z_list, z_tmp) end z = mean(z_list) zgrid = reshape(z, R, R) # precition figure("contour") clf() contour(x1grid, x2grid, zgrid, alpha=0.5, cmap=get_cmap("bwr")) scatter(X[1,Y.==1], X[2,Y.==1], c="r") scatter(X[1,Y.==0], X[2,Y.==0], c="b") xlim([xmin, xmax]) ylim([ymin, ymax]) title("prediction") # parameter samples figure("samples") clf() for n in 1 : 10 draw_line(W_list[n], xmin, xmax) end scatter(X[1,Y.==1]', X[2,Y.==1]', c="r") scatter(X[1,Y.==0]', X[2,Y.==0]', c="b") xlim([xmin, xmax]) ylim([ymin, ymax]) title("parameter samples") end function draw_line(W, xmin, xmax) y1 = - xmin*W[1]/W[2] y2 = - xmax*W[1]/W[2] plot([xmin, xmax], [y1, y2], c="k") end ######################## # create model M = 2 # input dimension Sigma_w = 100.0 * eye(M) # prior on W ######################## # create toy-data using prior model N = 50 # num of data points X = 2 * rand(M, N) - 1.0 # input values # sample observation Y Y, _ = LogisticRegression.sample_data(X, Sigma_w) ######################## # inference alpha = 1.0e-4 # learning rate max_iter = 100000 # VI maximum iterations # learn variational parameters (mu & rho) mu, rho = LogisticRegression.VI(Y, X, M, Sigma_w, alpha, max_iter) ######################## # visualize (only for M=2) visualize_surface(mu, rho, X, Y, "prediction") visualize_contour(mu, rho, X, Y) show() ================================================ FILE: src/demo_NMF.jl ================================================ ############################## ## Audio decomposition demo using NMF using PyPlot, PyCall using DataFrames using Distributions push!(LOAD_PATH, ".") import NMF @pyimport scipy.io.wavfile as wf # load data wavfile = "../data/organ.wav" fs, data = wf.read(wavfile) figure("data") clf() Pxx, freqs, t, pl = specgram(data[10000:318000,2], Fs=fs, NFFT=256, noverlap=0) xlabel("time [sec]") ylabel("frequency [Hz]") ylim([0,22000]) # model D, N = size(Pxx) K = 2 a_t = 1.0 b_t = 1.0 a_v = 1.0 b_v = 100.0 prior = NMF.NMFModel(a_t*ones(D,K), b_t*ones(D, K), a_v, b_v) # inference max_iter = 100 posterior, S_est, T_est, V_est = NMF.VI(Int64.(round.(Pxx)), prior, max_iter) X = T_est * V_est # visualize figure("T") clf() for k in 1 : K subplot(K,1,k) plot(T_est[:,k], linewidth=1.0) xlim([0, D]) ylim([0, ylim()[2]]) end figure("V") clf() for k in 1 : K subplot(K,1,k) plot(V_est[k,:], linewidth=1.0) xlim([0,N]) ylim([0, ylim()[2]]) end show() ================================================ FILE: src/demo_PoissonHMM.jl ================================================ ################################### ## Example code ## for Bayesian Poisson HMM using PyPlot, PyCall using HDF5, JLD @pyimport matplotlib.gridspec as gspec push!(LOAD_PATH,".") import PoissonHMM import PoissonMixtureModel """ Simple comparison between HMM and mixture model. 
""" function test_comparison() ######################### ## load data file_name = "../data/timeseries.jld" X = load(file_name)["obs"] N = length(X) ######################### ## Poison HMM ## set model K = 2 # number of mixture components alpha_phi = 10.0 * ones(K) alpha_A = 100.0 * eye(K) + 1.0*ones(K, K) cmp = [PoissonHMM.Gam(1.0, 0.01), PoissonHMM.Gam(1.0, 0.01)] bhmm = PoissonHMM.BHMM(K, alpha_phi, alpha_A, cmp) ## inference max_iter = 100 tic() Z_est_hmm, post_bhmm = PoissonHMM.learn_VI(X, bhmm, max_iter) toc() ######################### ## Poison Mixture Model ## set model K = 2 # number of mixture components alpha_phi = 10.0 * ones(K) cmp = [PoissonMixtureModel.Gam([1.0], 0.01), PoissonMixtureModel.Gam([1.0], 0.01)] bpmm = PoissonMixtureModel.BPMM(1, K, alpha_phi, cmp) ## inference max_iter = 100 tic() Z_est_pmm, post_bpmm = PoissonMixtureModel.learn_VI(reshape(X, 1, N), bpmm, max_iter) toc() ######################### ## Compare results figure("Hidden Markov Model vs Mixture Model") subplot(3,1,1);plot(X);ylabel("data") subplot(3,1,2);fill_between(1:N, reshape(Z_est_hmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PHMM)") subplot(3,1,3);fill_between(1:N, reshape(Z_est_pmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PMM)") show() end test_comparison() ================================================ FILE: src/demo_PoissonMixtureModel.jl ================================================ ################################### ## Example code ## for Bayesian Poisson Mixture Model push!(LOAD_PATH,".") using PyPlot, PyCall import PoissonMixtureModel """ Visualize data & estimation in 2D space. """ function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text) cmp = get_cmap("jet") K1 = size(S, 1) K2 = size(S_est, 1) col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)] col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)] f, (ax1, ax2) = subplots(1,2,num=text) f[:clf]() f, (ax1, ax2) = subplots(1,2,num=text) for k in 1 : K1 ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k]) end ax1[:set_title]("truth") for k in 1 : K2 ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k]) end ax2[:set_title]("estimation") end function draw_hist(ax, X, S, label) counts, bins, patches = ax[:hist](X', 20) for i in 1 : length(patches) if counts[i] > 0 S_tmp = S[:,bins[i] .<= X[1,:] .<= bins[i+1]] S_sum = sum(S_tmp, 2) / sum(S_tmp) patches[i][:set_facecolor]((S_sum[1], 0, S_sum[2])) end end ax[:set_title](label) end """ Visualize data & estimation using 1D histogram. """ function visualize_1D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}) # separated figures f1, ax1 = subplots(1,1,num="observation") f2, ax2 = subplots(1,1,num="estimation") f1[:clf]() f2[:clf]() _, ax1 = subplots(1,1,num="observation") _, ax2 = subplots(1,1,num="estimation") ax1[:hist](X', 20) ax1[:set_title]("observation") draw_hist(ax2, X, S_est, "estimation") end """ Run a test script for 1D data clustering. """ function test_1D() ## set model D = 1 # data dimension, must be 1. K = 2 # number of mixture components, must be 2. 
alpha = 100.0 * ones(K) cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K] bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp) ## generate data N = 1000 pmm = PoissonMixtureModel.sample_PMM(bpmm) X, S = PoissonMixtureModel.sample_data(pmm, N) ## inference max_iter = 100 tic() S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter) #S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter) #S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter) toc() ## plot visualize_1D(X, S, S_est) figure("ELBO") clf() plot(VB) ylabel("ELBO") xlabel("iterations") show() end """ Run a test script for 2D data clustering. """ function test_2D() ## set model D = 2 # data dimension, must be 2. K = 8 # number of mixture components #K = 5 alpha = 100.0 * ones(K) cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K] bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp) ## generate data N = 300 pmm = PoissonMixtureModel.sample_PMM(bpmm) X, S = PoissonMixtureModel.sample_data(pmm, N) ## inference max_iter = 100 tic() S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter) #S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter) #S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter) toc() ## plot visualize_2D(X, S, PoissonMixtureModel.winner_takes_all(S_est), "2D plot") # VB check figure("ELBO") clf() plot(VB) ylabel("ELBO") xlabel("iterations") show() end test_1D() #test_2D() ================================================ FILE: src/demo_PolynomialRegression.jl ================================================ ################################# ## Bayesian model selection demo ## for polynomial regression using PyPlot, PyCall using Distributions function poly(X_raw, M) N = size(X_raw, 1) X = zeros(M, N) for m in 0 : M - 1 X[m+1,:] = X_raw.^m end return X end function learn_bayes(X_raw, Y, M, sig2_y, Sig_w, X_lin) X = poly(X_raw, M) N = size(X_raw, 1) # calc posterior Sig_w_h = inv(X*inv(sig2_y*eye(N))*X' + inv(Sig_w)) mu_w_h = Sig_w_h * (X * inv(sig2_y * eye(N)) * Y) # calc predictive X_test = poly(X_lin, M) Y_est = (mu_w_h'*X_test)' sig2_y_prd = sig2_y + diag(X_test'Sig_w_h*X_test) # calc evidence evidence = -0.5*(sum(Y)*inv(sig2_y) +N*log.(sig2_y) + N*log.(2*pi) + logdet(Sig_w) - (mu_w_h'*inv(Sig_w_h)*mu_w_h)[1] - logdet(Sig_w_h) ) return Y_est, sqrt.(sig2_y_prd), evidence end function test() # linspace X_lin = linspace(-1, 7, 200) # generate data N = 10 sig2_y = 0.1 X = 2*pi*rand(N) Y_true = [sin.(x) for x in X_lin] Y_obs = [sin.(x) + sig2_y * randn() for x in X] dims = [1, 2, 3, 4, 5, 10] # learning via Bayes sig2_w = 1.0 Y_bayes = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin) for m in dims] ############# # compute evidences evidence = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin)[3] for m in dims] figure("evidence") clf() plot(1:length(dims), evidence) xticks(1:length(dims),dims) ylabel(("\$\\ln p(\\bf{Y}|\\bf{X})\$"), fontsize=20) xlabel(("\$M\$"), fontsize=20) ############# # visualize x_min = X_lin[1] x_max = X_lin[end] y_min = -4 y_max = 4 figure("prediction") clf() for k in 1 : 6 subplot(230 + k) plot(X_lin, Y_bayes[k][1]) plot(X_lin, Y_bayes[k][1] + Y_bayes[k][2], "c--") plot(X_lin, Y_bayes[k][1] - Y_bayes[k][2], "c--") plot(X, Y_obs, "ko") xlim([x_min, x_max]) ylim([y_min, y_max]) text(x_max - 2.5, y_max - 1, @sprintf("M=%d", dims[k])) end show() end test() ================================================ FILE: src/demo_Simple2DGauss.jl 
================================================ ################################### ## Simple VI & GS for 2D Gaussian using PyPlot using Distributions function calc_KL(mu1, lambda1, mu2, lambda2) D = size(mu1, 1) px_lnqx = 0.5 * logdet(lambda2) - 0.5 * ((mu1 - mu2)' * lambda2 * (mu1 - mu2) + trace(lambda2 * inv(lambda1))) px_lnpx = 0.5 * logdet(lambda1) - 0.5 * D KL = - (px_lnqx - px_lnpx) return KL[1] end function plot_results(result, truth) N = size(result, 1) H = Int(ceil(sqrt(N))) W = Int(ceil(N / H)) for i in 1 : H for j in 1 : W n = (i - 1) * W + j if n <= N subplot(H, W, n) title("$n of $N") plot_gaussian(truth[1], truth[2], "b", "\$p(z)\$") plot_gaussian(result[n][1], result[n][2], "r", "\$p(z)\$") end end end end function plot_lines(X) D, N = size(X) X_d = zeros(D, 2*N + 1) X_d[:,1] = X[:,1] for i in 1 : N X_d[1, 2*i - 1] = X[1, i] X_d[1, 2*i] = X[1, i] X_d[2, 2*i] = X[2, i] X_d[2, 2*i + 1] = X[2, i] end plot(X[1,:], X[2,:], "oy") plot(X_d[1,1:2*N], X_d[2,1:2*N], "--y") end function plot_gaussian(Mu, Sigma, col, label) res = 100 plot(Mu[1], Mu[2], "x", color=col) F = eigfact(Sigma) vec = F.vectors val = F.values dw = 2*pi/res w = dw * (0 : res) c = 1.0 a = sqrt(c*val[1]) b = sqrt(c*val[2]) P1 = a*cos.(w) P2 = b*sin.(w) P = Mu .+ vec'*vcat(P1', P2') plot(P[1, :], P[2, :], "-", color=col, label=label) end """ Variational inference for 2D Gauss. """ function main_VI() ## creat truth distribution D = 2 # dimension theta = 2.0*pi/12 # tilt A = reshape([cos.(theta), -sin.(theta), sin.(theta), cos.(theta)], 2, 2) mu = [0.0, 0.0] lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A') ## initialize #mu_h = randn(D) mu_h = [-0.5, 0.3] lambda_h = zeros(D,D) ## main iteration max_iter = 10 KL = NaN * Array{Float64, 1}(max_iter) result = Array{Any, 1}(max_iter) for i in 1 : max_iter ## update mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (mu_h[2] - mu[2]) lambda_h[1,1] = lambda[1,1] mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (mu_h[1] - mu[1]) lambda_h[2,2] = lambda[2,2] ## calculate KL divergeince KL[i] = calc_KL(mu_h, lambda_h, mu, lambda) ## store the results result[i] = [deepcopy(mu_h), deepcopy(inv(lambda_h))] end ## visualize results figure("result per iteration (VI)") clf() plot_results(result, (mu, inv(lambda))) figure("result (VI)") clf() plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$") plot_gaussian(result[end][1], result[end][2], "r", "\$q(\\bf{z})\$") xlabel("\$z_1\$", fontsize=20) ylabel("\$z_2\$", fontsize=20) legend(fontsize=16) figure("KL divergence (VI)") clf() plot(1:max_iter, KL) ylabel("KL divergence", fontsize=16) xlabel("iteration", fontsize=16) show() end """ Gibbs sampling for 2D Gauss. 
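Each coordinate is drawn in turn from its full conditional; for a bivariate
Gaussian with mean `mu` and precision `lambda` this is
x1 | x2 ~ N( mu[1] - lambda[1,1]^(-1) * lambda[1,2] * (x2 - mu[2]), lambda[1,1]^(-1) ),
and symmetrically for x2. The KL divergence between the target and a Gaussian
fitted to the samples drawn so far is recorded at every step.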
""" function main_GS() ## creat truth distribution D = 2 # dimension theta = 2.0*pi/12 # tilt A = reshape([cos.(theta), -sin.(theta), sin.(theta), cos.(theta)], 2, 2) mu = [0.0, 0.0] #lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A') lambda = inv(A * inv(reshape([1,0,0,100], 2, 2)) * A') ## initialize #max_iter = 1000 max_iter = 50 X = randn(D, max_iter) mu_h = randn(D) ## main iteration KL = NaN * Array{Float64, 1}(max_iter) for i in 2 : max_iter ## update mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (X[2,i-1] - mu[2]) X[1, i] = rand(Normal(mu_h[1], sqrt(inv(lambda[1,1])))) mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (X[1,i] - mu[1]) X[2, i] = rand(Normal(mu_h[2], sqrt(inv(lambda[2,2])))) if i > D KL[i] = calc_KL(mean(X[:,1:i], 2), inv(cov(X[:,1:i], 2)), mu, lambda) end end ## visualize results expt_mu = mean(X, 2) expt_Sigma = cov(X, 2) figure("samples (GS)") clf() plot_lines(X) plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$") plot_gaussian(expt_mu, expt_Sigma, "r", "\$q(\\bf{z})\$") xlabel("\$z_1\$", fontsize=20) ylabel("\$z_2\$", fontsize=20) legend(fontsize=16) figure("KL divergence (GS)") clf() plot(1:max_iter, KL) ylabel("KL divergence", fontsize=16) xlabel("sample size", fontsize=16) show() end main_VI() main_GS() ================================================ FILE: src/demo_SimpleFitting.jl ================================================ ##################################### ## Simple function fitting demo using PyPlot, PyCall using Distributions # true param W = Array([1.0, 0.0, 1.0]) # generate data sigma = 0.5 N = 20 X = linspace(-0.4,2.4,N) Y = [W[1] + W[2]*x + W[3]*x^2 + sigma*randn() for x in X] X_min = minimum(X) X_max = maximum(X) # regression1 X_all = linspace(X_min, X_max, 100) W1 = sum(Y.*X) / sum(X.^2) Y1 = [W1*x for x in X_all] # regression2 X2 = zeros(3, N) X2[1,:] = 1 X2[2,:] = X X2[3,:] = X.^2 W2 = inv(X2*X2') * X2*Y Y2 = [W2[1] + W2[2]*x + W2[3]*x^2 for x in X_all] # show data figure() plot(X_all, Y1, "b-") plot(X_all, Y2, "g-") plot(X, Y, "ko") legend(["model1","model2","data"], loc="upper left", fontsize=16) xlabel("\$x\$", fontsize=20) ylabel("\$y\$", fontsize=20) show() ================================================ FILE: src/demo_nonconjugate.jl ================================================ using PyPlot, PyCall using Distributions import StatsFuns.logsumexp PyDict(matplotlib["rcParams"])["mathtext.fontset"] = "cm" PyDict(matplotlib["rcParams"])["mathtext.rm"] = "serif" PyDict(matplotlib["rcParams"])["lines.linewidth"] = 1.5 PyDict(matplotlib["rcParams"])["font.family"] = "TakaoPGothic" function expt(a, b, sigma, Y, X, N_s) S = rand(Gamma(a, 1.0/b), N_s) C = mean([exp(sum(logpdf.(Normal(s, sigma), Y))) for s in S]) curve = [exp(sum(logpdf.(Normal(mu, sigma), Y))) * pdf(Gamma(a, 1.0/b), mu) for mu in X] m = mean([s*exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S]) v = mean([(s-m)^2 * exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S]) return curve/C, m, v end X = linspace(-5, 10, 1000) a = 2.0 b = 2.0 mu = 1.0 sigma=1.0 # data N = 10 Y = rand(Normal(mu, sigma), N) # calc posterior N_s = 100000 posterior, m, v = expt(a, b, sigma, Y, X, N_s) a_h = m^2 / v b_h = m / v figure() plot(X, pdf(Normal(mu,sigma), X)) plot(X, pdf(Gamma(a,1.0/b), X)) plot(X, posterior) plot(X, pdf(Gamma(a_h,1.0/b_h), X)) plot(Y, 0.02*ones(N), "o") legend(["generator", "prior", "posterior", "approx", "samples"]) #legend(["データ生成分布", "事前分布", "事後分布", "近似分布", "データ"], fontsize=12) xlim([-3, 6]) ylim([0, 1.8])