Repository: sammy-suyama/BayesBook
Branch: master
Commit: 61cb7ee0f1df
Files: 24
Total size: 66.9 KB
Directory structure:
BayesBook/
├── LICENSE
├── README.md
├── data/
│ └── timeseries.jld
├── docker/
│ ├── Dockerfile
│ ├── README.md
│ └── add_packages.jl
└── src/
├── BayesNeuralNet.jl
├── DimensionalityReduction.jl
├── GaussianMixtureModel.jl
├── LogisticRegression.jl
├── NMF.jl
├── PoissonHMM.jl
├── PoissonMixtureModel.jl
├── demo_BayesNeuralNet.jl
├── demo_DimensionalityReduction.jl
├── demo_GaussianMixtureModel.jl
├── demo_LogisticRegression.jl
├── demo_NMF.jl
├── demo_PoissonHMM.jl
├── demo_PoissonMixtureModel.jl
├── demo_PolynomialRegression.jl
├── demo_Simple2DGauss.jl
├── demo_SimpleFitting.jl
└── demo_nonconjugate.jl
================================================
FILE CONTENTS
================================================
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2020 Sammy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# BayesBook
This repository contains the source code for the book 「機械学習スタートアップシリーズ ベイズ推論による機械学習入門」 (Machine Learning Startup Series: An Introduction to Machine Learning with Bayesian Inference).
* http://www.kspub.co.jp/book/detail/1538320.html
* Errata (printings 1-3): https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo.pdf
* Errata (up to the 4th printing): https://github.com/sammy-suyama/BayesBook/blob/master/pdf/seigo_v4.pdf
The source code is written in Julia (recommended version: 0.6.0).
* The Julia Language: http://julialang.org/
* Julia Documentation: http://docs.julialang.org/
Some Python libraries are used for plotting and for downloading test data.
* Python: https://www.python.org/
* Matplotlib: https://matplotlib.org/
* scikit-learn: http://scikit-learn.org/
If setting up the environment above is cumbersome, a Dockerfile is also provided.
* Docker: https://docs.docker.com/
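Each demo under `src/` is a standalone script. As a minimal sketch (assuming Julia 0.6.0 and the packages listed in `docker/add_packages.jl` are installed), a demo can be run from the `src` directory:
$ cd src
$ julia demo_GaussianMixtureModel.jl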
================================================
FILE: docker/Dockerfile
================================================
FROM python:latest
# Update
RUN apt-get update
# Install Python libraries
RUN pip3 install matplotlib scipy scikit-learn notebook
# Install system libraries
RUN apt-get install -y sudo hdf5-tools libzmq3
# Install julia 0.6.0
RUN wget https://julialang-s3.julialang.org/bin/linux/x64/0.6/julia-0.6.0-linux-x86_64.tar.gz && \
tar -xzf julia-0.6.0-linux-x86_64.tar.gz && \
ln -s /julia-903644385b/bin/julia /usr/local/bin/julia
# Set the working directory to /work
WORKDIR /work
# Add julia packages
ADD add_packages.jl /work
RUN julia add_packages.jl
# Download source codes
RUN git clone https://github.com/sammy-suyama/BayesBook.git
# Make port 8888 available to the world outside this container
EXPOSE 8888
# Start jupyter notebook
CMD jupyter notebook --allow-root --port=8888 --ip=0.0.0.0
================================================
FILE: docker/README.md
================================================
# Running Jupyter Notebook from Docker
If setting up Julia and Python environments is a hassle, you can use Docker to run the demo scripts in a Jupyter notebook.
For installing Docker itself, see the official documentation:
* https://docs.docker.com/engine/installation/
Build and run the image in the directory containing the `Dockerfile`:
$ docker build -t bayesbook .
$ docker run -p 8888:8888 bayesbook
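Once the container is running, open http://localhost:8888 in a browser; the access token is printed in the container log. Inside the image, the repository is cloned to `/work/BayesBook`.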
================================================
FILE: docker/add_packages.jl
================================================
Pkg.update()
Pkg.add("PyPlot")
Pkg.add("StatsFuns")
Pkg.add("SpecialFunctions")
Pkg.add("Distributions")
Pkg.add("PDMats")
Pkg.add("ProgressMeter")
Pkg.add("DataFrames")
Pkg.add("HDF5")
Pkg.add("JLD")
Pkg.add("IJulia")
================================================
FILE: src/BayesNeuralNet.jl
================================================
"""
Variational inference for a Bayesian neural network (single tanh hidden layer), using a diagonal-Gaussian approximate posterior over the weights and stochastic gradients with reparameterized weight samples.
"""
module BayesNeuralNet
using Distributions
export sample_data_from_prior, sample_data_from_posterior
export VI
function sigmoid(x)
return 1.0 / (1.0 + exp.(-x[1]))
end
function rho2sig(rho)
# softplus: maps the unconstrained parameter rho to a positive standard deviation
return log.(1 + exp.(rho))
end
function compute_df_dmu(mu, rho, W)
return (W - mu) ./ rho2sig(rho).^2
end
function compute_df_drho(Y, X, mu, rho, W)
return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho)
end
function compute_dprec_drho(rho)
return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho)))
end
function compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1, mu2, rho2, W2)
M, N = size(X)
Y_err1 = zeros(size(W1)) # MxK
Y_err2 = zeros(size(W2)) # KxD
for n in 1 : N
Z = tanh.(W1'*X[:,n]) # Kx1
Y_est = W2'*Z
# 2nd unit, Dx1
delta2 = Y_est - Y[n]
# 1st unit, KxD
delta1 = diagm(1 - Z.^2) * W2 * delta2
Y_err1 += X[:,n] * delta1'
Y_err2 += Z * delta2'
end
df_dw1 = W1/sigma2_w + (mu1 - W1) ./ rho2sig(rho1).^2 + Y_err1 / sigma2_y
df_dw2 = W2/sigma2_w + (mu2 - W2) ./ rho2sig(rho2).^2 + Y_err2 / sigma2_y
return df_dw1, df_dw2
end
"""
Sample data given prior and inputs.
"""
function sample_data_from_prior(X, sigma2_w, sigma2_y, D, K)
M, N = size(X)
W1 = sqrt(sigma2_w) * randn(M, K)
W2 = sqrt(sigma2_w) * randn(K, D)
# sample function
Y = [W2'* tanh.(W1'X[:,n]) for n in 1 : N]
# sample data
Y_obs = [W2'* tanh.(W1'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N]
return Y_obs, Y, W1, W2
end
"""
Sample data given posterior and inputs.
"""
function sample_data_from_posterior(X, mu1, rho1, mu2, rho2, sigma2_y, D)
N = size(X, 2)
ep1 = randn(size(mu1))
W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1
ep2 = randn(size(mu2))
W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2
Y_est = [W2_tmp'* tanh.(W1_tmp'X[:,n]) for n in 1 : N]
Y_obs = [W2_tmp'* tanh.(W1_tmp'X[:,n]) + sqrt(sigma2_y)*randn(D) for n in 1 : N]
return Y_est, Y_obs
end
"""
Compute variational parameters.
"""
function VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter)
M, N = size(X)
D = length(Y[1])
# initialize
mu1 = randn(M, K)
rho1 = randn(M, K)
mu2 = randn(K, D)
rho2 = randn(K, D)
for i in 1 : max_iter
# sample
ep1 = randn(size(mu1))
W1_tmp = mu1 + log.(1 + exp.(rho1)) .* ep1
ep2 = randn(size(mu2))
W2_tmp = mu2 + log.(1 + exp.(rho2)) .* ep2
# calc error
df_dw1, df_dw2 = compute_df_dw(Y, X, sigma2_y, sigma2_w, mu1, rho1, W1_tmp, mu2, rho2, W2_tmp)
# 1st unit
df_dmu1 = compute_df_dmu(mu1, rho1, W1_tmp)
df_drho1 = compute_df_drho(Y, X, mu1, rho1, W1_tmp)
d_mu1 = df_dw1 + df_dmu1
d_rho1 = df_dw1 .* (ep1 ./ (1+exp.(-rho1))) + df_drho1
mu1 = mu1 - alpha * d_mu1
rho1 = rho1 - alpha * d_rho1
# 2nd unit
df_dmu2 = compute_df_dmu(mu2, rho2, W2_tmp)
df_drho2 = compute_df_drho(Y, X, mu2, rho2, W2_tmp)
d_mu2 = df_dw2 + df_dmu2
d_rho2 = df_dw2 .* (ep2 ./ (1+exp.(-rho2))) + df_drho2
mu2 = mu2 - alpha * d_mu2
rho2 = rho2 - alpha * d_rho2
end
return mu1, rho1, mu2, rho2
end
end
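# ----------------------------------------------------------------------
# Usage sketch (editor's note): a hypothetical, minimal call sequence for this
# module, mirroring demo_BayesNeuralNet.jl below; all values are illustrative.
#
# X = ones(2, 50); X[1,:] = linspace(-2, 2, 50) # input row + bias row
# Y = 0.5*sin.(2*pi*X[1,:]/3) + 0.05*randn(50) # noisy 1D targets
# mu1, rho1, mu2, rho2 = BayesNeuralNet.VI(Y, X, 10.0, 0.01, 5, 1.0e-5, 100000)
# Y_est, Y_obs = BayesNeuralNet.sample_data_from_posterior(X, mu1, rho1, mu2, rho2, 0.01, 1)
# ----------------------------------------------------------------------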
================================================
FILE: src/DimensionalityReduction.jl
================================================
"""
Variational inference for Bayesian dimensionality reduction (a linear-Gaussian latent variable model, similar to probabilistic PCA).
"""
module DimensionalityReduction
using Distributions
#using ProgressMeter
export DRModel
export sample_data, VI
####################
## Types
struct DRModel
D::Int
M::Int
sigma2_y::Float64
m_W::Array{Float64, 2} # MxD
Sigma_W::Array{Float64, 3} # MxMxD
m_mu::Array{Float64, 1} # D
Sigma_mu::Array{Float64, 2} # DxD
end
####################
## functions
function sqsum(mat::Array{Float64}, idx::Int)
return squeeze(sum(mat, idx), idx)
end
"""
Sample data given hyperparameters.
"""
function sample_data(N::Int, model::DRModel)
D = model.D
M = model.M
W = zeros(M, D)
mu = zeros(D)
for d in 1 : D
W[:,d] = rand(MvNormal(model.m_W[:,d], model.Sigma_W[:,:,d]))
end
mu = rand(MvNormal(model.m_mu, model.Sigma_mu))
Y = zeros(D, N)
X = randn(M, N)
for n in 1 : N
Y[:,n] = rand(MvNormal(W'*X[:,n] + mu, model.sigma2_y*eye(D)))
end
return Y, X, W, mu
end
function init(Y::Array{Float64, 2}, prior::DRModel)
M = prior.M
D, N = size(Y)
X = randn(M, N)
XX = zeros(M, M, N)
for n in 1 : N
XX[:,:,n] = X[:,n]*X[:,n]' + eye(M)
end
return X, XX
end
function update_W(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel,
X::Array{Float64, 2}, XX::Array{Float64, 3})
D = prior.D
M = prior.M
N = size(Y, 2)
m_W = zeros(M, D)
Sigma_W = zeros(M, M, D)
mu = posterior.m_mu
for d in 1 : D
Sigma_W[:,:,d] = inv(inv(prior.sigma2_y)*sqsum(XX, 3) + inv(prior.Sigma_W[:,:,d]))
m_W[:,d] = Sigma_W[:,:,d]*(inv(prior.sigma2_y)*X*(Y[[d],:] - mu[d]*ones(1, N))'
+ inv(prior.Sigma_W[:,:,d])*prior.m_W[:,d])
end
return DRModel(D, M, prior.sigma2_y, m_W, Sigma_W, posterior.m_mu, posterior.Sigma_mu)
end
function update_mu(Y::Array{Float64, 2}, prior::DRModel, posterior::DRModel,
X::Array{Float64, 2}, XX::Array{Float64, 3})
N = size(Y, 2)
D = prior.D
M = prior.M
W = posterior.m_W
Sigma_mu = inv(N*inv(prior.sigma2_y)*eye(D) + inv(prior.Sigma_mu))
m_mu = Sigma_mu*(inv(prior.sigma2_y)*sqsum(Y - W'*X, 2) + inv(prior.Sigma_mu)*prior.m_mu)
return DRModel(D, M, prior.sigma2_y, posterior.m_W, posterior.Sigma_W, m_mu, Sigma_mu)
end
function update_X(Y::Array{Float64, 2}, posterior::DRModel)
D, N = size(Y)
M = posterior.M
W = posterior.m_W
WW = zeros(M, M, D)
for d in 1 : D
WW[:,:,d] = W[:,d]*W[:,d]' + posterior.Sigma_W[:,:,d]
end
mu = posterior.m_mu
X = zeros(M, N)
XX = zeros(M, M, N)
for n in 1 : N
Sigma = inv(inv(posterior.sigma2_y)*sqsum(WW, 3) + eye(M))
X[:,n] = inv(posterior.sigma2_y)*Sigma*W*(Y[:,n] - mu)
XX[:,:,n] = X[:,n] * X[:,n]' + Sigma
end
return X, XX
end
function interpolate(mask::BitArray{2}, X::Array{Float64, 2}, posterior::DRModel)
Y_est = posterior.m_W'*X + repmat(posterior.m_mu, 1, size(X, 2))
return Y_est[mask]
end
"""
Compute variational posterior distributions.
"""
function VI(Y::Array{Float64, 2}, prior::DRModel, max_iter::Int)
X, XX = init(Y, prior)
mask = isnan.(Y)
sum_nan = sum(mask)
posterior = deepcopy(prior)
#progress = Progress(max_iter)
for iter in 1 : max_iter
# progress
#next!(progress)
# Interpolate
if sum_nan > 0
Y[mask] = interpolate(mask, X, posterior)
end
# M-step
posterior = update_W(Y, prior, posterior, X, XX)
posterior = update_mu(Y, prior, posterior, X, XX)
# E-step
X, XX = update_X(Y, posterior)
end
return posterior, X
end
end
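# Usage sketch (editor's note): a hypothetical, minimal call sequence mirroring
# demo_DimensionalityReduction.jl below (D observed dims, M latent dims).
#
# D, M = 4, 2
# Sigma_W = zeros(M, M, D)
# for d in 1 : D; Sigma_W[:,:,d] = 0.1*eye(M); end
# prior = DimensionalityReduction.DRModel(D, M, 0.001, zeros(M, D), Sigma_W, zeros(D), 1.0*eye(D))
# Y, X_true, W, mu = DimensionalityReduction.sample_data(100, prior)
# posterior, X_est = DimensionalityReduction.VI(Y, prior, 100)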
================================================
FILE: src/GaussianMixtureModel.jl
================================================
"""
Bayesian Gaussian Mixture Model
"""
module GaussianMixtureModel
using StatsFuns.logsumexp, SpecialFunctions.digamma
using Distributions
using PDMats
export GW, BGMM, Gauss, GMM
export sample_GMM, sample_data, winner_takes_all
export learn_GS, learn_CGS, learn_VI
####################
## Types
struct GW
# Parameters of Gauss-Wishart distribution
beta::Float64
m::Vector{Float64}
nu::Float64
W::Matrix{Float64}
end
struct BGMM
# Parameters of Bayesian Gaussian Mixture Model
D::Int
K::Int
alpha::Vector{Float64}
cmp::Vector{GW}
end
struct Gauss
# Parameters of Gauss Distribution
mu::Vector{Float64}
Lambda::Matrix{Float64}
end
struct GMM
# Parameters of Gauss Mixture Model
D::Int
K::Int
phi::Vector{Float64}
cmp::Vector{Gauss}
end
####################
## Common functions
"""
Sample a GMM given hyperparameters.
"""
function sample_GMM(bgmm::BGMM)
cmp = Vector{Gauss}()
for c in bgmm.cmp
Lambda = rand(Wishart(c.nu, PDMats.PDMat(Symmetric(c.W))))
mu = rand(MvNormal(c.m, PDMats.PDMat(Symmetric(inv(c.beta*Lambda)))))
push!(cmp, Gauss(mu, Lambda))
end
phi = rand(Dirichlet(bgmm.alpha))
return GMM(bgmm.D, bgmm.K, phi, cmp)
end
"""
Sample data from a specific GMM model.
"""
function sample_data(gmm::GMM, N::Int)
X = zeros(gmm.D, N)
S = categorical_sample(gmm.phi, N)
for n in 1 : N
k = indmax(S[:, n])
X[:,n] = rand(MvNormal(gmm.cmp[k].mu, PDMats.PDMat(Symmetric(inv(gmm.cmp[k].Lambda)))))
end
return X, S
end
categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1]
function categorical_sample(p::Vector{Float64}, N::Int)
K = length(p)
S = zeros(K, N)
S_tmp = rand(Categorical(p), N)
for k in 1 : K
S[k,find(S_tmp.==k)] = 1
end
return S
end
function sumdigamma(nu, D)
ret = 0.0
for d in 1 : D
ret += digamma.(0.5*(nu + 1 - d))
end
return ret
end
function init_S(X::Matrix{Float64}, bgmm::BGMM)
N = size(X, 2)
K = bgmm.K
S = categorical_sample(ones(K)/K, N)
return S
end
function calc_ELBO(X::Array{Float64, 2}, pri::BGMM, pos::BGMM)
function logCw(nu, W)
D = size(W, 1)
return -0.5*nu*logdet(W) - 0.5*nu*D*log.(2) - 0.25*D*(D-1)*log.(pi) - sum([lgamma.(0.5*(nu+1-d)) for d in 1 : D])
end
ln_expt_S = update_S(pos, X)
expt_S = exp.(ln_expt_S)
K, N = size(expt_S)
D = size(X, 1)
expt_ln_lkh = 0
for k in 1 : K
expt_Lambda = pos.cmp[k].nu * pos.cmp[k].W
expt_Lambda_mu = pos.cmp[k].nu * pos.cmp[k].W * pos.cmp[k].m
expt_mu_Lambda_mu = (pos.cmp[k].nu * pos.cmp[k].m' * pos.cmp[k].W * pos.cmp[k].m)[1] + D/pos.cmp[k].beta
expt_ln_Lambda = sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W)
expt_ln_pi = digamma.(pos.alpha) - digamma.(sum(pos.alpha))
for n in 1 : N
# <ln p(X|S, mu, Lambda)>
expt_ln_lkh += -0.5 * expt_S[k,n]*(trace(X[:,n]*X[:,n]'*expt_Lambda)
- 2*(X[:,n]'*expt_Lambda_mu)[1]
+ expt_mu_Lambda_mu
- expt_ln_Lambda
+ D * log.(2*pi)
)
# <ln p(S|pi)>
expt_ln_lkh += expt_S[k,n]*expt_ln_pi[k]
end
end
# -<ln q(S)>
expt_ln_lkh -= sum(expt_S.*ln_expt_S)
KL_mu_Lambda = [(0.5*D*(log.(pos.cmp[k].beta) - log.(pri.cmp[k].beta) + pri.cmp[k].beta/pos.cmp[k].beta - pos.cmp[k].nu - 1)
+ 0.5*(pos.cmp[k].nu-pri.cmp[k].nu)*(sumdigamma(pos.cmp[k].nu, D) + D*log.(2) + logdet(pos.cmp[k].W))
+ logCw(pos.cmp[k].nu, pos.cmp[k].W) - logCw(pri.cmp[k].nu, pri.cmp[k].W)
+ 0.5*pos.cmp[k].nu*trace((pri.cmp[k].beta*(pos.cmp[k].m-pri.cmp[k].m)*(pos.cmp[k].m-pri.cmp[k].m)'
+inv(pri.cmp[k].W))*pos.cmp[k].W)) for k in 1 : K]
KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha))
- sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha))
+ (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha)))
)[1]
VB = expt_ln_lkh - (sum(KL_mu_Lambda) + KL_pi)
return VB
end
function add_stats(bgmm::BGMM, X::Matrix{Float64}, S::Matrix{Float64})
D = bgmm.D
K = bgmm.K
sum_S = sum(S, 2)
alpha = [bgmm.alpha[k] + sum_S[k] for k in 1 : K]
cmp = Vector{GW}()
XS = X*S';
for k in 1 : K
beta = bgmm.cmp[k].beta + sum_S[k]
m = (1.0/beta)*(vec(X*S[[k],:]') + bgmm.cmp[k].beta*bgmm.cmp[k].m)
nu = bgmm.cmp[k].nu + sum_S[k]
W = inv(X*diagm(S[k,:])*X'
- beta*m*m'
+ bgmm.cmp[k].beta*bgmm.cmp[k].m*bgmm.cmp[k].m'
+ inv(bgmm.cmp[k].W))
push!(cmp, GW(beta, m, nu, W))
end
return BGMM(D, K, alpha, cmp)
end
remove_stats(bgmm::BGMM, X::Matrix{Float64}, S::Matrix{Float64}) = add_stats(bgmm, X, -S)
####################
## used for Variational Inference
function update_S(bgmm::BGMM, X::Matrix{Float64})
D, N = size(X)
K = bgmm.K
ln_S = zeros(K, N)
tmp = NaN * zeros(K)
sum_digamma_tmp = digamma.(sum(bgmm.alpha))
for k in 1 : K
tmp[k] = -0.5*(bgmm.cmp[k].nu*trace(bgmm.cmp[k].m*bgmm.cmp[k].m'*bgmm.cmp[k].W)
+ D*(1.0/bgmm.cmp[k].beta)
- (sumdigamma(bgmm.cmp[k].nu, D) + logdet(bgmm.cmp[k].W)))
tmp[k] += digamma.(bgmm.alpha[k]) - sum_digamma_tmp
end
for n in 1 : N
tmp_ln_pi = NaN * zeros(K)
for k in 1 : K
tmp_ln_pi[k] = tmp[k] -0.5*bgmm.cmp[k].nu*trace((X[:,n]*X[:,n]' - 2*bgmm.cmp[k].m*X[:,n]')*bgmm.cmp[k].W)
end
ln_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi)
end
return ln_S
end
"""
Pick the single state with the highest probability for each sample.
"""
function winner_takes_all(S::Matrix{Float64})
S_ret = zeros(size(S))
for n in 1 : size(S_ret, 2)
idx = indmax(S[:,n])
S_ret[idx,n] = 1
end
return S_ret
end
####################
## used for Gibbs Sampling
function sample_S_GS(gmm::GMM, X::Matrix{Float64})
D, N = size(X)
K = gmm.K
S = zeros(K, N)
tmp = [0.5*logdet(gmm.cmp[k].Lambda) + log.(gmm.phi[k]) for k in 1 : K]
for n in 1 : N
tmp_ln_phi = [-0.5*trace(gmm.cmp[k].Lambda*(X[:,n] - gmm.cmp[k].mu)*(X[:,n] - gmm.cmp[k].mu)') + tmp[k] for k in 1 : K]
tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi)
S[:,n] = categorical_sample(exp.(tmp_ln_phi))
end
return S
end
####################
## used for Collapsed Gibbs Sampling
function calc_ln_ST(Xn::Vector{Float64}, gw::GW)
# TODO; need to check value?
D = size(Xn, 1)
W = ((1 - D + gw.nu)*gw.beta / (1 + gw.beta)) * gw.W
#ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, (gw.nu/(gw.nu - 2))*inv(W)), Xn)
ln_lkh = logpdf(MvTDist(1 - D + gw.nu, gw.m, PDMats.PDMat(Symmetric(inv(W)))), Xn)
return sum(ln_lkh)
end
function sample_Sn(Xn::Vector{Float64}, bgmm::BGMM)
ln_tmp = [(calc_ln_ST(Xn, bgmm.cmp[k]) + log.(bgmm.alpha[k])) for k in 1 : bgmm.K]
ln_tmp = ln_tmp - logsumexp(ln_tmp)
Sn = categorical_sample(exp.(ln_tmp))
return Sn
end
function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bgmm::BGMM)
D, N = size(X)
K = size(S, 1)
for n in randperm(N)
# remove
bgmm = remove_stats(bgmm, X[:,[n]], S[:,[n]])
# sample
S[:,n] = sample_Sn(X[:,n], bgmm)
# insert
bgmm = add_stats(bgmm, X[:,[n]], S[:,[n]])
end
return S, bgmm
end
####################
## Algorithm main
"""
Compute posterior distributions via variational inference.
"""
function learn_VI(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int)
# initialisation
expt_S = init_S(X, prior_bgmm)
bgmm = add_stats(prior_bgmm, X, expt_S)
VB = NaN * zeros(max_iter)
# inference
for i in 1 : max_iter
# E-step
expt_S = exp.(update_S(bgmm, X))
# M-step
bgmm = add_stats(prior_bgmm, X, expt_S)
# calc VB
VB[i] = calc_ELBO(X, prior_bgmm, bgmm)
end
# assign binary values
S = winner_takes_all(expt_S)
return S, bgmm, VB
end
"""
Compute posterior distributions via Gibbs sampling.
"""
function learn_GS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int)
# initialisation
S = init_S(X, prior_bgmm)
bgmm = add_stats(prior_bgmm, X, S)
VB = NaN * zeros(max_iter)
# inference
for i in 1 : max_iter
# sample parameters
gmm = sample_GMM(bgmm)
# sample latent variables
S = sample_S_GS(gmm, X)
# update current model
bgmm = add_stats(prior_bgmm, X, S)
# calc VB
VB[i] = calc_ELBO(X, prior_bgmm, bgmm)
end
return S, bgmm, VB
end
"""
Compute posterior distributions via collapsed Gibbs sampling.
"""
function learn_CGS(X::Matrix{Float64}, prior_bgmm::BGMM, max_iter::Int)
# initialisation
S = init_S(X, prior_bgmm)
bgmm = add_stats(prior_bgmm, X, S)
VB = NaN * zeros(max_iter)
# inference
for i in 1 : max_iter
# directly sample S
S, bgmm = sample_S_CGS(S, X, bgmm)
# calc VB
VB[i] = calc_ELBO(X, prior_bgmm, bgmm)
end
return S, bgmm, VB
end
end
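# Usage sketch (editor's note): a hypothetical, minimal call sequence mirroring
# demo_GaussianMixtureModel.jl below.
#
# D, K = 2, 4
# cmp = [GaussianMixtureModel.GW(0.1, zeros(D), D + 1.0, eye(D)) for _ in 1 : K]
# prior = GaussianMixtureModel.BGMM(D, K, 100.0*ones(K), cmp)
# gmm = GaussianMixtureModel.sample_GMM(prior)
# X, S = GaussianMixtureModel.sample_data(gmm, 300)
# S_est, posterior, VB = GaussianMixtureModel.learn_VI(X, prior, 100)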
================================================
FILE: src/LogisticRegression.jl
================================================
"""
Variational inference for Bayesian logistic regression, using a diagonal-Gaussian approximate posterior and reparameterized gradients.
"""
module LogisticRegression
using Distributions
export sigmoid, sample_data, VI
function sigmoid(x)
return 1.0 / (1.0 + exp.(-x[1]))
end
function bern_sample(mu)
i = rand(Bernoulli(mu))
val = zeros(2)
val[i+1] = 1
return val
end
"""
Sample data & parameter given covariance Sigma_w and inputs X.
"""
function sample_data(X, Sigma_w)
N = size(X, 2)
M = size(Sigma_w, 1)
# sample parameters
W = rand(MvNormal(zeros(M), Sigma_w))
# sample data
Y = [rand(Bernoulli(sigmoid(W'*X[:, n]))) for n in 1 : N]
return Y, W
end
"""
Compute variational parameters.
"""
function VI(Y, X, M, Sigma_w, alpha, max_iter)
function rho2sig(rho)
return log.(1 + exp.(rho))
end
function compute_df_dw(Y, X, Sigma_w, mu, rho, W)
M, N = size(X)
term1 = (mu - W) ./ rho2sig(rho).^2
term2 = inv(Sigma_w)*W
term3 = 0
for n in 1 : N
term3 += -(Y[n] - sigmoid(W'*X[:,n])) * X[:,n]
end
return term1 + term2 + term3
end
function compute_df_dmu(mu, rho, W)
return (W - mu) ./ rho2sig(rho).^2
end
function compute_df_drho(Y, X, Sigma_w, mu, rho, W)
return -0.5*((W - mu).^2 - rho2sig(rho).^2) .* compute_dprec_drho(rho)
end
function compute_dprec_drho(rho)
return 2 * rho2sig(rho) .^ (-3) .* (1 ./ (1+exp.(rho))).^2 .* (1 ./ (1+exp.(-rho)))
end
# diag gaussian for approximate posterior
mu = randn(M)
rho = randn(M) # sigma = log.(1 + exp.(rho))
for i in 1 : max_iter
# sample epsilon from a standard Gaussian (reparameterization trick)
ep = randn(M)
W_tmp = mu + log.(1 + exp.(rho)) .* ep
# calculate gradient
df_dw = compute_df_dw(Y, X, Sigma_w, mu, rho, W_tmp)
df_dmu = compute_df_dmu(mu, rho, W_tmp)
df_drho = compute_df_drho(Y, X, Sigma_w, mu, rho, W_tmp)
d_mu = df_dw + df_dmu
d_rho = df_dw .* (ep ./ (1+exp.(-rho))) + df_drho
# update variational parameters
mu = mu - alpha * d_mu
rho = rho - alpha * d_rho
end
return mu, rho
end
end
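# Usage sketch (editor's note): a hypothetical, minimal call sequence mirroring
# demo_LogisticRegression.jl below.
#
# M = 2; Sigma_w = 100.0*eye(M)
# X = 2*rand(M, 50) - 1.0
# Y, W_true = LogisticRegression.sample_data(X, Sigma_w)
# mu, rho = LogisticRegression.VI(Y, X, M, Sigma_w, 1.0e-4, 100000)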
================================================
FILE: src/NMF.jl
================================================
"""
Variational inference for Bayesian NMF
"""
module NMF
using Distributions
using StatsFuns.logsumexp, SpecialFunctions.digamma
export NMFModel
export sample_data, VI
####################
## Types
struct NMFModel
a_t::Array{Float64, 2} # D x K
b_t::Array{Float64, 2} # D x K
a_v::Float64 # 1 dim
b_v::Float64 # 1 dim
end
function sqsum(mat::Array, idx)
return squeeze(sum(mat, idx), idx)
end
####################
## functions
function init(X::Array{Int64, 2}, model::NMFModel)
D, N = size(X)
K = size(model.a_t, 2)
S = zeros(D, K, N)
A_t = rand(D, K)
B_t = rand(D, K)
A_v = rand(K, N)
B_v = rand(K, N)
for d in 1 : D
for k in 1 : K
for n in 1 : N
S[d,k,n] = X[d,n] * A_t[d,k] * B_t[d,k] * A_v[k,n] * B_v[k,n]
end
end
end
return S, A_t, B_t, A_v, B_v
end
function update_S(X::Array{Int64, 2}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2})
D, K = size(A_t)
N = size(A_v, 2)
S = zeros(D, K, N)
for d in 1 : D
for n in 1 : N
# K dim
ln_P = (digamma.(A_t[d,:]) + log.(B_t[d,:])
+ digamma.(A_v[:,n]) + log.(B_v[:,n])
)
ln_P = ln_P - logsumexp(ln_P)
S[d,:,n] = X[d,n] * exp.(ln_P)
end
end
return S
end
function update_T(S::Array{Float64, 3}, A_v::Array{Float64, 2}, B_v::Array{Float64, 2}, model::NMFModel)
D, K, N = size(S)
a_t = model.a_t # DxK
b_t = model.b_t # DxK
A_t = a_t + sqsum(S, 3)
B_t = (a_t ./ b_t + repmat(sqsum(A_v.*B_v, 2)', D, 1)).^(-1)
return A_t, B_t
end
function update_V(S::Array{Float64, 3}, A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel)
a_v = model.a_v
b_v = model.b_v
D, K, N = size(S)
A_v = a_v + sqsum(S, 1)
B_v = (a_v / b_v + repmat(sqsum(A_t.*B_t, 1), 1, N)).^(-1)
return A_v, B_v
end
"""
Sample data given hyperparameters.
"""
function sample_data(N::Int, model::NMFModel)
# TODO; check b or 1/b ?
D, K = size(model.a_t)
T = zeros(D, K)
for d in 1 : D
for k in 1 : K
T[d,k] = rand(Gamma(model.a_t[d,k], 1.0/model.b_t[d,k])) # TODO: check
end
end
V = reshape(rand(Gamma(model.a_v, 1.0/model.b_v), K*N) , K, N) # TODO: check
S = zeros(D, K, N)
for d in 1 : D
for k in 1 : K
for n in 1 : N
S[d,k,n] = T[d,k] * V[k,n]
end
end
end
#X = sqsum(S, 2) + 0.0 # zero noise
X = sqsum(S, 2)
return X, T, S, V
end
function update_model(A_t::Array{Float64, 2}, B_t::Array{Float64, 2}, model::NMFModel)
return NMFModel(A_t, B_t, model.a_v, model.b_v)
end
"""
Compute variational posterior distributions.
"""
function VI(X::Array{Int64, 2}, model::NMFModel, max_iter::Int)
K = size(model.a_t, 2)
D, N = size(X)
S, A_t, B_t, A_v, B_v = init(X, model)
for iter in 1 : max_iter
# latent
S = update_S(X, A_t, B_t, A_v, B_v)
A_v, B_v = update_V(S, A_t, B_t, model)
# param
A_t, B_t = update_T(S, A_v, B_v, model)
end
return update_model(A_t, B_t, model), S, A_t.*B_t, A_v.*B_v
end
end
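# Usage sketch (editor's note): a hypothetical, minimal call sequence mirroring
# demo_NMF.jl below; VI expects a matrix of non-negative integer counts.
#
# D, K, N = 10, 2, 50
# prior = NMF.NMFModel(ones(D, K), ones(D, K), 1.0, 100.0)
# X, T, S, V = NMF.sample_data(N, prior)
# posterior, S_est, T_est, V_est = NMF.VI(Int64.(round.(X)), prior, 100)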
================================================
FILE: src/PoissonHMM.jl
================================================
"""
Bayesian 1dim Poisson Hidden Markov Model
"""
module PoissonHMM
using StatsFuns.logsumexp, SpecialFunctions.digamma
using Distributions
export Gam, BHMM, Poi, HMM
export sample_HMM, sample_data, winner_takes_all
export learn_VI
####################
## Types
struct Gam
# Parameters of Gamma distribution
# 1dim
a::Float64
b::Float64
end
struct BHMM
# Parameters of Bayesian Poisson Hidden Markov Model
K::Int
alpha_phi::Vector{Float64}
alpha_A::Matrix{Float64}
cmp::Vector{Gam}
end
struct Poi
# Parameters of Poisson Distribution
# 1 dim
lambda::Float64
end
struct HMM
# Parameters of Poisson Hidden Markov Model
K::Int
phi::Vector{Float64}
A::Matrix{Float64}
cmp::Vector{Poi}
end
####################
## Common functions
"""
Sample an HMM from prior
"""
function sample_HMM(bhmm::BHMM)
cmp = Vector{Poi}()
for c in bhmm.cmp
lambda = rand(Gamma(c.a, 1.0/c.b))
push!(cmp, Poi(lambda))
end
phi = rand(Dirichlet(bhmm.alpha_phi))
A = zeros(size(bhmm.alpha_A))
for k in 1 : bhmm.K
A[:,k] = rand(Dirichlet(bhmm.alpha_A[:,k]))
end
return HMM(bhmm.K, phi, A, cmp)
end
"""
Sample data from a specific Poisson HMM
"""
function sample_data(hmm::HMM, N::Int)
X = zeros(N)
Z = zeros(hmm.K, N)
# sample (n=1)
Z[:,1] = categorical_sample(hmm.phi)
k = indmax(Z[:, 1])
X[1] = rand(Poisson(hmm.cmp[k].lambda))
# sample (n>1)
for n in 2 : N
Z[:,n] = categorical_sample(hmm.A[:,k])
k = indmax(Z[:, n])
X[n] = rand(Poisson(hmm.cmp[k].lambda))
end
return X, Z
end
categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1]
function categorical_sample(p::Vector{Float64}, N::Int)
K = length(p)
S = zeros(K, N)
S_tmp = rand(Categorical(p), N)
for k in 1 : K
S[k,find(S_tmp.==k)] = 1
end
return S
end
function init_Z(X::Vector{Float64}, bhmm::BHMM)
N = size(X, 1)
K = bhmm.K
Z = rand(Dirichlet(ones(K)/K), N)
ZZ = [zeros(K,K) for _ in 1 : N - 1]
for n in 1 : N - 1
ZZ[n] = Z[:,n+1] * Z[:,n]'
end
return Z, ZZ
end
"""
Not implemented yet.
"""
function calc_ELBO(X::Matrix{Float64}, pri::BHMM, pos::BHMM)
end
function add_stats(bhmm::BHMM, X::Vector{Float64},
Z::Matrix{Float64}, ZZ::Vector{Matrix{Float64}})
K = bhmm.K
sum_Z = sum(Z, 2)
alpha_phi = [bhmm.alpha_phi[k] + Z[k,1] for k in 1 : K]
alpha_A = bhmm.alpha_A + sum(ZZ)
cmp = Vector{Gam}()
ZX = Z*X # (KxN) x (Nx1) = Kx1
for k in 1 : K
a = bhmm.cmp[k].a + ZX[k]
b = bhmm.cmp[k].b + sum_Z[k]
push!(cmp, Gam(a, b))
end
return BHMM(K, alpha_phi, alpha_A, cmp)
end
remove_stats(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64}, ZZ::Vector{Matrix{Float64}}) = add_stats(bhmm, X, -Z, [-zz for zz in ZZ])
####################
## used for Variational Inference
function update_Z(bhmm::BHMM, X::Vector{Float64}, Z::Matrix{Float64})
N = size(X, 1)
K = bhmm.K
ln_expt_Z = zeros(K, N)
ln_lkh = zeros(K, N)
for k in 1 : K
ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b)
lambda = bhmm.cmp[k].a / bhmm.cmp[k].b
for n in 1 : N
ln_lkh[k,n] = X[n]'*(ln_lambda) - lambda
end
end
expt_ln_A = zeros(size(bhmm.alpha_A))
for k in 1 : K
expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k]))
end
# copy
ln_expt_Z = log.(Z)
# n = 1
ln_expt_Z[:,1] = (digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi))
+ expt_ln_A' * exp.(ln_expt_Z[:,2])
+ ln_lkh[:,1]
)
ln_expt_Z[:,1] = ln_expt_Z[:,1] - logsumexp(ln_expt_Z[:,1])
# 2 <= n <= N - 1
for n in 2 : N - 1
ln_expt_Z[:,n] =( expt_ln_A * exp.(ln_expt_Z[:,n-1])
+ expt_ln_A' * exp.(ln_expt_Z[:,n+1])
+ ln_lkh[:,n]
)
ln_expt_Z[:,n] = ln_expt_Z[:,n] - logsumexp(ln_expt_Z[:,n])
end
# n = N
ln_expt_Z[:,N] =( expt_ln_A * exp.(ln_expt_Z[:,N-1])
+ ln_lkh[:,N]
)
ln_expt_Z[:,N] = ln_expt_Z[:,N] - logsumexp(ln_expt_Z[:,N])
# calc output
Z_ret = exp.(ln_expt_Z)
ZZ_ret = [zeros(K,K) for _ in 1 : N - 1]
for n in 1 : N - 1
ZZ_ret[n] = Z_ret[:,n+1] * Z_ret[:,n]'
end
return Z_ret, ZZ_ret
end
"""
Pick the single state with the highest probability for each sample.
"""
function winner_takes_all(Z::Matrix{Float64})
Z_ret = zeros(size(Z))
for n in 1 : size(Z_ret, 2)
idx = indmax(Z[:,n])
Z_ret[idx,n] = 1
end
return Z_ret
end
function logmatprod(ln_A::Array{Float64}, ln_B::Array{Float64})
I = size(ln_A, 1)
J = size(ln_B, 2)
ln_C = zeros(I, J)
for i in 1 : I
for j in 1 : J
ln_C[i, j] = logsumexp(ln_A[i, :] + ln_B[:, j])
end
end
return ln_C
end
function update_Z_fb(bhmm::BHMM, X::Vector{Float64})
K = bhmm.K
N = length(X)
# calc likelihood
ln_lik = zeros(K, N)
for k in 1 : K
ln_lambda = digamma.(bhmm.cmp[k].a) - log.(bhmm.cmp[k].b)
lambda = bhmm.cmp[k].a / bhmm.cmp[k].b
for n in 1 : N
ln_lik[k,n] = X[n]'*(ln_lambda) - lambda
end
end
expt_ln_phi = digamma.(bhmm.alpha_phi) - digamma.(sum(bhmm.alpha_phi))
expt_ln_A = zeros(K,K)
for k in 1 : K
expt_ln_A[:,k] = digamma.(bhmm.alpha_A[:,k]) - digamma.(sum(bhmm.alpha_A[:,k]))
end
Z, ZZ = fb_alg(ln_lik, expt_ln_phi, expt_ln_A)
# different notation
ZZ_ret = [ZZ[:,:,n] for n in 1:size(ZZ, 3)]
return Z, ZZ_ret
end
function fb_alg(ln_lik::Matrix{Float64}, ln_phi::Vector{Float64}, ln_A::Matrix{Float64})
K, T = size(ln_lik)
ln_Z = zeros(K, T)
ln_ZZ = zeros(K, K, T)
ln_alpha = zeros(K, T)
ln_beta = zeros(K, T)
ln_st = zeros(T)
for t in 1 : T
if t == 1
ln_alpha[:, 1] = ln_phi + ln_lik[:, 1]
else
ln_alpha[:, t] = logmatprod(ln_A, ln_alpha[:, t-1]) + ln_lik[:, t]
end
ln_st[t] = logsumexp(ln_alpha[:, t])
ln_alpha[:,t] = ln_alpha[:,t] - ln_st[t]
end
for t in T-1 : -1 : 1
ln_beta[:, t] = logmatprod(ln_A', ln_beta[:, t+1] + ln_lik[:,t+1])
ln_beta[:, t] = ln_beta[:, t] - ln_st[t+1]
end
ln_Z = ln_alpha + ln_beta
for t in 1 : T
if t < T
ln_ZZ[:,:,t] = (repmat(ln_alpha[:, t]', K, 1) + ln_A
+ repmat(ln_lik[:, t+1] + ln_beta[:,t+1], 1, K))
ln_ZZ[:,:,t] = ln_ZZ[:,:,t] - ln_st[t+1]
end
end
return exp.(ln_Z), exp.(ln_ZZ)
end
"""
Compute approximate posterior distributions via variational inference.
"""
function learn_VI(X::Vector{Float64}, prior_bhmm::BHMM, max_iter::Int)
# initialisation
expt_Z, expt_ZZ = init_Z(X, prior_bhmm)
bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ)
VB = NaN * zeros(max_iter)
# inference
for i in 1 : max_iter
# E-step
#expt_Z, expt_ZZ = update_Z(bhmm, X, expt_Z)
expt_Z, expt_ZZ = update_Z_fb(bhmm, X)
# M-step
bhmm = add_stats(prior_bhmm, X, expt_Z, expt_ZZ)
end
return expt_Z, bhmm
end
end
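# Usage sketch (editor's note): a hypothetical, minimal call sequence mirroring
# demo_PoissonHMM.jl below.
#
# K = 2
# cmp = [PoissonHMM.Gam(1.0, 0.01) for _ in 1 : K]
# prior = PoissonHMM.BHMM(K, 10.0*ones(K), 100.0*eye(K) + ones(K, K), cmp)
# hmm = PoissonHMM.sample_HMM(prior)
# X, Z_true = PoissonHMM.sample_data(hmm, 500)
# Z_est, posterior = PoissonHMM.learn_VI(X, prior, 100)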
================================================
FILE: src/PoissonMixtureModel.jl
================================================
"""
Bayesian Poisson Mixture Model
"""
module PoissonMixtureModel
using StatsFuns.logsumexp, SpecialFunctions.digamma
using Distributions
export Gam, BPMM, Poi, PMM
export sample_PMM, sample_data, winner_takes_all
export learn_GS, learn_CGS, learn_VI
####################
## Types
struct Gam
# Parameters of Gamma distribution
a::Vector{Float64}
b::Float64
end
struct BPMM
# Parameters of Bayesian Poisson Mixture Model
D::Int
K::Int
alpha::Vector{Float64}
cmp::Vector{Gam}
end
struct Poi
# Parameters of Poisson Distribution
lambda::Vector{Float64}
end
struct PMM
# Parameters of Poisson Mixture Model
D::Int
K::Int
phi::Vector{Float64}
cmp::Vector{Poi}
end
####################
## Common functions
"""
Sample a PMM given hyperparameters.
"""
function sample_PMM(bpmm::BPMM)
cmp = Vector{Poi}()
for c in bpmm.cmp
lambda = Vector{Float64}()
for d in 1 : bpmm.D
push!(lambda, rand(Gamma(c.a[d], 1.0/c.b)))
end
push!(cmp, Poi(lambda))
end
phi = rand(Dirichlet(bpmm.alpha))
return PMM(bpmm.D, bpmm.K, phi, cmp)
end
"""
Sample data from a specific PMM model.
"""
function sample_data(pmm::PMM, N::Int)
X = zeros(pmm.D, N)
S = categorical_sample(pmm.phi, N)
for n in 1 : N
k = indmax(S[:, n])
for d in 1 : pmm.D
X[d,n] = rand(Poisson(pmm.cmp[k].lambda[d]))
end
end
return X, S
end
categorical_sample(p::Vector{Float64}) = categorical_sample(p, 1)[:,1]
function categorical_sample(p::Vector{Float64}, N::Int)
K = length(p)
S = zeros(K, N)
S_tmp = rand(Categorical(p), N)
for k in 1 : K
S[k,find(S_tmp.==k)] = 1
end
return S
end
function init_S(X::Matrix{Float64}, bpmm::BPMM)
N = size(X, 2)
K = bpmm.K
S = categorical_sample(ones(K)/K, N)
return S
end
function calc_ELBO(X::Matrix{Float64}, pri::BPMM, pos::BPMM)
ln_expt_S = update_S(pos, X)
expt_S = exp.(ln_expt_S)
K, N = size(expt_S)
D = size(X, 1)
expt_ln_lambda = zeros(D, K)
expt_lambda = zeros(D, K)
expt_ln_lkh = 0
for k in 1 : K
expt_ln_lambda[:,k] = digamma.(pos.cmp[k].a) - log.(pos.cmp[k].b)
expt_lambda[:,k] = pos.cmp[k].a / pos.cmp[k].b
for n in 1 : N
expt_ln_lkh += expt_S[k,n] * (X[:, n]' * expt_ln_lambda[:,k]
- sum(expt_lambda[:,k]) - sum(lgamma.(X[:,n]+1)))[1]
end
end
expt_ln_pS = sum(expt_S' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha))))
expt_ln_qS = sum(expt_S .* ln_expt_S)
KL_lambda = 0
for k in 1 : K
KL_lambda += (sum(pos.cmp[k].a)*log.(pos.cmp[k].b) - sum(pri.cmp[k].a)*log.(pri.cmp[k].b)
- sum(lgamma.(pos.cmp[k].a)) + sum(lgamma.(pri.cmp[k].a))
+ (pos.cmp[k].a - pri.cmp[k].a)' * expt_ln_lambda[:,k]
+ (pri.cmp[k].b - pos.cmp[k].b) * sum(expt_lambda[:,k])
)[1]
end
KL_pi = (lgamma.(sum(pos.alpha)) - lgamma.(sum(pri.alpha))
- sum(lgamma.(pos.alpha)) + sum(lgamma.(pri.alpha))
+ (pos.alpha - pri.alpha)' * (digamma.(pos.alpha) - digamma.(sum(pos.alpha)))
)[1]
VB = expt_ln_lkh + expt_ln_pS - expt_ln_qS - (KL_lambda + KL_pi)
return VB
end
function add_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64})
D = bpmm.D
K = bpmm.K
sum_S = sum(S, 2)
alpha = [bpmm.alpha[k] + sum_S[k] for k in 1 : K]
cmp = Vector{Gam}()
XS = X*S';
for k in 1 : K
a = [(bpmm.cmp[k].a[d] + XS[d,k])::Float64 for d in 1 : D]
b = bpmm.cmp[k].b + sum_S[k]
push!(cmp, Gam(a, b))
end
return BPMM(D, K, alpha, cmp)
end
remove_stats(bpmm::BPMM, X::Matrix{Float64}, S::Matrix{Float64}) = add_stats(bpmm, X, -S)
####################
## used for Variational Inference
function update_S(bpmm::BPMM, X::Matrix{Float64})
D, N = size(X)
K = bpmm.K
ln_expt_S = zeros(K, N)
tmp = zeros(K)
sum_digamma_tmp = digamma.(sum(bpmm.alpha))
for k in 1 : K
tmp[k] = - sum(bpmm.cmp[k].a) / bpmm.cmp[k].b
tmp[k] += digamma.(bpmm.alpha[k]) - sum_digamma_tmp
end
ln_lambda_X = [X'*(digamma.(bpmm.cmp[k].a) - log.(bpmm.cmp[k].b)) for k in 1 : K]
for n in 1 : N
tmp_ln_pi = [tmp[k] + ln_lambda_X[k][n] for k in 1 : K]
ln_expt_S[:,n] = tmp_ln_pi - logsumexp(tmp_ln_pi)
end
return ln_expt_S
end
"""
Pick the single state with the highest probability for each sample.
"""
function winner_takes_all(S::Matrix{Float64})
S_ret = zeros(size(S))
for n in 1 : size(S_ret, 2)
idx = indmax(S[:,n])
S_ret[idx,n] = 1
end
return S_ret
end
####################
## used for Gibbs Sampling
function sample_S_GS(pmm::PMM, X::Matrix{Float64})
D, N = size(X)
K = pmm.K
S = zeros(K, N)
tmp = [-sum(pmm.cmp[k].lambda) + log.(pmm.phi[k]) for k in 1 : K]
ln_lambda_X = [X'*log.(pmm.cmp[k].lambda) for k in 1 : K]
for n in 1 : N
tmp_ln_phi = [(tmp[k] + ln_lambda_X[k][n])::Float64 for k in 1 : K]
tmp_ln_phi = tmp_ln_phi - logsumexp(tmp_ln_phi)
S[:,n] = categorical_sample(exp.(tmp_ln_phi))
end
return S
end
####################
## used for Collapsed Gibbs Sampling
function calc_ln_NB(Xn::Vector{Float64}, gam::Gam)
ln_lkh = [(gam.a[d]*log.(gam.b)
- lgamma.(gam.a[d])
+ lgamma.(Xn[d] + gam.a[d])
- (Xn[d] + gam.a[d])*log.(gam.b + 1)
)::Float64 for d in 1 : size(Xn, 1)]
return sum(ln_lkh)
end
function sample_Sn(Xn::Vector{Float64}, bpmm::BPMM)
ln_tmp = [(calc_ln_NB(Xn, bpmm.cmp[k]) + log.(bpmm.alpha[k])) for k in 1 : bpmm.K]
ln_tmp = ln_tmp - logsumexp(ln_tmp)
Sn = categorical_sample(exp.(ln_tmp))
return Sn
end
function sample_S_CGS(S::Matrix{Float64}, X::Matrix{Float64}, bpmm::BPMM)
D, N = size(X)
K = size(S, 1)
for n in randperm(N)
# remove
bpmm = remove_stats(bpmm, X[:,[n]], S[:,[n]])
# sample
S[:,n] = sample_Sn(X[:,n], bpmm)
# insert
bpmm = add_stats(bpmm, X[:,[n]], S[:,[n]])
end
return S, bpmm
end
####################
## Algorithm main
"""
Compute posterior distribution via variational inference.
"""
function learn_VI(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int)
# initialisation
expt_S = init_S(X, prior_bpmm)
bpmm = add_stats(prior_bpmm, X, expt_S)
VB = NaN * zeros(max_iter)
# inference
for i in 1 : max_iter
# E-step
expt_S = exp.(update_S(bpmm, X))
# M-step
bpmm = add_stats(prior_bpmm, X, expt_S)
# calc VB
VB[i] = calc_ELBO(X, prior_bpmm, bpmm)
end
return expt_S, bpmm, VB
end
"""
Compute posterior distribution via Gibbs sampling.
"""
function learn_GS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int)
# initialisation
S = init_S(X, prior_bpmm)
bpmm = add_stats(prior_bpmm, X, S)
VB = NaN * zeros(max_iter)
# inference
for i in 1 : max_iter
# sample parameters
pmm = sample_PMM(bpmm)
# sample latent variables
S = sample_S_GS(pmm, X)
# update current model
bpmm = add_stats(prior_bpmm, X, S)
# calc VB
VB[i] = calc_ELBO(X, prior_bpmm, bpmm)
end
return S, bpmm, VB
end
"""
Compute posterior distribution via collapsed Gibbs sampling.
"""
function learn_CGS(X::Matrix{Float64}, prior_bpmm::BPMM, max_iter::Int)
# initialisation
S = init_S(X, prior_bpmm)
bpmm = add_stats(prior_bpmm, X, S)
VB = NaN * zeros(max_iter)
# inference
for i in 1 : max_iter
# directly sample S
S, bpmm = sample_S_CGS(S, X, bpmm)
# calc VB
VB[i] = calc_ELBO(X, prior_bpmm, bpmm)
end
return S, bpmm, VB
end
end
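# Usage sketch (editor's note): a hypothetical, minimal call sequence mirroring
# demo_PoissonMixtureModel.jl below.
#
# D, K = 2, 8
# cmp = [PoissonMixtureModel.Gam(ones(D), 0.01) for _ in 1 : K]
# prior = PoissonMixtureModel.BPMM(D, K, 100.0*ones(K), cmp)
# pmm = PoissonMixtureModel.sample_PMM(prior)
# X, S = PoissonMixtureModel.sample_data(pmm, 300)
# S_est, posterior, VB = PoissonMixtureModel.learn_VI(X, prior, 100)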
================================================
FILE: src/demo_BayesNeuralNet.jl
================================================
####################################
## Demo script for Bayesian neural network.
using PyPlot, PyCall
push!(LOAD_PATH, ".")
import BayesNeuralNet
"""
Sample neural nets from prior.
"""
function sample_test()
# model parameters
D = 1 # output
K = 3 # hidden
M = 2 # input
sigma2_w = 10.0
sigma2_y = 0.1
xmin = -5
xmax = 5
N_lin = 1000
X_lin = ones(M, N_lin)
X_lin[1,:] = linspace(xmin, xmax, N_lin)
X_lin[2,:] = 1 # bias
# visualize
num_samples = 5
figure("Function samples")
clf()
for i in 1 : num_samples
_, Y_true, _, _ = BayesNeuralNet.sample_data_from_prior(X_lin, sigma2_w, sigma2_y, D, K)
plot(X_lin[1,:], Y_true)
xlim([xmin, xmax])
end
ratey = (ylim()[2] - ylim()[1]) * 0.1
ratex = (xlim()[2] - xlim()[1]) * 0.1
text(xlim()[1] + ratex, ylim()[2] - ratey, @sprintf("K=%d", K), fontsize=18)
show()
end
"""
Run a test script of variational inference for Bayesian neural net.
"""
function test()
#################
# prepare data
# data size
D = 1 # output
M = 2 # input
# function setting
xmin = -2
xmax = 4
N_lin = 1000
X_lin = ones(M, N_lin)
X_lin[1,:] = linspace(xmin, xmax, N_lin)
X_lin[2,:] = 1 # bias
# training data
N = 50 # data size
X = 2*rand(M, N) # input in [0, 2]
X[2,:] = 1.0 # bias
Y = 0.5*sin.(2*pi * X[1,:]/3) + 0.05 * randn(N)
# model parameters
K = 5
sigma2_w = 10.0
sigma2_y = 0.01
################
# inference
alpha = 1.0e-5
max_iter = 100000
mu1, rho1, mu2, rho2 = BayesNeuralNet.VI(Y, X, sigma2_w, sigma2_y, K, alpha, max_iter)
Y_mean = [mu2'* tanh.(mu1'X_lin[:,n]) for n in 1 : N_lin]
################
# visualize
figure("result")
clf()
Y_list = []
num_samples = 100
for i in 1 : num_samples
Y_est, _ = BayesNeuralNet.sample_data_from_posterior(X_lin, mu1, rho1, mu2, rho2, sigma2_y, D)
push!(Y_list, Y_est)
plot(X_lin[1,:], Y_est, "-c", alpha=0.25)
end
plot(X[1,:], Y, "ok")
plot(X_lin[1,:], Y_mean, "b-")
xlim([xmin, xmax])
xlabel("x")
ylabel("y")
show()
end
#sample_test()
test()
================================================
FILE: src/demo_DimensionalityReduction.jl
================================================
###################################
## Demo script for Bayesian Dimensionality Reduction
using PyPlot, PyCall
@pyimport sklearn.datasets as datasets
push!(LOAD_PATH,".")
import DimensionalityReduction
function load_facedata(skip::Int)
face = datasets.fetch_olivetti_faces()
Y_raw = face["images"]
N, S_raw, _ = size(Y_raw)
L = round(Int, S_raw / skip)
Y_tmp = Y_raw[:,1:skip:end, 1:skip:end]
Y = convert(Array{Float64, 2}, reshape(Y_tmp, N, size(Y_tmp,2)*size(Y_tmp,3))')
D = size(Y, 1)
return Y, D, L
end
function visualize(Y::Array{Float64,2}, L::Int)
D, N = size(Y)
base = round(Int, sqrt(N))
v = round(Int, (L*ceil(N / base)))
h = L * base
pic = zeros(v, h)
for n in 1 : N
i = round(Int, (L*ceil(n / base)))
idx1 = i - L + 1 : i
idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1)
pic[idx1,idx2] = reshape(Y[:,n], L, L)
end
imshow(pic, cmap=ColorMap("gray"))
end
function visualize(Y::Array{Float64,2}, L::Int, mask::BitArray{2})
# for missing
D, N = size(Y)
base = round(Int, sqrt(N))
v = round(Int, (L*ceil(N / base)))
h = L * base
pic = zeros(v, h, 3)
Y_3dim = zeros(D, N, 3)
for i in 1 : 3
if i == 2
Y_tmp = deepcopy(Y)
Y_tmp[mask] = 1
Y_3dim[:,:,i] = Y_tmp
else
Y_tmp = deepcopy(Y)
Y_tmp[mask] = 0
Y_3dim[:,:,i] = Y_tmp
end
end
for n in 1 : N
i = round(Int, (L*ceil(n / base)))
idx1 = i - L + 1 : i
idx2 = L*mod(n-1, base)+1 : L*(mod(n-1, base) + 1)
for i in 1 : 3
pic[idx1,idx2,i] = reshape(Y_3dim[:,n,i], L, L)
end
end
imshow(pic, cmap=ColorMap("gray"))
end
"""
Run a demo script of missing-data interpolation for the face dataset.
"""
function test_face_missing()
# load data
skip = 2
Y, D, L = load_facedata(skip)
# mask
missing_rate = 0.50
mask = rand(size(Y)) .< missing_rate
Y_obs = deepcopy(Y)
Y_obs[mask] = NaN
# known params
M = 16
sigma2_y = 0.001
Sigma_W = zeros(M,M,D)
Sigma_mu = 1.0 * eye(D)
for d in 1 : D
Sigma_W[:,:,d] = 0.1 * eye(M)
end
prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu)
# learn & generate
max_iter = 100
posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter)
Y_est = posterior.m_W'*X_est + repmat(posterior.m_mu, 1, size(X_est, 2))
Y_itp = deepcopy(Y_obs)
Y_itp[mask] = Y_est[mask]
#visualize
N_show = 4^2
figure("Observation")
clf()
visualize(Y_obs[:,1:N_show], L, mask[:,1:N_show])
title("Observation")
#figure("Estimation")
#clf()
#visualize(Y_est[:,1:N_show], L)
#title("Estimation")
figure("Interpolation")
clf()
visualize(Y_itp[:,1:N_show], L)
title("Interpolation")
figure("Truth")
clf()
visualize(Y[:,1:N_show], L)
title("Truth")
show()
end
"""
Run a dimensionality reduction demo using the Iris dataset.
"""
function test_iris()
##################
# load data
iris = datasets.load_iris()
Y_obs = iris["data"]'
label_list = [iris["target_names"][elem+1] for elem in iris["target"]]
D, N = size(Y_obs)
##################
# 2D compression
# model
M = 2
sigma2_y = 0.001
Sigma_W = zeros(M,M,D)
Sigma_mu = 1.0 * eye(D)
for d in 1 : D
Sigma_W[:,:,d] = 0.1 * eye(M)
end
prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu)
# learn & generate
max_iter = 100
posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter)
# visualize
figure("2D plot")
clf()
scatter(X_est[1,1:50], X_est[2,1:50], color="r")
scatter(X_est[1,51:100], X_est[2,51:100], color="g")
scatter(X_est[1,101:end], X_est[2,101:end], color="b")
xlabel("\$x_1\$", fontsize=20)
ylabel("\$x_2\$", fontsize=20)
legend([label_list[1], label_list[51], label_list[101]], fontsize=16)
##################
# 3D compression
# model
M = 3
sigma2_y = 0.001
Sigma_W = zeros(M,M,D)
Sigma_mu = 1.0 * eye(D)
for d in 1 : D
Sigma_W[:,:,d] = 0.1 * eye(M)
end
prior = DimensionalityReduction.DRModel(D, M, sigma2_y, zeros(M, D), Sigma_W, zeros(D), Sigma_mu)
# learn & generate
max_iter = 100
posterior, X_est = DimensionalityReduction.VI(deepcopy(Y_obs), prior, max_iter)
# visualize
figure("3D plot")
clf()
scatter3D(X_est[1,1:50], X_est[2,1:50], X_est[3,1:50], c="r")
scatter3D(X_est[1,51:100], X_est[2,51:100], X_est[3,51:100], c="g")
scatter3D(X_est[1,101:end], X_est[2,101:end], X_est[3,101:end], c="b")
legend([label_list[1], label_list[51], label_list[101]], fontsize=16)
xlabel("\$x_1\$", fontsize=20)
ylabel("\$x_2\$", fontsize=20)
zlabel("\$x_3\$", fontsize=20)
show()
end
#test_face_missing()
test_iris()
================================================
FILE: src/demo_GaussianMixtureModel.jl
================================================
###################################
## Example code
## for Bayesian Gaussian Mixture Model
using PyPlot, PyCall
push!(LOAD_PATH,".")
import GaussianMixtureModel
"""
Visualize data & estimation in 2D space.
"""
function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text)
cmp = get_cmap("jet")
K1 = size(S, 1)
K2 = size(S_est, 1)
col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)]
col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)]
f, (ax1, ax2) = subplots(1,2,num=text)
f[:clf]()
f, (ax1, ax2) = subplots(1,2,num=text)
for k in 1 : K1
ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k])
end
ax1[:set_title]("truth")
for k in 1 : K2
ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k])
end
ax2[:set_title]("estimation")
end
"""
Run a test script for 2D data clustering.
"""
function test_2D()
## set model
D = 2 # data dimension
K = 4 # number of mixture components
alpha = 100.0 * ones(K)
beta = 0.1
m = zeros(D)
nu = D + 1.0
W = eye(D)
cmp = [GaussianMixtureModel.GW(beta, m, nu, W) for _ in 1 : K]
bgmm = GaussianMixtureModel.BGMM(D, K, alpha, cmp)
## generate data
N = 300
gmm = GaussianMixtureModel.sample_GMM(bgmm)
X, S = GaussianMixtureModel.sample_data(gmm, N)
## inference
max_iter = 100
tic()
S_est, post_bgmm, VB = GaussianMixtureModel.learn_VI(X, bgmm, max_iter)
#S_est, post_bgmm, VB = GaussianMixtureModel.learn_GS(X, bgmm, max_iter)
#S_est, post_bgmm, VB = GaussianMixtureModel.learn_CGS(X, bgmm, max_iter)
toc()
## plot
visualize_2D(X, S, GaussianMixtureModel.winner_takes_all(S_est), "2D plot")
# VB check
figure("ELBO")
clf()
plot(VB)
ylabel("ELBO")
xlabel("iterations")
show()
end
test_2D()
================================================
FILE: src/demo_LogisticRegression.jl
================================================
#####################################
## Bayesian logistic regression demo
using PyPlot, PyCall
using Distributions
push!(LOAD_PATH, ".")
import LogisticRegression
"""
Visualize prediction via surface (only for 2D inputs).
"""
function visualize_surface(mu, rho, X, Y, text)
N = 100
R = 100
xmin = minimum(X[1,:])
xmax = maximum(X[1,:])
ymin = minimum(X[2,:])
ymax = maximum(X[2,:])
lx = xmax - xmin
ly = ymax - ymin
xmin = xmin - 0.25 * lx
xmax = xmax + 0.25 * lx
ymin = ymin - 0.25 * ly
ymax = ymax + 0.25 * ly
x1 = linspace(xmin,xmax,R)
x2 = linspace(ymin,ymax,R)
x1grid = repmat(x1, 1, R)
x2grid = repmat(x2', R, 1)
val = [x1grid[:] x2grid[:]]'
z_list = []
sigma = log.(1 + exp.(rho))
for n in 1 : N
W = rand(MvNormal(mu, diagm(sigma.^2)))
z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)]
push!(z_list, z_tmp)
end
z = mean(z_list)
zgrid = reshape(z, R, R)
# 3D plot
figure("surface")
clf()
plot_surface(x1grid, x2grid, zgrid, alpha=0.5)
scatter3D(X[1,Y.==1], X[2,Y.==1], Y[Y.==1]+0.01, c="r", depthshade=true)
scatter3D(X[1,Y.==0], X[2,Y.==0], Y[Y.==0], c="b", depthshade=true)
xlim([xmin, xmax])
ylim([ymin, ymax])
zlim([0, 1])
title(text)
end
"""
Visualize prediction via contour (only for 2D inputs).
"""
function visualize_contour(mu, rho, X, Y)
N = 100
R = 100
xmin = 2*minimum(X[1,:])
xmax = 2*maximum(X[1,:])
ymin = minimum(X[2,:])
ymax = maximum(X[2,:])
x1 = linspace(xmin,xmax,R)
x2 = linspace(ymin,ymax,R)
x1grid = repmat(x1, 1, R)
x2grid = repmat(x2', R, 1)
val = [x1grid[:] x2grid[:]]'
z_list = []
W_list = []
sigma = log.(1 + exp.(rho))
for n in 1 : N
W = rand(MvNormal(mu, diagm(sigma.^2)))
z_tmp = [LogisticRegression.sigmoid(W'*val[:,i]) for i in 1 : size(val, 2)]
push!(W_list, W)
push!(z_list, z_tmp)
end
z = mean(z_list)
zgrid = reshape(z, R, R)
# prediction
figure("contour")
clf()
contour(x1grid, x2grid, zgrid, alpha=0.5, cmap=get_cmap("bwr"))
scatter(X[1,Y.==1], X[2,Y.==1], c="r")
scatter(X[1,Y.==0], X[2,Y.==0], c="b")
xlim([xmin, xmax])
ylim([ymin, ymax])
title("prediction")
# parameter samples
figure("samples")
clf()
for n in 1 : 10
draw_line(W_list[n], xmin, xmax)
end
scatter(X[1,Y.==1]', X[2,Y.==1]', c="r")
scatter(X[1,Y.==0]', X[2,Y.==0]', c="b")
xlim([xmin, xmax])
ylim([ymin, ymax])
title("parameter samples")
end
function draw_line(W, xmin, xmax)
y1 = - xmin*W[1]/W[2]
y2 = - xmax*W[1]/W[2]
plot([xmin, xmax], [y1, y2], c="k")
end
########################
# create model
M = 2 # input dimension
Sigma_w = 100.0 * eye(M) # prior on W
########################
# create toy-data using prior model
N = 50 # num of data points
X = 2 * rand(M, N) - 1.0 # input values
# sample observation Y
Y, _ = LogisticRegression.sample_data(X, Sigma_w)
########################
# inference
alpha = 1.0e-4 # learning rate
max_iter = 100000 # VI maximum iterations
# learn variational parameters (mu & rho)
mu, rho = LogisticRegression.VI(Y, X, M, Sigma_w, alpha, max_iter)
########################
# visualize (only for M=2)
visualize_surface(mu, rho, X, Y, "prediction")
visualize_contour(mu, rho, X, Y)
show()
================================================
FILE: src/demo_NMF.jl
================================================
##############################
## Audio decomposition demo using NMF
using PyPlot, PyCall
using DataFrames
using Distributions
push!(LOAD_PATH, ".")
import NMF
@pyimport scipy.io.wavfile as wf
# load data
wavfile = "../data/organ.wav"
fs, data = wf.read(wavfile)
figure("data")
clf()
Pxx, freqs, t, pl = specgram(data[10000:318000,2], Fs=fs, NFFT=256, noverlap=0)
xlabel("time [sec]")
ylabel("frequency [Hz]")
ylim([0,22000])
# model
D, N = size(Pxx)
K = 2
a_t = 1.0
b_t = 1.0
a_v = 1.0
b_v = 100.0
prior = NMF.NMFModel(a_t*ones(D,K), b_t*ones(D, K), a_v, b_v)
# inference
max_iter = 100
posterior, S_est, T_est, V_est = NMF.VI(Int64.(round.(Pxx)), prior, max_iter)
X = T_est * V_est
# visualize
figure("T")
clf()
for k in 1 : K
subplot(K,1,k)
plot(T_est[:,k], linewidth=1.0)
xlim([0, D])
ylim([0, ylim()[2]])
end
figure("V")
clf()
for k in 1 : K
subplot(K,1,k)
plot(V_est[k,:], linewidth=1.0)
xlim([0,N])
ylim([0, ylim()[2]])
end
show()
================================================
FILE: src/demo_PoissonHMM.jl
================================================
###################################
## Example code
## for Bayesian Poisson HMM
using PyPlot, PyCall
using HDF5, JLD
@pyimport matplotlib.gridspec as gspec
push!(LOAD_PATH,".")
import PoissonHMM
import PoissonMixtureModel
"""
Simple comparison between HMM and mixture model.
"""
function test_comparison()
#########################
## load data
file_name = "../data/timeseries.jld"
X = load(file_name)["obs"]
N = length(X)
#########################
## Poisson HMM
## set model
K = 2 # number of mixture components
alpha_phi = 10.0 * ones(K)
alpha_A = 100.0 * eye(K) + 1.0*ones(K, K)
cmp = [PoissonHMM.Gam(1.0, 0.01), PoissonHMM.Gam(1.0, 0.01)]
bhmm = PoissonHMM.BHMM(K, alpha_phi, alpha_A, cmp)
## inference
max_iter = 100
tic()
Z_est_hmm, post_bhmm = PoissonHMM.learn_VI(X, bhmm, max_iter)
toc()
#########################
## Poisson Mixture Model
## set model
K = 2 # number of mixture components
alpha_phi = 10.0 * ones(K)
cmp = [PoissonMixtureModel.Gam([1.0], 0.01), PoissonMixtureModel.Gam([1.0], 0.01)]
bpmm = PoissonMixtureModel.BPMM(1, K, alpha_phi, cmp)
## inference
max_iter = 100
tic()
Z_est_pmm, post_bpmm = PoissonMixtureModel.learn_VI(reshape(X, 1, N), bpmm, max_iter)
toc()
#########################
## Compare results
figure("Hidden Markov Model vs Mixture Model")
subplot(3,1,1);plot(X);ylabel("data")
subplot(3,1,2);fill_between(1:N, reshape(Z_est_hmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PHMM)")
subplot(3,1,3);fill_between(1:N, reshape(Z_est_pmm[1,:]', N), zeros(N));ylim([0.0, 1.0]);ylabel("S (PMM)")
show()
end
test_comparison()
================================================
FILE: src/demo_PoissonMixtureModel.jl
================================================
###################################
## Example code
## for Bayesian Poisson Mixture Model
push!(LOAD_PATH,".")
using PyPlot, PyCall
import PoissonMixtureModel
"""
Visualize data & estimation in 2D space.
"""
function visualize_2D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64}, text)
cmp = get_cmap("jet")
K1 = size(S, 1)
K2 = size(S_est, 1)
col1 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K1)]
col2 = [pycall(cmp.o, PyAny, Int(round(val)))[1:3] for val in linspace(0,255,K2)]
f, (ax1, ax2) = subplots(1,2,num=text)
f[:clf]()
f, (ax1, ax2) = subplots(1,2,num=text)
for k in 1 : K1
ax1[:scatter](X[1, S[k,:].==1], X[2, S[k,:].==1], color=col1[k])
end
ax1[:set_title]("truth")
for k in 1 : K2
ax2[:scatter](X[1, S_est[k,:].==1], X[2, S_est[k,:].==1], color=col2[k])
end
ax2[:set_title]("estimation")
end
function draw_hist(ax, X, S, label)
counts, bins, patches = ax[:hist](X', 20)
for i in 1 : length(patches)
if counts[i] > 0
S_tmp = S[:,bins[i] .<= X[1,:] .<= bins[i+1]]
S_sum = sum(S_tmp, 2) / sum(S_tmp)
patches[i][:set_facecolor]((S_sum[1], 0, S_sum[2]))
end
end
ax[:set_title](label)
end
"""
Visualize data & estimation using 1D histogram.
"""
function visualize_1D(X::Matrix{Float64}, S::Matrix{Float64}, S_est::Matrix{Float64})
# separated figures
f1, ax1 = subplots(1,1,num="observation")
f2, ax2 = subplots(1,1,num="estimation")
f1[:clf]()
f2[:clf]()
_, ax1 = subplots(1,1,num="observation")
_, ax2 = subplots(1,1,num="estimation")
ax1[:hist](X', 20)
ax1[:set_title]("observation")
draw_hist(ax2, X, S_est, "estimation")
end
"""
Run a test script for 1D data clustering.
"""
function test_1D()
## set model
D = 1 # data dimension, must be 1.
K = 2 # number of mixture components, must be 2.
alpha = 100.0 * ones(K)
cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K]
bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp)
## generate data
N = 1000
pmm = PoissonMixtureModel.sample_PMM(bpmm)
X, S = PoissonMixtureModel.sample_data(pmm, N)
## inference
max_iter = 100
tic()
S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter)
#S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter)
#S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter)
toc()
## plot
visualize_1D(X, S, S_est)
figure("ELBO")
clf()
plot(VB)
ylabel("ELBO")
xlabel("iterations")
show()
end
"""
Run a test script for 2D data clustering.
"""
function test_2D()
## set model
D = 2 # data dimension, must be 2.
K = 8 # number of mixture components
#K = 5
alpha = 100.0 * ones(K)
cmp = [PoissonMixtureModel.Gam(1.0*ones(D), 0.01) for i in 1 : K]
bpmm = PoissonMixtureModel.BPMM(D, K, alpha, cmp)
## generate data
N = 300
pmm = PoissonMixtureModel.sample_PMM(bpmm)
X, S = PoissonMixtureModel.sample_data(pmm, N)
## inference
max_iter = 100
tic()
S_est, post_bpmm, VB = PoissonMixtureModel.learn_VI(X, bpmm, max_iter)
#S_est, post_bpmm, VB = PoissonMixtureModel.learn_GS(X, bpmm, max_iter)
#S_est, post_bpmm, VB = PoissonMixtureModel.learn_CGS(X, bpmm, max_iter)
toc()
## plot
visualize_2D(X, S, PoissonMixtureModel.winner_takes_all(S_est), "2D plot")
# VB check
figure("ELBO")
clf()
plot(VB)
ylabel("ELBO")
xlabel("iterations")
show()
end
test_1D()
#test_2D()
================================================
FILE: src/demo_PolynomialRegression.jl
================================================
#################################
## Bayesian model selection demo
## for polynomial regression
using PyPlot, PyCall
using Distributions
function poly(X_raw, M)
N = size(X_raw, 1)
X = zeros(M, N)
for m in 0 : M - 1
X[m+1,:] = X_raw.^m
end
return X
end
function learn_bayes(X_raw, Y, M, sig2_y, Sig_w, X_lin)
X = poly(X_raw, M)
N = size(X_raw, 1)
# calc posterior
Sig_w_h = inv(X*inv(sig2_y*eye(N))*X' + inv(Sig_w))
mu_w_h = Sig_w_h * (X * inv(sig2_y * eye(N)) * Y)
# calc predictive
X_test = poly(X_lin, M)
Y_est = (mu_w_h'*X_test)'
sig2_y_prd = sig2_y + diag(X_test'Sig_w_h*X_test)
# calc evidence
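# (editor's note) log evidence of the linear-Gaussian model:
# ln p(Y|X) = -1/2 [ Y'Y/sig2_y + N*ln(sig2_y) + N*ln(2*pi)
#                    + ln|Sig_w| - mu_w_h'*inv(Sig_w_h)*mu_w_h - ln|Sig_w_h| ]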
evidence = -0.5*((Y'*Y)*inv(sig2_y) + N*log.(sig2_y) + N*log.(2*pi)
+ logdet(Sig_w)
- (mu_w_h'*inv(Sig_w_h)*mu_w_h)[1] - logdet(Sig_w_h)
)
return Y_est, sqrt.(sig2_y_prd), evidence
end
function test()
# linspace
X_lin = linspace(-1, 7, 200)
# generate data
N = 10
sig2_y = 0.1
X = 2*pi*rand(N)
Y_true = [sin.(x) for x in X_lin]
Y_obs = [sin.(x) + sqrt(sig2_y) * randn() for x in X]
dims = [1, 2, 3, 4, 5, 10]
# learning via Bayes
sig2_w = 1.0
Y_bayes = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin) for m in dims]
#############
# compute evidences
evidence = [learn_bayes(X, Y_obs, m, sig2_y, sig2_w*eye(m), X_lin)[3] for m in dims]
figure("evidence")
clf()
plot(1:length(dims), evidence)
xticks(1:length(dims),dims)
ylabel(("\$\\ln p(\\bf{Y}|\\bf{X})\$"), fontsize=20)
xlabel(("\$M\$"), fontsize=20)
#############
# visualize
x_min = X_lin[1]
x_max = X_lin[end]
y_min = -4
y_max = 4
figure("prediction")
clf()
for k in 1 : 6
subplot(230 + k)
plot(X_lin, Y_bayes[k][1])
plot(X_lin, Y_bayes[k][1] + Y_bayes[k][2], "c--")
plot(X_lin, Y_bayes[k][1] - Y_bayes[k][2], "c--")
plot(X, Y_obs, "ko")
xlim([x_min, x_max])
ylim([y_min, y_max])
text(x_max - 2.5, y_max - 1, @sprintf("M=%d", dims[k]))
end
show()
end
test()
================================================
FILE: src/demo_Simple2DGauss.jl
================================================
###################################
## Simple VI & GS for 2D Gaussian
using PyPlot
using Distributions
function calc_KL(mu1, lambda1, mu2, lambda2)
D = size(mu1, 1)
px_lnqx = 0.5 * logdet(lambda2) - 0.5 * ((mu1 - mu2)' * lambda2 * (mu1 - mu2) + trace(lambda2 * inv(lambda1)))
px_lnpx = 0.5 * logdet(lambda1) - 0.5 * D
KL = - (px_lnqx - px_lnpx)
return KL[1]
end
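# Note added in editing: calc_KL evaluates the closed-form KL divergence
# between Gaussians given by mean/precision pairs,
#   KL(p||q) = 0.5*( logdet(lambda1) - logdet(lambda2)
#                    + trace(lambda2*inv(lambda1))
#                    + (mu1-mu2)'*lambda2*(mu1-mu2) - D )
# with p = N(mu1, inv(lambda1)) and q = N(mu2, inv(lambda2)); the
# D*log(2*pi) terms cancel between the two expectations.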
function plot_results(result, truth)
N = size(result, 1)
H = Int(ceil(sqrt(N)))
W = Int(ceil(N / H))
for i in 1 : H
for j in 1 : W
n = (i - 1) * W + j
if n <= N
subplot(H, W, n)
title("$n of $N")
plot_gaussian(truth[1], truth[2], "b", "\$p(z)\$")
plot_gaussian(result[n][1], result[n][2], "r", "\$q(z)\$")
end
end
end
end
function plot_lines(X)
D, N = size(X)
X_d = zeros(D, 2*N + 1)
X_d[:,1] = X[:,1]
for i in 1 : N
X_d[1, 2*i - 1] = X[1, i]
X_d[1, 2*i] = X[1, i]
X_d[2, 2*i] = X[2, i]
X_d[2, 2*i + 1] = X[2, i]
end
plot(X[1,:], X[2,:], "oy")
plot(X_d[1,1:2*N], X_d[2,1:2*N], "--y")
end
function plot_gaussian(Mu, Sigma, col, label)
res = 100
plot(Mu[1], Mu[2], "x", color=col)
F = eigfact(Sigma)
vec = F.vectors
val = F.values
dw = 2*pi/res
w = dw * (0 : res)
c = 1.0
a = sqrt(c*val[1])
b = sqrt(c*val[2])
P1 = a*cos.(w)
P2 = b*sin.(w)
P = Mu .+ vec'*vcat(P1', P2')
plot(P[1, :], P[2, :], "-", color=col, label=label)
end
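# Note added in editing: plot_gaussian draws a constant-density contour of
# N(Mu, Sigma) by eigen-decomposing Sigma and mapping a circle onto semi-axes
# sqrt(val[1]) and sqrt(val[2]) along the eigenvectors; with c = 1.0 this is
# the one-standard-deviation ellipse.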
"""
Variational inference for 2D Gauss.
"""
function main_VI()
## create the true distribution
D = 2 # dimension
theta = 2.0*pi/12 # tilt
A = reshape([cos.(theta), -sin.(theta),
sin.(theta), cos.(theta)],
2, 2)
mu = [0.0, 0.0]
lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A')
## initialize
#mu_h = randn(D)
mu_h = [-0.5, 0.3]
lambda_h = zeros(D,D)
## main iteration
max_iter = 10
KL = fill(NaN, max_iter)
result = Array{Any, 1}(max_iter)
for i in 1 : max_iter
## update
mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (mu_h[2] - mu[2])
lambda_h[1,1] = lambda[1,1]
mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (mu_h[1] - mu[1])
lambda_h[2,2] = lambda[2,2]
## calculate KL divergence
KL[i] = calc_KL(mu_h, lambda_h, mu, lambda)
## store the results
result[i] = [deepcopy(mu_h), deepcopy(inv(lambda_h))]
end
## visualize results
figure("result per iteration (VI)")
clf()
plot_results(result, (mu, inv(lambda)))
figure("result (VI)")
clf()
plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$")
plot_gaussian(result[end][1], result[end][2], "r", "\$q(\\bf{z})\$")
xlabel("\$z_1\$", fontsize=20)
ylabel("\$z_2\$", fontsize=20)
legend(fontsize=16)
figure("KL divergence (VI)")
clf()
plot(1:max_iter, KL)
ylabel("KL divergence", fontsize=16)
xlabel("iteration", fontsize=16)
show()
end
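# Note added in editing: the coordinate updates in main_VI are the mean-field
# equations for q(z) = q(z1)q(z2) approximating a correlated 2D Gaussian:
# each factor is Gaussian with precision lambda[d,d] and a mean shifted by
# the other factor's current mean. The textbook outcome is that q matches
# the true mean but, being forced diagonal, underestimates the spread along
# the correlated direction, which is why the KL curve flattens above zero.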
"""
Gibbs sampling for 2D Gauss.
"""
function main_GS()
## create the true distribution
D = 2 # dimension
theta = 2.0*pi/12 # tilt
A = reshape([cos.(theta), -sin.(theta),
sin.(theta), cos.(theta)],
2, 2)
mu = [0.0, 0.0]
#lambda = inv(A * inv(reshape([1,0,0,10], 2, 2)) * A')
lambda = inv(A * inv(reshape([1,0,0,100], 2, 2)) * A')
## initialize
#max_iter = 1000
max_iter = 50
X = randn(D, max_iter)
mu_h = randn(D)
## main iteration
KL = fill(NaN, max_iter)
for i in 2 : max_iter
## update
mu_h[1] = mu[1] - inv(lambda[1,1])*lambda[1,2] * (X[2,i-1] - mu[2])
X[1, i] = rand(Normal(mu_h[1], sqrt(inv(lambda[1,1]))))
mu_h[2] = mu[2] - inv(lambda[2,2])*lambda[2,1] * (X[1,i] - mu[1])
X[2, i] = rand(Normal(mu_h[2], sqrt(inv(lambda[2,2]))))
if i > D
KL[i] = calc_KL(mean(X[:,1:i], 2), inv(cov(X[:,1:i], 2)), mu, lambda)
end
end
## visualize results
expt_mu = mean(X, 2)
expt_Sigma = cov(X, 2)
figure("samples (GS)")
clf()
plot_lines(X)
plot_gaussian(mu, inv(lambda), "b", "\$p(\\bf{z})\$")
plot_gaussian(expt_mu, expt_Sigma, "r", "\$q(\\bf{z})\$")
xlabel("\$z_1\$", fontsize=20)
ylabel("\$z_2\$", fontsize=20)
legend(fontsize=16)
figure("KL divergence (GS)")
clf()
plot(1:max_iter, KL)
ylabel("KL divergence", fontsize=16)
xlabel("sample size", fontsize=16)
show()
end
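# Note added in editing: each pass of main_GS samples exactly from the full
# conditionals p(z1 | z2) and p(z2 | z1), so the staircase in the samples
# figure is a Gibbs chain whose empirical mean and covariance converge to
# the truth; the KL curve tracks that Monte Carlo error. The 1:100
# eigenvalue ratio in the covariance makes the target strongly elongated,
# so the axis-aligned moves are small and the chain mixes slowly.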
main_VI()
main_GS()
================================================
FILE: src/demo_SimpleFitting.jl
================================================
#####################################
## Simple function fitting demo
using PyPlot, PyCall
using Distributions
# true param
W = [1.0, 0.0, 1.0]
# generate data
sigma = 0.5
N = 20
X = linspace(-0.4,2.4,N)
Y = [W[1] + W[2]*x + W[3]*x^2 + sigma*randn() for x in X]
X_min = minimum(X)
X_max = maximum(X)
# regression1
X_all = linspace(X_min, X_max, 100)
W1 = sum(Y.*X) / sum(X.^2)
Y1 = [W1*x for x in X_all]
# regression2
X2 = zeros(3, N)
X2[1,:] = 1
X2[2,:] = X
X2[3,:] = X.^2
W2 = inv(X2*X2') * X2*Y
Y2 = [W2[1] + W2[2]*x + W2[3]*x^2 for x in X_all]
# show data
figure()
plot(X_all, Y1, "b-")
plot(X_all, Y2, "g-")
plot(X, Y, "ko")
legend(["model1","model2","data"], loc="upper left", fontsize=16)
xlabel("\$x\$", fontsize=20)
ylabel("\$y\$", fontsize=20)
show()
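# Note added in editing: W2 above solves the normal equations
# (X2*X2')*W2 = X2*Y for the quadratic model. An equivalent and numerically
# stabler one-liner (a sketch, not in the original) is the QR-based solve
#   W2 = X2' \ Y
# Model1, by contrast, is the best fit constrained to pass through the
# origin with y = W1*x, which is why it cannot capture the curvature.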
================================================
FILE: src/demo_nonconjugate.jl
================================================
using PyPlot, PyCall
using Distributions
import StatsFuns.logsumexp
PyDict(matplotlib["rcParams"])["mathtext.fontset"] = "cm"
PyDict(matplotlib["rcParams"])["mathtext.rm"] = "serif"
PyDict(matplotlib["rcParams"])["lines.linewidth"] = 1.5
PyDict(matplotlib["rcParams"])["font.family"] = "TakaoPGothic"
function expt(a, b, sigma, Y, X, N_s)
S = rand(Gamma(a, 1.0/b), N_s)
C = mean([exp(sum(logpdf.(Normal(s, sigma), Y))) for s in S])
curve = [exp(sum(logpdf.(Normal(mu, sigma), Y))) * pdf(Gamma(a, 1.0/b), mu) for mu in X]
m = mean([s*exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S])
v = mean([(s-m)^2 * exp(sum(logpdf.(Normal(s, sigma), Y)))/C for s in S])
return curve/C, m, v
end
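# Note added in editing: expt is self-normalized importance sampling with
# the Gamma(a, 1/b) prior as proposal for a non-conjugate model (Gamma prior
# on a Gaussian mean). The weights are the likelihoods
# w(s) = exp(sum(logpdf.(Normal(s, sigma), Y))), C estimates the marginal
# likelihood p(Y), m and v estimate the posterior mean and variance, and
# curve is (likelihood x prior)/C on the grid X, i.e. the normalized
# posterior density for plotting.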
X = linspace(-5, 10, 1000)
a = 2.0
b = 2.0
mu = 1.0
sigma=1.0
# data
N = 10
Y = rand(Normal(mu, sigma), N)
# calc posterior
N_s = 100000
posterior, m, v = expt(a, b, sigma, Y, X, N_s)
a_h = m^2 / v
b_h = m / v
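# Note added in editing: a_h and b_h moment-match a Gamma distribution to
# the sampled posterior: Gamma(shape a, rate b) has mean a/b and variance
# a/b^2, so matching m and v gives a = m^2/v, b = m/v. Distributions.jl's
# Gamma takes (shape, scale), hence the 1.0/b_h in the plot below.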
figure()
plot(X, pdf(Normal(mu,sigma), X))
plot(X, pdf(Gamma(a,1.0/b), X))
plot(X, posterior)
plot(X, pdf(Gamma(a_h,1.0/b_h), X))
plot(Y, 0.02*ones(N), "o")
legend(["generator", "prior", "posterior", "approx", "samples"])
#legend(["データ生成分布", "事前分布", "事後分布", "近似分布", "データ"], fontsize=12)
xlim([-3, 6])
ylim([0, 1.8])
show()