Theme Song
# install.packages("remotes")
library('BBmisc', 'rmsfuns')
#remotes::install_github("rstudio/sass")
lib('sass')
## sass
## TRUE
/* https://stackoverflow.com/a/66029010/3806250 */
h1 { color: #002C54; }
h2 { color: #2F496E; }
h3 { color: #375E97; }
h4 { color: #556DAC; }
h5 { color: #92AAC7; }
/* ----------------------------------------------------------------- */
/* https://gist.github.com/himynameisdave/c7a7ed14500d29e58149#file-broken-gradient-animation-less */
.hover01 {
/* color: #FFD64D; */
background: linear-gradient(155deg, #EDAE01 0%, #FFEB94 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #EDAE01 20%, #FFEB94 80%);
}
}
.hover02 {
color: #FFD64D;
background: linear-gradient(155deg, #002C54 0%, #4CB5F5 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #002C54 20%, #4CB5F5 80%);
}
}
.hover03 {
color: #FFD64D;
background: linear-gradient(155deg, #A10115 0%, #FF3C5C 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #A10115 20%, #FF3C5C 80%);
}
}
## https://stackoverflow.com/a/36846793/3806250
options(width = 999)
knitr::opts_chunk$set(class.source = 'hover01', class.output = 'hover02', class.error = 'hover03')
if(!suppressPackageStartupMessages(require('BBmisc'))) {
install.packages('BBmisc', dependencies = TRUE, INSTALL_opts = '--no-lock')
}
suppressPackageStartupMessages(require('BBmisc'))
# suppressPackageStartupMessages(require('rmsfuns'))
pkgs <- c('devtools', 'knitr', 'kableExtra', 'tidyr',
'readr', 'lubridate', 'reprex', 'magrittr',
'timetk', 'plyr', 'dplyr', 'stringr',
'tdplyr', 'tidyverse', 'formattable',
'echarts4r', 'paletteer')
suppressAll(lib(pkgs))
# load_pkg(pkgs)
## Set the timezone but not change the datetime
Sys.setenv(TZ = 'Asia/Tokyo')
## options(knitr.table.format = 'html') will set all kableExtra tables to be 'html', otherwise need to set the parameter on every single table.
options(warn = -1, knitr.table.format = 'html')#, digits.secs = 6)
## https://stackoverflow.com/questions/39417003/long-vectors-not-supported-yet-abnor-in-rmd-but-not-in-r-script
knitr::opts_chunk$set(message = FALSE, warning = FALSE)#,
#cache = TRUE, cache.lazy = FALSE)
rm(pkgs)
課題を受ける準備はできていますか?
以下に提出のための指示が表示されます。
The R dataset faithful
contains data on waiting time between eruptions (the column named waiting
) and the duration of the eruption (the column named eruptions
) for the famous Old Faithful geyser in Yellowstone National Park, Wyoming, USA.
You are asked to modify the EM algorithm provided in “Sample code for density estimation problems” to provide a density estimate the marginal distribution of the duration of the eruptions using a location-and-scale mixture of 2 univariate Gaussian distributions (as opposed to the location mixture of 6 univariate Gaussian distributions that we used for the galaxies
dataset).
Reviewers will check whether the code has been modified correctly, and whether the density estimate you generate appears correct. Please remember that you are being asked to use a location-and-scale mixture to generate the density estimate, so the “Sample code for density estimation problems” cannot be used directly and requires some modification. Before submitting your answer, it might be useful to compare the density estimate generated by your algorithm against a kernel density estimate generated by the R function density()
. While they should not be identical, they should be similar.
Provide an EM algorithm to fit the two-component, location-and-scale mixture of Gaussians.
## Initialize the parameters
x = faithful$eruptions
n = length(x)
KK = 2
w = rep(1, KK)/KK
mu = rnorm(KK, mean(x), sd(x))
sigma = rep(sd(x)/KK, KK)
xx = seq(0,7,length.out=150)
epsilon = 0.000001
s = 0
sw = FALSE
KL = -Inf
KL.out = NULL
print(paste(w, mu, sigma))
## [1] "0.5 2.19472524221543 0.570685625552604" "0.5 3.96920174696408 0.570685625552604"
while(!sw){
## E step
v = array(0, dim=c(n,KK))
for(k in 1:KK){
v[,k] = log(w[k]) + dnorm(x, mu[k], sigma[k],log=TRUE)
}
for(i in 1:n){
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = apply(v,2,mean)
mu = rep(0, KK)
for(k in 1:KK){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Standard deviations
sigma = rep(0,KK)
for(k in 1:KK){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
##Check convergence
KLn = 0
for(i in 1:n){
for(k in 1:KK){
KLn = KLn + v[i,k]*(log(w[k]) + dnorm(x[i], mu[k], sigma[k], log=TRUE))
}
}
if(abs(KLn-KL)/abs(KLn)<epsilon){
sw=TRUE
}
KL = KLn
KL.out = c(KL.out, KL)
s = s + 1
print(paste(w, mu, sigma))
print(paste(s, KLn))
}
## [1] "0.36155193799222 2.06924255584127 0.347513175967975" "0.63844806200778 4.29109992656149 0.423927569185264"
## [1] "1 -310.947596566627"
## [1] "0.357553920678491 2.04142373782759 0.272467417679584" "0.642446079321509 4.29275563471645 0.408325891694752"
## [1] "2 -280.323053754154"
## [1] "0.354340755956265 2.03283969328168 0.257991091244255" "0.645659244043735 4.286262699884 0.417628155756559"
## [1] "3 -278.852517230604"
## [1] "0.352296405236873 2.02787168932402 0.250281833218957" "0.647703594763127 4.28185238468387 0.424288137139489"
## [1] "4 -278.664478797591"
## [1] "0.350917937348503 2.02455852725269 0.245083302842545" "0.649082062651497 4.27885678193768 0.428792636948692"
## [1] "5 -278.493973424933"
## [1] "0.349989919914604 2.0223437009035 0.241574737150429" "0.650010080085396 4.27683087307054 0.431830580812401"
## [1] "6 -278.354951123611"
## [1] "0.349383127380817 2.02090515935754 0.239285333481236" "0.650616872619183 4.27550074488632 0.43382441297239"
## [1] "7 -278.25871496773"
## [1] "0.348998675583463 2.01999879441996 0.23784033291156" "0.651001324416537 4.274655181073 0.435092686028913"
## [1] "8 -278.19774936255"
## [1] "0.348761172255986 2.01944116938323 0.236950889900396" "0.651238827744014 4.27413154762529 0.435878725010135"
## [1] "9 -278.160763119378"
## [1] "0.348617039591514 2.01910371204385 0.236412613371422" "0.651382960408486 4.27381325383233 0.436356845919851"
## [1] "10 -278.138749333455"
## [1] "0.348530586362821 2.01890166407992 0.236090358786904" "0.651469413637179 4.27362213661071 0.436644068042301"
## [1] "11 -278.125745645957"
## [1] "0.348479109543088 2.01878149361119 0.235898713290557" "0.651520890456912 4.27350826621535 0.436815253061429"
## [1] "12 -278.11808442015"
## [1] "0.348448596167493 2.018710310048 0.235785199758227" "0.651551403832507 4.27344074197712 0.436916784486784"
## [1] "13 -278.113574121384"
## [1] "0.348430557998857 2.01866824670674 0.235718126459832" "0.651569442001143 4.27340081522087 0.436976826782995"
## [1] "14 -278.110919147511"
## [1] "0.34841991185455 2.01864342694957 0.235678550676161" "0.65158008814545 4.27337724711163 0.437012271342916"
## [1] "15 -278.109356225655"
## [1] "0.348413634518935 2.01862879448143 0.235655219261797" "0.651586365481065 4.27336334938451 0.437033173324635"
## [1] "16 -278.108436101175"
## [1] "0.348409935285991 2.018620172309 0.235641471403304" "0.651590064714009 4.27335515905428 0.437045491780521"
## [1] "17 -278.107894372473"
## [1] "0.348407756059573 2.0186150932248 0.23563337297577" "0.651592243940427 4.27335033397272 0.437052748931005"
## [1] "18 -278.107575413498"
## [1] "0.348406472526331 2.01861210180596 0.235628603279808" "0.651593527473669 4.27334749202008 0.437057023400369"
## [1] "19 -278.107387611981"
Provide code to generate the density estimate on a grid consisting of 150 points in the interval [0,7].
## Plot EM Density Estimate
Eruptions = seq(0, 7,length.out=300)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
for(k in 1:KK){
density.EM[s] = density.EM[s] + w[k]*dnorm(Eruptions[s], mu[k], sigma[k])
}
}
yy = density(x)
#plot(Eruptions, density.EM, col="red", lwd=2, type="l")
plot(Eruptions, rep(density.EM, 2), col="red", lwd=2, type="l")
points(x, rep(0,n))
Provide the a graph of the density estimate on the interval [0,7].
###### Setup
#data
x = faithful$eruptions
n = length(x)
###### EM algorithm to fit the location-and-scale mixture of 2 Gaussians
w = 0.5
mu = c(mean(x)-sd(x), mean(x)+sd(x))
sigma = rep(sd(x)/4,2)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
}
## Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*(log(w) + dnorm(x[i],mu[1],sigma[1],log=TRUE)) + v[i,2]*(log(1-w) + dnorm(x[i],mu[2],sigma[2],log=TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -1259.19051378529"
## [1] "2 -1845.66425838666"
## [1] "3 -1845.6665983302"
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) + (1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l", xlab="Eruptions")
points(x,rep(0,n))
Is the E step implemented correctly?
There are a couple of ways in which the E step can be implemented. This is an example that uses \(\omega\) and \(1-\omega\) as the weights of the two components
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
An alternative implementation that is also valid would parameterize the weights in terms of \(\omega_1\) and \(\omega_2\), so be open minded when grading.
Is the M step implemented correctly?
The exact form of the implementation of the M step also depends on how the weights are parameterized. Assuming that \(\omega\) and \(1-\omega\) are used,
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
As always, please remember that there are multiple ways to achieve the same goal in R.
Does the code to generate the density estimate appear correct?
This is an example that, as before, assumes that the weights are parameterized as ww and \(1-\omega\).
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) +
(1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l")
points(x,rep(0,n))
Does the graph appear to be correct?
The density estimate should look similar to the Figure below. Note that the variance of both components is clearly different.
The complete code for the EM algorithm follows:
###### Setup data
x = faithful$eruptions
n = length(x)
###### EM algorithm to fit the location-and-scale mixture of 2 Gaussians
w = 0.5
mu = c(mean(x)-sd(x), mean(x)+sd(x))
sigma = rep(sd(x)/4,2)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
## Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*(log(w) + dnorm(x[i],mu[1],sigma[1],log=TRUE)) +
v[i,2]*(log(1-w) + dnorm(x[i],mu[2],sigma[2],log=TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -309.450737493483"
## [1] "2 -292.32994157869"
## [1] "3 -280.279590400843"
## [1] "4 -278.882852059725"
## [1] "5 -278.687833281763"
## [1] "6 -278.515594042029"
## [1] "7 -278.371067671734"
## [1] "8 -278.269313940617"
## [1] "9 -278.20429223392"
## [1] "10 -278.164685957935"
## [1] "11 -278.141072956464"
## [1] "12 -278.127115849347"
## [1] "13 -278.118891261794"
## [1] "14 -278.114049071933"
## [1] "15 -278.111198729535"
## [1] "16 -278.109520815145"
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) +
(1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l", xlab="Eruptions")
points(x,rep(0,n))
Provide an EM algorithm to fit the two-component, location-and-scale mixture of Gaussians.
KK = 2
while(!sw){
## E step
v = array(0, dim=c(n,KK))
for(k in 1:KK){
v[,k] = log(w[k]) + dnorm(x, mu[k], sigma,log=TRUE)
}
for(i in 1:n){
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = apply(v,2,mean)
mu = rep(0, KK)
for(k in 1:KK){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Standard deviations
sigma = 0
for(i in 1:n){
for(k in 1:KK){
sigma = sigma + v[i,k]*(x[i] - mu[k])^2
}
}
sigma = sqrt(sigma/sum(v))
##Check convergence
KLn = 0
for(i in 1:n){
for(k in 1:KK){
KLn = KLn + v[i,k]*(log(w[k]) + dnorm(x[i], mu[k], sigma, log=TRUE))
}
}
if(abs(KLn-KL)/abs(KLn)<epsilon){
sw=TRUE
}
KL = KLn
KL.out = c(KL.out, KL)
s = s + 1
print(paste(s, KLn))
}
Provide code to generate the density estimate on a grid consisting of 150 points in the interval [0,7].
xx = seq(0,7,length.out=150)
nxx = length(xx)
#density.EM = rep(0, nxx)
density.EM = array(0, dim=c(nxx,2))
for(s in 1:nxx){
for(k in 1:KK){
density.EM[is.nan(density.EM[s,k]),k] <- rep(0,2)
#density.EM[s] = density.EM[s] + w[k]*dnorm(xx[s], mu[k], sigma)
density.EM[s,] = density.EM[s,] + w[k]*dnorm(xx[s], mu[k], sigma[k])
}
}
## Since density.EM[,1] == density.EM[,2]
density.EM = density.EM[,1]
## Number of iterations of the sampler
rrr = 12000
burn = 2000
## Storing the samples
cc.out = array(0, dim=c(rrr, n))
w.out = array(0, dim=c(rrr, KK))
mu.out = array(0, dim=c(rrr, KK))
sigma.out = rep(0, rrr)
logpost = rep(0, rrr)
## Compute the samples of the density over a dense grid
density.mcmc = array(0, dim=c(rrr-burn,length(xx)))
for(s in 1:(rrr-burn)){
for(k in 1:KK){
density.mcmc[is.nan(density.mcmc[s,k]),k] <- 0
density.mcmc[s,] = density.mcmc[s,] + w.out[s+burn,k]*dnorm(xx,mu.out[s+burn,k],sigma.out[s+burn])
}
}
density.mcmc[is.nan(density.mcmc)] <- 0
density.mcmc.m = apply(density.mcmc , 2, mean)
Provide the a graph of the density estimate on the interval [0,7].
Here is the code to generate the graph. I apparently can not copy the graph to this window.
## Plot Bayesian estimate with pointwise credible bands along with kernel density estimate and frequentist point estimate
colscale = c("black", "blue", "red")
yy = density(x)
#density.mcmc.lq = replace_na(apply(density.mcmc, 2, quantile, 0.025, na.rm=TRUE), 0)
#density.mcmc.uq = replace_na(apply(density.mcmc, 2, quantile, 0.975, na.rm=TRUE), 0)
density.mcmc.lq = apply(density.mcmc, 2, quantile, 0.025)
density.mcmc.uq = apply(density.mcmc, 2, quantile, 0.975)
#plot(xx, density.mcmc.m, type="n", ylim=c(0,max(density.mcmc.uq)), xlab="Eruption Duration", ylab="Density")
plot(xx, density.EM, type="n", ylim=c(0,max(density.mcmc.uq)), xlab="Eruption Duration", ylab="Density")
polygon(c(xx,rev(xx)), c(density.mcmc.lq, rev(density.mcmc.uq)), col="grey", border="grey")
#lines(xx, density.mcmc.m, col=colscale[1], lwd=2)
lines(xx, density.EM, col=colscale[2], lty=2, lwd=2)
lines(yy, col=colscale[3], lty=3, lwd=2)
points(x, rep(0,n))
#legend(27000, 0.00017, c("KDE","EM","MCMC"), col=colscale[c(3,2,1)], lty=c(3,2,1), lwd=2, bty="n")
legend(length(xx), length(yy), c("KDE","EM","MCMC"), col=colscale[c(3,2,1)], lty=c(3,2,1), lwd=2, bty="n")
graph plotted from Rmarkdown
Is the E step implemented correctly?
There are a couple of ways in which the E step can be implemented. This is an example that uses \(\omega\) and \(1-\omega\) as the weights of the two components
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
An alternative implementation that is also valid would parameterize the weights in terms of \(\omega_1\) and \(\omega_2\), so be open minded when grading.
Is the M step implemented correctly?
The exact form of the implementation of the M step also depends on how the weights are parameterized. Assuming that \(\omega\) and \(1-\omega\) are used,
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
As always, please remember that there are multiple ways to achieve the same goal in R.
Does the code to generate the density estimate appear correct?
This is an example that, as before, assumes that the weights are parameterized as ww and \(1-\omega\).
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) +
(1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l")
points(x,rep(0,n))
Does the graph appear to be correct?
The density estimate should look similar to the Figure below. Note that the variance of both components is clearly different.
The complete code for the EM algorithm follows:
###### Setup data
x = faithful$eruptions
n = length(x)
###### EM algorithm to fit the location-and-scale mixture of 2 Gaussians
w = 0.5
mu = c(mean(x)-sd(x), mean(x)+sd(x))
sigma = rep(sd(x)/4,2)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
## Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*(log(w) + dnorm(x[i],mu[1],sigma[1],log=TRUE)) +
v[i,2]*(log(1-w) + dnorm(x[i],mu[2],sigma[2],log=TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -309.450737493483"
## [1] "2 -292.32994157869"
## [1] "3 -280.279590400843"
## [1] "4 -278.882852059725"
## [1] "5 -278.687833281763"
## [1] "6 -278.515594042029"
## [1] "7 -278.371067671734"
## [1] "8 -278.269313940617"
## [1] "9 -278.20429223392"
## [1] "10 -278.164685957935"
## [1] "11 -278.141072956464"
## [1] "12 -278.127115849347"
## [1] "13 -278.118891261794"
## [1] "14 -278.114049071933"
## [1] "15 -278.111198729535"
## [1] "16 -278.109520815145"
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) +
(1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l", xlab="Eruptions")
points(x,rep(0,n))
Provide an EM algorithm to fit the two-component, location-and-scale mixture of Gaussians.
## Initialize the parameters
KK = 2
w = rep(1, KK)/KK
mu = rnorm(KK, mean(x), sd(x))
sigma = rep(sd(x)/KK, KK)
epsilon = 0.000001
s = 0
sw = FALSE
KL = -Inf
KL.out = NULL
print(paste(w, mu, sigma))
## [1] "0.5 2.09064211995203 0.570685625552604" "0.5 3.31188147955927 0.570685625552604"
while(!sw){
## E step
v = array(0, dim=c(n,KK))
for(k in 1:KK){
v[,k] = log(w[k]) + dnorm(x, mu[k], sigma[k],log=TRUE)
}
for(i in 1:n){
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = apply(v,2,mean)
mu = rep(0, KK)
for(k in 1:KK){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Standard deviations
sigma = rep(0,KK)
for(k in 1:KK){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
##Check convergence
KLn = 0
for(i in 1:n){
for(k in 1:KK){
KLn = KLn + v[i,k]*(log(w[k]) + dnorm(x[i], mu[k], sigma[k], log=TRUE))
}
}
if(abs(KLn-KL)/abs(KLn)<epsilon){
sw=TRUE
}
KL = KLn
KL.out = c(KL.out, KL)
s = s + 1
print(paste(w, mu, sigma))
print(paste(s, KLn))
}
## [1] "0.323411346093372 2.03239424384766 0.319088020905558" "0.676588653906628 4.18346319243881 0.610927726279495"
## [1] "1 -365.980520229296"
## [1] "0.345321046809079 2.01509475547428 0.233127783239883" "0.654678953190921 4.26457646797335 0.454431847605638"
## [1] "2 -284.034576357627"
## [1] "0.347252299734954 2.01600557221248 0.231560636683754" "0.652747700265046 4.27074735900882 0.441039727370028"
## [1] "3 -278.069682966519"
## [1] "0.347764399292391 2.01712554199243 0.233262633652149" "0.652235600707609 4.27192050357894 0.439208482752986"
## [1] "4 -278.022261017908"
## [1] "0.348036245536812 2.01775217094114 0.234258359617887" "0.651963754463188 4.2725261629471 0.438293703444708"
## [1] "5 -278.05543036737"
## [1] "0.348190473586506 2.01810966413281 0.234827746217508" "0.651809526413494 4.27286870729867 0.437777579940825"
## [1] "6 -278.076486185349"
## [1] "0.348279453944333 2.01831640391427 0.235157208020331" "0.651720546055667 4.27306607110599 0.437480444370646"
## [1] "7 -278.089037902181"
## [1] "0.348331238876471 2.01843687815251 0.235349244201139" "0.651668761123529 4.27318084892305 0.437307715750227"
## [1] "8 -278.096461029117"
## [1] "0.348361524937117 2.01850738838238 0.235461651763411" "0.651638475062883 4.27324794787915 0.437206762070107"
## [1] "9 -278.100840042692"
## [1] "0.348379287553913 2.0185487597672 0.235527610551497" "0.651620712446087 4.2732872915625 0.437147575298129"
## [1] "10 -278.103420729967"
## [1] "0.348389722312361 2.01857306962596 0.235566369436335" "0.651610277687639 4.27331040100001 0.43711281317434"
## [1] "11 -278.104940951277"
## [1] "0.348395858142236 2.01858736632802 0.235589164138868" "0.651604141857764 4.27332398865597 0.437092374983608"
## [1] "12 -278.105836293863"
## [1] "0.348399468146303 2.01859577847615 0.235602576656094" "0.651600531853697 4.27333198254208 0.437080351100509"
## [1] "13 -278.106363555846"
## [1] "0.348401592784744 2.01860072962282 0.235610470931357" "0.651598407215256 4.27333668714509 0.437073274849119"
## [1] "14 -278.106674040049"
## [1] "0.348402843465363 2.01860364422872 0.235615118096664" "0.651597156534637 4.27333945649048 0.437069109479295"
## [1] "15 -278.106856866558"
Provide code to generate the density estimate on a grid consisting of 150 points in the interval [0,7].
## Plot EM Density Estimate
Eruptions = seq(0, 7,length.out=300)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
for(k in 1:KK){
density.EM[s] = density.EM[s] + w[k]*dnorm(Eruptions[s], mu[k], sigma[k])
}
}
yy = density(x)
#plot(Eruptions, density.EM, col="red", lwd=2, type="l")
plot(Eruptions, rep(density.EM, 2), col="red", lwd=2, type="l")
points(x, rep(0,n))
Provide the a graph of the density estimate on the interval [0,7].
Here is the code to generate the graph. I apparently can not copy the graph to this window.
## Plot EM Density Estimate
Eruptions = seq(0, 7,length.out=300)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
for(k in 1:KK){
density.EM[s] = density.EM[s] + w[k]*dnorm(Eruptions[s], mu[k], sigma[k])
}
}
yy = density(x)
plot(Eruptions, rep(density.EM, 2), col="red", lwd=2, type="l")
points(x, rep(0,n))
Is the E step implemented correctly?
There are a couple of ways in which the E step can be implemented. This is an example that uses \(\omega\) and \(1-\omega\) as the weights of the two components
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## Error in v[i, 1] <- log(w) + dnorm(x[i], mu[1], sigma[1], log = T): number of items to replace is not a multiple of replacement length
An alternative implementation that is also valid would parameterize the weights in terms of \(\omega_1\) and \(\omega_2\), so be open minded when grading.
Is the M step implemented correctly?
The exact form of the implementation of the M step also depends on how the weights are parameterized. Assuming that \(\omega\) and \(1-\omega\) are used,
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
As always, please remember that there are multiple ways to achieve the same goal in R.
Does the code to generate the density estimate appear correct?
This is an example that, as before, assumes that the weights are parameterized as ww and \(1-\omega\).
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) +
(1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l")
## Error in plot.window(...): need finite 'ylim' values
points(x,rep(0,n))
Does the graph appear to be correct?
The density estimate should look similar to the Figure below. Note that the variance of both components is clearly different.
The complete code for the EM algorithm follows:
###### Setup data
x = faithful$eruptions
n = length(x)
###### EM algorithm to fit the location-and-scale mixture of 2 Gaussians
w = 0.5
mu = c(mean(x)-sd(x), mean(x)+sd(x))
sigma = rep(sd(x)/4,2)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
## Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*(log(w) + dnorm(x[i],mu[1],sigma[1],log=TRUE)) +
v[i,2]*(log(1-w) + dnorm(x[i],mu[2],sigma[2],log=TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -309.450737493483"
## [1] "2 -292.32994157869"
## [1] "3 -280.279590400843"
## [1] "4 -278.882852059725"
## [1] "5 -278.687833281763"
## [1] "6 -278.515594042029"
## [1] "7 -278.371067671734"
## [1] "8 -278.269313940617"
## [1] "9 -278.20429223392"
## [1] "10 -278.164685957935"
## [1] "11 -278.141072956464"
## [1] "12 -278.127115849347"
## [1] "13 -278.118891261794"
## [1] "14 -278.114049071933"
## [1] "15 -278.111198729535"
## [1] "16 -278.109520815145"
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) +
(1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l", xlab="Eruptions")
points(x,rep(0,n))
Provide an EM algorithm to fit the two-component, location-and-scale mixture of Gaussians.
## E step
KK = 2
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
Provide code to generate the density estimate on a grid consisting of 150 points in the interval [0,7].
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) + (1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l")
points(x,rep(0,n))
Provide the a graph of the density estimate on the interval [0,7].
Here is the code to generate the graph. I apparently can not copy the graph to this window.
###### Setup
#data
x = faithful$eruptions
n = length(x)
###### EM algorithm to fit the location-and-scale mixture of 2 Gaussians
w = 0.5
mu = c(mean(x)-sd(x), mean(x)+sd(x))
sigma = rep(sd(x)/4,2)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
}
## Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*(log(w) + dnorm(x[i],mu[1],sigma[1],log=TRUE)) + v[i,2]*(log(1-w) + dnorm(x[i],mu[2],sigma[2],log=TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -1259.19051378529"
## [1] "2 -1845.66425838666"
## [1] "3 -1845.6665983302"
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) + (1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l", xlab="Eruptions")
points(x,rep(0,n))
Is the E step implemented correctly?
There are a couple of ways in which the E step can be implemented. This is an example that uses \(\omega\) and \(1-\omega\) as the weights of the two components
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
An alternative implementation that is also valid would parameterize the weights in terms of \(\omega_1\) and \(\omega_2\), so be open minded when grading.
Is the M step implemented correctly?
The exact form of the implementation of the M step also depends on how the weights are parameterized. Assuming that \(\omega\) and \(1-\omega\) are used,
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
As always, please remember that there are multiple ways to achieve the same goal in R.
Does the code to generate the density estimate appear correct?
This is an example that, as before, assumes that the weights are parameterized as ww and \(1-\omega\).
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) + (1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l")
points(x,rep(0,n))
Does the graph appear to be correct?
The density estimate should look similar to the Figure below. Note that the variance of both components is clearly different.
The complete code for the EM algorithm follows:
###### Setup data
x = faithful$eruptions
n = length(x)
###### EM algorithm to fit the location-and-scale mixture of 2 Gaussians
w = 0.5
mu = c(mean(x)-sd(x), mean(x)+sd(x))
sigma = rep(sd(x)/4,2)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
## Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*(log(w) + dnorm(x[i],mu[1],sigma[1],log=TRUE)) +
v[i,2]*(log(1-w) + dnorm(x[i],mu[2],sigma[2],log=TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -309.450737493483"
## [1] "2 -292.32994157869"
## [1] "3 -280.279590400843"
## [1] "4 -278.882852059725"
## [1] "5 -278.687833281763"
## [1] "6 -278.515594042029"
## [1] "7 -278.371067671734"
## [1] "8 -278.269313940617"
## [1] "9 -278.20429223392"
## [1] "10 -278.164685957935"
## [1] "11 -278.141072956464"
## [1] "12 -278.127115849347"
## [1] "13 -278.118891261794"
## [1] "14 -278.114049071933"
## [1] "15 -278.111198729535"
## [1] "16 -278.109520815145"
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) + (1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l", xlab="Eruptions")
points(x,rep(0,n))
Provide an EM algorithm to fit the two-component, location-and-scale mixture of Gaussians.
KK = 2
n = length(x)
w = 1/2
mu = c(2, 4.5) # estimate from graph visually
sigma = c(1, 1)
epsilon = 1e-6
s = 0
sw = FALSE
KL = -Inf
KL.out = NULL
while(!sw){
## E step
v = array(0, dim=c(n,KK))
for(k in 1:KK){
v[,k] = log((k-1) - w*(-1)**k) + dnorm(x, mu[k], sigma[k],log=TRUE)
}
for(i in 1:n){
ve = exp(v[i,] - max(v[i,]))
v[i,] = ve/sum(ve)
}
## M step
# Weights
w = mean(v[,1])
mu = rep(0, KK)
for(k in 1:KK) {
mu[k] = sum(v[,k] * x) / sum(v[,k])
}
# Standard deviations
for (k in 1:KK) {
sigma[k] = sqrt(sum(v[,k]*(x - mu[k])**2)/sum(v[,k]))
}
##Check convergence
KLn = 0
for(i in 1:n){
KLn = KLn + v[i, 1] * (log(w) + dnorm(x[i], mu[1], sigma[1], log=TRUE)) +
v[i, 2] * (log(1- w) + dnorm(x[i], mu[2], sigma[2], log=TRUE))
}
if(abs(KLn-KL)/abs(KLn)<epsilon){sw=TRUE}
KL = KLn
KL.out = c(KL.out, KL)
s = s + 1
print(paste(s, KLn))
}
## [1] "1 -436.463760609134"
## [1] "2 -342.896071532877"
## [1] "3 -302.631346491922"
## [1] "4 -282.777878794605"
## [1] "5 -279.072132966187"
## [1] "6 -278.75385863479"
## [1] "7 -278.579642201666"
## [1] "8 -278.421143680882"
## [1] "9 -278.303103275969"
## [1] "10 -278.225414767213"
## [1] "11 -278.177421001705"
## [1] "12 -278.148633059412"
## [1] "13 -278.131577373084"
## [1] "14 -278.121519008874"
## [1] "15 -278.115595965187"
## [1] "16 -278.112109304446"
## [1] "17 -278.110056858197"
## [1] "18 -278.108848585896"
## [1] "19 -278.108137228569"
## [1] "20 -278.107718403548"
## [1] "21 -278.107471804299"
Provide code to generate the density estimate on a grid consisting of 150 points in the interval [0,7].
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(i in 1:nxx){
for(k in 1:KK){
density.EM[i] = density.EM[i] + ((k-1) - w*(-1)**k) * dnorm(xx[i], mu[k], sigma[k])
}
}
plot(xx, density.EM, col="red", lwd=2, type="l")
points(x,rep(0,n))
Provide the a graph of the density estimate on the interval [0,7].
Here is the code to generate the graph. I apparently can not copy the graph to this window.
Is the E step implemented correctly?
There are a couple of ways in which the E step can be implemented. This is an example that uses \(\omega\) and \(1-\omega\) as the weights of the two components
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
An alternative implementation that is also valid would parameterize the weights in terms of \(\omega_1\) and \(\omega_2\), so be open minded when grading.
Is the M step implemented correctly?
The exact form of the implementation of the M step also depends on how the weights are parameterized. Assuming that \(\omega\) and \(1-\omega\) are used,
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
As always, please remember that there are multiple ways to achieve the same goal in R.
Does the code to generate the density estimate appear correct?
This is an example that, as before, assumes that the weights are parameterized as ww and \(1-\omega\).
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) + (1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l")
points(x,rep(0,n))
Does the graph appear to be correct?
The density estimate should look similar to the Figure below. Note that the variance of both components is clearly different.
The complete code for the EM algorithm follows:
###### Setup data
x = faithful$eruptions
n = length(x)
###### EM algorithm to fit the location-and-scale mixture of 2 Gaussians
w = 0.5
mu = c(mean(x)-sd(x), mean(x)+sd(x))
sigma = rep(sd(x)/4,2)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
v[i,1] = log(w) + dnorm(x[i], mu[1], sigma[1], log=T)
v[i,2] = log(1-w) + dnorm(x[i], mu[2], sigma[2], log=T)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}
## M step
# Weights
w = mean(v[,1])
# Means
mu = rep(0, 2)
for(k in 1:2){
for(i in 1:n){
mu[k] = mu[k] + v[i,k]*x[i]
}
mu[k] = mu[k]/sum(v[,k])
}
# Variances
sigma = rep(0,2)
for(k in 1:2){
for(i in 1:n){
sigma[k] = sigma[k] + v[i,k]*(x[i] - mu[k])^2
}
sigma[k] = sqrt(sigma[k]/sum(v[,k]))
}
## Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*(log(w) + dnorm(x[i],mu[1],sigma[1],log=TRUE)) +
v[i,2]*(log(1-w) + dnorm(x[i],mu[2],sigma[2],log=TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -309.450737493483"
## [1] "2 -292.32994157869"
## [1] "3 -280.279590400843"
## [1] "4 -278.882852059725"
## [1] "5 -278.687833281763"
## [1] "6 -278.515594042029"
## [1] "7 -278.371067671734"
## [1] "8 -278.269313940617"
## [1] "9 -278.20429223392"
## [1] "10 -278.164685957935"
## [1] "11 -278.141072956464"
## [1] "12 -278.127115849347"
## [1] "13 -278.118891261794"
## [1] "14 -278.114049071933"
## [1] "15 -278.111198729535"
## [1] "16 -278.109520815145"
xx = seq(0,7,length.out=150)
nxx = length(xx)
density.EM = rep(0, nxx)
for(s in 1:nxx){
density.EM[s] = density.EM[s] + w*dnorm(xx[s], mu[1], sigma[1]) + (1-w)*dnorm(xx[s], mu[2], sigma[2])
}
plot(xx, density.EM, col="red", lwd=2, type="l", xlab="Eruptions")
points(x,rep(0,n))
It’s useful to record some information about how your file was created.
suppressMessages(require('dplyr', quietly = TRUE))
suppressMessages(require('magrittr', quietly = TRUE))
suppressMessages(require('formattable', quietly = TRUE))
suppressMessages(require('knitr', quietly = TRUE))
suppressMessages(require('kableExtra', quietly = TRUE))
sys1 <- devtools::session_info()$platform %>%
unlist %>% data.frame(Category = names(.), session_info = .)
rownames(sys1) <- NULL
sys2 <- data.frame(Sys.info()) %>%
dplyr::mutate(Category = rownames(.)) %>% .[2:1]
names(sys2)[2] <- c('Sys.info')
rownames(sys2) <- NULL
if (nrow(sys1) == 9 & nrow(sys2) == 8) {
sys2 %<>% rbind(., data.frame(
Category = 'Current time',
Sys.info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
} else {
sys1 %<>% rbind(., data.frame(
Category = 'Current time',
session_info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
}
sys <- cbind(sys1, sys2) %>%
kbl(caption = 'Additional session information:') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
row_spec(0, background = 'DimGrey', color = 'yellow') %>%
column_spec(1, background = 'CornflowerBlue', color = 'red') %>%
column_spec(2, background = 'grey', color = 'black') %>%
column_spec(3, background = 'CornflowerBlue', color = 'blue') %>%
column_spec(4, background = 'grey', color = 'white') %>%
row_spec(9, bold = T, color = 'yellow', background = '#D7261E')
rm(sys1, sys2)
sys
Category | session_info | Category | Sys.info |
---|---|---|---|
version | R version 4.1.0 (2021-05-18) | sysname | Linux |
os | Ubuntu 20.04.2 LTS | release | 5.8.0-54-generic |
system | x86_64, linux-gnu | version | #61~20.04.1-Ubuntu SMP Thu May 13 00:05:49 UTC 2021 |
ui | X11 | nodename | Scibrokes-Trading |
language | en | machine | x86_64 |
collate | en_US.UTF-8 | login | englianhu |
ctype | en_US.UTF-8 | user | englianhu |
tz | Asia/Tokyo | effective_user | englianhu |
date | 2021-05-25 | Current time | 2021-05-25 17:11:02 JST🗾 |