EM.multi <- function(theta, maxit, tollerr) {
  header = paste0("iteration", " theta", " MRE")
  print(header)
  print(sprintf('%2.0f %12.12f --', 0, theta))
  for (it in 1:maxit) {
    # E-step: obtain E*(x2), the conditional expectation of the split cell count
    Estar.x2 = 1997*(theta/(2 + theta))
    # M-step: obtain theta.new by maximizing the expected complete-data log-likelihood
    theta.new = (Estar.x2 + 32)/(Estar.x2 + 907 + 904 + 32)
    # check convergence using the modified relative error (MRE)
    mre = abs(theta.new - theta)/max(1, abs(theta.new))
    # report progress
    print(sprintf('%2.0f %12.12f %.1e', it, theta.new, mre))
    if (mre < tollerr) {
      break
    }
    theta = theta.new
  } # end for loop using maxit
  return(theta)
}
theta.0 = 0.02
tolerance = 1e-6
estimate = EM.multi(theta.0, maxit=200, tollerr=tolerance)
## [1] "iteration theta MRE"
## [1] " 0 0.020000000000 --"
## [1] " 1 0.027793132773 7.8e-03"
## [1] " 2 0.031742941132 3.9e-03"
## [1] " 3 0.033721123764 2.0e-03"
## [1] " 4 0.034705946241 9.8e-04"
## [1] " 5 0.035194771667 4.9e-04"
## [1] " 6 0.035437045211 2.4e-04"
## [1] " 7 0.035557033560 1.2e-04"
## [1] " 8 0.035616437341 5.9e-05"
## [1] " 9 0.035645841640 2.9e-05"
## [1] "10 0.035660395186 1.5e-05"
## [1] "11 0.035667598091 7.2e-06"
## [1] "12 0.035671162906 3.6e-06"
## [1] "13 0.035672927162 1.8e-06"
## [1] "14 0.035673800302 8.7e-07"
theta.star = (-1657 + sqrt(3728689))/7680 # closed-form MLE: positive root of 3840*theta^2 + 1657*theta - 64 = 0
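To check the implementation, we can verify (a minimal sketch reusing the E- and M-steps from EM.multi above) that theta.star is a fixed point of the EM update map:
EM.update <- function(theta) {
  # one E-step followed by one M-step, exactly as inside EM.multi
  Estar.x2 = 1997*(theta/(2 + theta))
  (Estar.x2 + 32)/(Estar.x2 + 907 + 904 + 32)
}
abs(EM.update(theta.star) - theta.star) # should be numerically zero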
EM.multi.cvratio <- function(theta, maxit, tollerr) {
  T.STAR = (-1657 + sqrt(3728689))/7680
  header = paste0("iteration", " theta", " Lin.Ratio", " Quad.Ratio")
  print(header)
  print(sprintf('%2.0f %12.12f -- -- ', 0, theta))
  for (it in 1:maxit) {
    # E-step: obtain E*(x2)
    Estar.x2 = 1997*(theta/(2 + theta))
    # M-step: obtain theta.new
    theta.new = (Estar.x2 + 32)/(Estar.x2 + 907 + 904 + 32)
    # ratios of successive errors: order 1 (linear) and order 2 (quadratic)
    lin.ratio = abs(theta.new - T.STAR)/abs(theta - T.STAR)
    quad.ratio = abs(theta.new - T.STAR)/abs(theta - T.STAR)^2
    # check convergence
    mre = abs(theta.new - theta)/max(1, abs(theta.new))
    # report progress
    print(sprintf('%2.0f %12.12f %.3e %.3e', it, theta.new, lin.ratio, quad.ratio))
    if (mre < tollerr) {
      break
    }
    theta = theta.new
  } # end for loop using maxit
  return(theta)
}
theta.0 = 0.02
tolerance = 1e-10
ratios = EM.multi.cvratio(theta.0, maxit=200, tolerance)
## [1] "iteration theta Lin.Ratio Quad.Ratio"
## [1] " 0 0.020000000000 -- -- "
## [1] " 1 0.027793132773 5.028e-01 3.208e+01"
## [1] " 2 0.031742941132 4.989e-01 6.329e+01"
## [1] " 3 0.033721123764 4.969e-01 1.264e+02"
## [1] " 4 0.034705946241 4.959e-01 2.538e+02"
## [1] " 5 0.035194771667 4.954e-01 5.114e+02"
## [1] " 6 0.035437045211 4.951e-01 1.032e+03"
## [1] " 7 0.035557033560 4.950e-01 2.083e+03"
## [1] " 8 0.035616437341 4.950e-01 4.208e+03"
## [1] " 9 0.035645841640 4.949e-01 8.501e+03"
## [1] "10 0.035660395186 4.949e-01 1.718e+04"
## [1] "11 0.035667598091 4.949e-01 3.470e+04"
## [1] "12 0.035671162906 4.949e-01 7.012e+04"
## [1] "13 0.035672927162 4.949e-01 1.417e+05"
## [1] "14 0.035673800302 4.949e-01 2.863e+05"
## [1] "15 0.035674232423 4.949e-01 5.785e+05"
## [1] "16 0.035674446281 4.949e-01 1.169e+06"
## [1] "17 0.035674552120 4.949e-01 2.362e+06"
## [1] "18 0.035674604500 4.949e-01 4.772e+06"
## [1] "19 0.035674630423 4.949e-01 9.643e+06"
## [1] "20 0.035674643252 4.949e-01 1.948e+07"
## [1] "21 0.035674649602 4.949e-01 3.937e+07"
## [1] "22 0.035674652744 4.949e-01 7.955e+07"
## [1] "23 0.035674654299 4.949e-01 1.607e+08"
## [1] "24 0.035674655069 4.949e-01 3.248e+08"
## [1] "25 0.035674655450 4.949e-01 6.563e+08"
## [1] "26 0.035674655638 4.949e-01 1.326e+09"
## [1] "27 0.035674655731 4.949e-01 2.679e+09"
The algorithm appears to converge linearly: the linear ratio settles at a fixed positive constant (about 4.949e-01). It does not converge quadratically, since the quadratic ratio grows without bound at every iteration instead of approaching a fixed positive limit.
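This matches the general theory for EM viewed as a fixed-point iteration: near the solution the error contracts at the asymptotic rate \(|M'(\theta^{*})|\), where \(M\) is the EM update map. As a quick numerical check (a sketch reusing the EM.update function defined earlier), a central finite difference at theta.star should reproduce the limiting ratio of about 0.4949:
h = 1e-6
# numerical derivative of the EM map at the MLE: the linear convergence rate
(EM.update(theta.star + h) - EM.update(theta.star - h))/(2*h)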
data = read.csv('ExJ42.txt')
y = data$y
# histogram of y with a kernel density estimate overlaid
hist(y, prob=TRUE, col='black', border='white')
lines(density(y, na.rm=TRUE), col='red', lwd=4)
grid(nx=NA, ny=NULL, lty=1, lwd=1, col='gray')
The EM Algorithm is the following:
- Start with an initial guess \((\alpha, \beta, \mu_1, \mu_2, \mu_3, \sigma^2)\)
- E-step: Compute \(E^{*}(z_{ij}) = \frac{f_j(y_i)\pi_j}{\sum_{k=1}^{3}f_k(y_i)\pi_k}\), where \(f_j\) is the \(N(\mu_j, \sigma^2)\) density and the mixing weights are \(\pi_1 = \alpha\), \(\pi_2 = \beta\), \(\pi_3 = 1 - \alpha - \beta\)
- M-step: Obtain the following:
\[ \hat{\alpha} = \sum_{i=1}^{n}\frac{z_{i1}}{n}\\ \hat{\beta} = \sum_{i=1}^{n}\frac{z_{i2}}{n}\\ \hat{\mu}_1 = \frac{\sum_{i=1}^{n}z_{i1}y_i}{\sum_{i=1}^{n}z_{i1}}\\ \hat{\mu}_2 = \frac{\sum_{i=1}^{n}z_{i2}y_i}{\sum_{i=1}^{n}z_{i2}}\\ \hat{\mu}_3 = \frac{\sum_{i=1}^{n}z_{i3}y_i}{\sum_{i=1}^{n}z_{i3}}\\ \hat{\sigma}^2 = \frac{\sum_{i=1}^n \sum_{j=1}^3 z_{ij}(y_i - \hat{\mu}_j)^2}{\sum_{i=1}^n \sum_{j=1}^3 z_{ij}} \] Replace the current estimates with these updated values and check the MRE for convergence. If converged, stop; otherwise repeat, up to a maximum of 100 iterations.
EM.finitemixture <- function(y, initials, maxit, tolerance) {
  n = length(y)
  header = paste0("iteration", " log.like", " MRE")
  print(header)
  for (it in 1:maxit) {
    # unpack the current parameter values: (alpha, beta, mu1, mu2, mu3, sigma)
    alpha = initials[1]
    beta = initials[2]
    p3 = 1 - alpha - beta
    mu1 = initials[3]
    mu2 = initials[4]
    mu3 = initials[5]
    sigma = initials[6]
    # component densities and mixture-weighted densities
    f1 = dnorm(y, mean=mu1, sd=sigma)
    f2 = dnorm(y, mean=mu2, sd=sigma)
    f3 = dnorm(y, mean=mu3, sd=sigma)
    N1 = f1*alpha
    N2 = f2*beta
    N3 = f3*p3
    D = N1 + N2 + N3
    # E-step: compute E*(z_ij), the posterior component probabilities
    Z.i1 = N1/D
    Z.i2 = N2/D
    Z.i3 = N3/D
    # M-step: update mixing weights, means, and the common variance
    alpha.hat = sum(Z.i1)/n # alpha
    beta.hat = sum(Z.i2)/n # beta
    mu1.hat = sum(Z.i1*y)/sum(Z.i1) # mu1
    mu2.hat = sum(Z.i2*y)/sum(Z.i2) # mu2
    mu3.hat = sum(Z.i3*y)/sum(Z.i3) # mu3
    sigma2.hat = sum(Z.i1*(y - mu1.hat)^2 + Z.i2*(y - mu2.hat)^2 + Z.i3*(y - mu3.hat)^2)/n
    sigma.hat = sqrt(sigma2.hat) # sigma
    # expected complete-data log-likelihood at the current parameters (monitoring only)
    log.like = sum(Z.i1*(log(f1) + log(alpha)) + Z.i2*(log(f2) + log(beta)) + Z.i3*(log(f3) + log(p3)))
    theta.new = c(alpha.hat, beta.hat, mu1.hat, mu2.hat, mu3.hat, sigma.hat)
    # check convergence: maximum modified relative error across the parameters
    mre = max(abs(theta.new - initials)/pmax(1, abs(theta.new)))
    print(sprintf('%2.0f %12.5f %.2e', it, log.like, mre))
    if (mre < tolerance) {
      break
    }
    initials = theta.new
  } # end for loop
  print(initials)
  return(initials)
} # end function
initials = c(1/3, 1/3, 0, 3, 5, 1) #alpha, beta, mu1, mu2, mu3, sigma
est = EM.finitemixture(y, initials, maxit=100, 10^-6)
## [1] "iteration log.like MRE"
## [1] " 1 -795.56493 5.25e-02"
## [1] " 2 -786.56544 2.02e-02"
## [1] " 3 -781.55262 2.37e-02"
## [1] " 4 -775.15912 2.52e-02"
## [1] " 5 -768.21354 2.53e-02"
## [1] " 6 -761.32322 2.43e-02"
## [1] " 7 -754.95722 2.24e-02"
## [1] " 8 -749.42115 1.99e-02"
## [1] " 9 -744.83832 1.73e-02"
## [1] "10 -741.18061 1.47e-02"
## [1] "11 -738.33058 1.24e-02"
## [1] "12 -736.13982 1.04e-02"
## [1] "13 -734.46575 8.63e-03"
## [1] "14 -733.18756 7.18e-03"
## [1] "15 -732.20942 5.96e-03"
## [1] "16 -731.45784 4.94e-03"
## [1] "17 -730.87742 4.09e-03"
## [1] "18 -730.42668 3.39e-03"
## [1] "19 -730.07464 2.80e-03"
## [1] "20 -729.79813 2.31e-03"
## [1] "21 -729.57976 1.91e-03"
## [1] "22 -729.40641 1.58e-03"
## [1] "23 -729.26814 1.30e-03"
## [1] "24 -729.15739 1.07e-03"
## [1] "25 -729.06833 8.83e-04"
## [1] "26 -728.99648 7.28e-04"
## [1] "27 -728.93833 5.99e-04"
## [1] "28 -728.89115 4.94e-04"
## [1] "29 -728.85279 4.06e-04"
## [1] "30 -728.82153 3.35e-04"
## [1] "31 -728.79603 2.75e-04"
## [1] "32 -728.77520 2.27e-04"
## [1] "33 -728.75815 1.87e-04"
## [1] "34 -728.74420 1.53e-04"
## [1] "35 -728.73276 1.26e-04"
## [1] "36 -728.72339 1.04e-04"
## [1] "37 -728.71570 8.55e-05"
## [1] "38 -728.70938 7.03e-05"
## [1] "39 -728.70420 5.79e-05"
## [1] "40 -728.69994 4.76e-05"
## [1] "41 -728.69644 3.92e-05"
## [1] "42 -728.69357 3.22e-05"
## [1] "43 -728.69121 2.65e-05"
## [1] "44 -728.68926 2.18e-05"
## [1] "45 -728.68767 1.79e-05"
## [1] "46 -728.68635 1.47e-05"
## [1] "47 -728.68527 1.21e-05"
## [1] "48 -728.68439 9.98e-06"
## [1] "49 -728.68366 8.21e-06"
## [1] "50 -728.68306 6.75e-06"
## [1] "51 -728.68256 5.55e-06"
## [1] "52 -728.68216 4.57e-06"
## [1] "53 -728.68182 3.76e-06"
## [1] "54 -728.68155 3.09e-06"
## [1] "55 -728.68132 2.54e-06"
## [1] "56 -728.68114 2.09e-06"
## [1] "57 -728.68098 1.72e-06"
## [1] "58 -728.68086 1.42e-06"
## [1] "59 -728.68075 1.16e-06"
## [1] "60 -728.68067 9.58e-07"
## [1] 0.1808628 0.2955776 -1.0994179 1.6819511 4.8500044 1.0016649
Therefore, our estimates are the following: \[ \hat{\alpha} \approx 0.1808628\\ \hat{\beta} \approx 0.2955776\\ \hat{\mu}_1 \approx -1.0994179\\ \hat{\mu}_2 \approx 1.6819511\\ \hat{\mu}_3 \approx 4.8500044\\ \hat{\sigma} \approx 1.0016649 \]
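As a sanity check (a sketch assuming y and est from above are still in the workspace), the observed-data log-likelihood can be evaluated directly from the fitted mixture density. Note that this marginal log-likelihood is a different quantity from the expected complete-data log-likelihood printed during the iterations:
# marginal (observed-data) log-likelihood of the fitted three-component mixture
mix.dens = est[1]*dnorm(y, mean=est[3], sd=est[6]) +
  est[2]*dnorm(y, mean=est[4], sd=est[6]) +
  (1 - est[1] - est[2])*dnorm(y, mean=est[5], sd=est[6])
sum(log(mix.dens))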
sy = sort(y)
# fitted mixture density evaluated at the sorted data values
z = est[1]*dnorm(sy, mean=est[3], sd=est[6]) +
  est[2]*dnorm(sy, mean=est[4], sd=est[6]) +
  (1 - est[1] - est[2])*dnorm(sy, mean=est[5], sd=est[6])
hist(y, prob=TRUE, col='black', border='white')
grid(nx=NA, ny=NULL, lty=1, lwd=1, col='gray')
lines(sy, z, col='red', lwd=4)
case.num = seq_along(y)
group.num = rep(0, length(y))
for (i in seq_along(y)) {
  current.y = y[i]
  # posterior (responsibility) of each component for observation i
  P1 = dnorm(current.y, mean=est[3], sd=est[6])*est[1]
  P2 = dnorm(current.y, mean=est[4], sd=est[6])*est[2]
  P3 = dnorm(current.y, mean=est[5], sd=est[6])*(1 - est[1] - est[2])
  posteriors = c(P1, P2, P3)/(P1 + P2 + P3)
  # assign observation i to the component with the largest posterior probability
  group.num[i] = which.max(posteriors)
}
# map each group number to a plotting color: 1 = red, 2 = green, 3 = blue
dataColor = c('red', 'green', 'blue')[group.num]
plot(case.num, group.num, col=dataColor, xlab='Case Number', ylab='Group Number', ylim = c(1,3), yaxt='n')
axis(2, at=1:3)
The plot shows each case colored by its estimated group: red for group 1, green for group 2, and blue for group 3, so the case-by-case component assignments can be read directly off the figure.
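To summarize the classification (a small sketch using the group.num and est objects from above), the observed group sizes can be tabulated and compared with the counts implied by the estimated mixing proportions:
# cases assigned to each component vs. expected counts under the mixing weights
table(group.num)
round(length(y)*c(est[1], est[2], 1 - est[1] - est[2]))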