# data
data1 <- read.table(file="C:/RClass/ExJ42.txt", header=TRUE)
attach(data1)
# Density Histogram
hist(y, breaks=10, freq=FALSE,main="Density Histogram of Y", col="grey")
lines(density(y), col="red")
my_EM <- function(y, theta, maxit, tolerr){
n=length(y)
header = paste0("Iteration", " Log-likelihood", " MRE")
print(header)
print("---------------------------------------------------------------")
for (it in 1:maxit){
alpha = theta[1]
beta = theta[2]
mu1 = theta[3]
mu2 = theta[4]
mu3 = theta[5]
sig = sqrt(theta[6])
f1 = dnorm(y, mean=mu1, sd=sig)
f2 = dnorm(y, mean=mu2, sd=sig)
f3 = dnorm(y, mean=mu3, sd=sig)
s.zij = alpha*f1 + beta*f2 + (1-alpha-beta)*f3
zi1 = (alpha*f1)/s.zij
zi2 = (beta*f2)/s.zij
zi3 = ((1-alpha-beta)*f3)/s.zij
alpha.h = sum(zi1)/n
beta.h = sum(zi2)/n
mu1.h = sum(zi1*y)/sum(zi1)
mu2.h = sum(zi2*y)/sum(zi2)
mu3.h = sum(zi3*y)/sum(zi3)
sig2.h = sum(zi1*(y-mu1)^2+zi2*(y-mu2)^2+zi3*(y-mu3)^2)/sum(zi1+zi2+zi3)
Llike = sum(zi1*(log(f1)+log(alpha))+zi2*(log(f2)+log(beta))
+zi3*(log(f3)+log(1-alpha-beta)))
theta.n = c(alpha.h, beta.h, mu1.h, mu2.h, mu3.h, sig2.h)
Mre = max(abs(theta.n-theta)/pmax(1, abs(theta.n)))
print(sprintf('%02.0f %.12f %2.1e', it, Llike, Mre))
if (Mre< tolerr){break}
theta = theta.n
}
return(theta)
}
a = my_EM(y=y, theta=c(1/3, 1/3, 0, 1, 2, 1), maxit=100, tolerr=10^-6)
## [1] "Iteration Log-likelihood MRE"
## [1] "---------------------------------------------------------------"
## [1] "01 -1456.539471314819 8.2e-01"
## [1] "02 -905.560402094893 6.4e-01"
## [1] "03 -878.123607290854 1.4e-01"
## [1] "04 -858.351415917442 1.5e-01"
## [1] "05 -837.579549351062 1.6e-01"
## [1] "06 -818.578010842632 1.4e-01"
## [1] "07 -804.290541479803 1.1e-01"
## [1] "08 -795.495207202805 7.3e-02"
## [1] "09 -790.704725288920 4.7e-02"
## [1] "10 -787.994570224335 4.6e-02"
## [1] "11 -786.095111171536 5.3e-02"
## [1] "12 -784.359920393833 5.4e-02"
## [1] "13 -782.477230552150 5.1e-02"
## [1] "14 -780.276333259236 4.7e-02"
## [1] "15 -777.635334978986 4.2e-02"
## [1] "16 -774.449448769995 4.7e-02"
## [1] "17 -770.635720014586 5.0e-02"
## [1] "18 -766.163100739031 5.4e-02"
## [1] "19 -761.097921634787 5.6e-02"
## [1] "20 -755.644141598175 5.5e-02"
## [1] "21 -750.143201825262 5.2e-02"
## [1] "22 -745.003568844040 4.6e-02"
## [1] "23 -740.576021175303 3.8e-02"
## [1] "24 -737.046839715051 2.9e-02"
## [1] "25 -734.415723386703 2.1e-02"
## [1] "26 -732.553835896872 1.4e-02"
## [1] "27 -731.284459483583 9.7e-03"
## [1] "28 -730.440178461126 6.5e-03"
## [1] "29 -729.887309286611 4.3e-03"
## [1] "30 -729.528719046841 2.9e-03"
## [1] "31 -729.297551539763 1.9e-03"
## [1] "32 -729.149186369000 1.3e-03"
## [1] "33 -729.054346766481 8.5e-04"
## [1] "34 -728.993996543412 5.7e-04"
## [1] "35 -728.955815261687 3.9e-04"
## [1] "36 -728.931847941602 3.0e-04"
## [1] "37 -728.916965625119 2.5e-04"
## [1] "38 -728.907865613641 2.0e-04"
## [1] "39 -728.902424546607 1.6e-04"
## [1] "40 -728.899280301449 1.3e-04"
## [1] "41 -728.897561822169 1.1e-04"
## [1] "42 -728.896714512770 8.9e-05"
## [1] "43 -728.896387386926 7.3e-05"
## [1] "44 -728.896360150037 6.1e-05"
## [1] "45 -728.896496115487 5.0e-05"
## [1] "46 -728.896711838693 4.2e-05"
## [1] "47 -728.896957568583 3.5e-05"
## [1] "48 -728.897204695335 2.9e-05"
## [1] "49 -728.897437718863 2.5e-05"
## [1] "50 -728.897649134346 2.1e-05"
## [1] "51 -728.897836196254 1.7e-05"
## [1] "52 -728.897998888749 1.4e-05"
## [1] "53 -728.898138667992 1.2e-05"
## [1] "54 -728.898257695937 9.8e-06"
## [1] "55 -728.898358384999 8.2e-06"
## [1] "56 -728.898443137610 6.8e-06"
## [1] "57 -728.898514206450 5.6e-06"
## [1] "58 -728.898573628108 4.6e-06"
## [1] "59 -728.898623200320 3.8e-06"
## [1] "60 -728.898664484040 3.2e-06"
## [1] "61 -728.898698818790 2.6e-06"
## [1] "62 -728.898727344239 2.2e-06"
## [1] "63 -728.898751023840 1.8e-06"
## [1] "64 -728.898770668161 1.5e-06"
## [1] "65 -728.898786956649 1.2e-06"
## [1] "66 -728.898800457237 1.0e-06"
## [1] "67 -728.898811643618 8.5e-07"
a
## [1] 0.1808087 0.2954496 -1.0990965 1.6807930 4.8491577 1.0050563
yn = sort(y)
d = (a[1]*dnorm(yn,mean=a[3],sd=sqrt(a[6])))+(a[2]*dnorm(yn,mean=a[4],
sd=sqrt(a[6])))+((1-a[1]-a[2])*dnorm(yn,mean=a[5],sd=sqrt(a[6])))
hist(y, breaks=10, freq=FALSE, main="Density Histogram of Y", col="grey")
lines(yn, d, col="red")
case.num=seq(1:length(y))
group.num=rep(0,length(y))
for (i in 1:length(y)){
p1=dnorm(y[i],mean=a[3],sd=sqrt(a[6]))*a[3]
p2=dnorm(y[i],mean=a[4],sd=sqrt(a[6]))*a[4]
p3=dnorm(y[i],mean=a[5],sd=sqrt(a[6]))*a[5]
post.pr=c(p1/(p1+p2+p3),p2/(p1+p2+p3),p3/(p1+p2+p3))
group.num[i]=which.max(post.pr)
}
plot(x=case.num, y=group.num, col=c("red","green","blue")[group.num],
ylim=c(1,3), main="The Classification Plot")
Based on the plot, there is a pattern that a specific range of the
number of cases fall in to a specific group. This pattern indicates the
point of the convergence by each group.