Exercise J-4.2

(a)

# data
data1 <- read.table(file="C:/RClass/ExJ42.txt", header=TRUE)
attach(data1)

# Density Histogram
hist(y, breaks=10, freq=FALSE,main="Density Histogram of Y", col="grey")
lines(density(y), col="red")

(b) EM Algorithm

  • Start with initial values \[(\alpha,\beta, \mu_1, \mu_2, \mu_3, \sigma^2)\]
  • E-step: Compute \[E^*(z_{ij})=\frac{f_j(y_i)\pi_j}{\sum_{k=1}^{3}f_k(y_i)\pi_k}\]
  • M-step: Obtain \[\hat{\alpha}=\sum_{i=1}^{n}\frac{E^*(z_{i1})}{n}\] \[\hat{\beta}=\sum_{i=1}^{n}\frac{E^*(z_{i2})}{n}\] \[\hat{\mu_1}=\frac{\sum_{i=1}^{n}E^*(z_{i1})y_i}{\sum_{i=1}^{n}E^*(z_{i1})}\] \[\hat{\mu_2}=\frac{\sum_{i=1}^{n}E^*(z_{i2})y_i}{\sum_{i=1}^{n}E^*(z_{i2})}\] \[\hat{\mu_3}=\frac{\sum_{i=1}^{n}E^*(z_{i3})y_i}{\sum_{i=1}^{n}E^*(z_{i3})}\] \[\hat{\sigma^2}=\frac{\sum_{i=1}^{n}\sum_{j=1}^{3}E^*(z_{ij})(y_i-\mu_j)^2}{\sum_{i=1}^{n}\sum_{j=1}^{3}E^*(z_{ij})}\]
  • Convergence Check: Replace starting values by above estimates and iterate until convergence. (Convergence criteria: Maximum number of iterations=100, MRE<10^-6)

(c) EM Algorithm

my_EM <- function(y, theta, maxit, tolerr){
n=length(y)  
header = paste0("Iteration", "    Log-likelihood", "      MRE")
print(header)
print("---------------------------------------------------------------")

for (it in 1:maxit){
  alpha = theta[1]
  beta = theta[2]
  mu1 = theta[3]
  mu2 = theta[4]
  mu3 = theta[5]
  sig = sqrt(theta[6])
  
  f1 = dnorm(y, mean=mu1, sd=sig)
  f2 = dnorm(y, mean=mu2, sd=sig)
  f3 = dnorm(y, mean=mu3, sd=sig)
  
  s.zij = alpha*f1 + beta*f2 + (1-alpha-beta)*f3  
  zi1 = (alpha*f1)/s.zij
  zi2 = (beta*f2)/s.zij
  zi3 = ((1-alpha-beta)*f3)/s.zij
  
  alpha.h = sum(zi1)/n
  beta.h = sum(zi2)/n
  mu1.h = sum(zi1*y)/sum(zi1) 
  mu2.h = sum(zi2*y)/sum(zi2) 
  mu3.h = sum(zi3*y)/sum(zi3) 
  sig2.h = sum(zi1*(y-mu1)^2+zi2*(y-mu2)^2+zi3*(y-mu3)^2)/sum(zi1+zi2+zi3) 
  
  Llike = sum(zi1*(log(f1)+log(alpha))+zi2*(log(f2)+log(beta))
          +zi3*(log(f3)+log(1-alpha-beta)))
  theta.n = c(alpha.h, beta.h, mu1.h, mu2.h, mu3.h, sig2.h)     
  Mre = max(abs(theta.n-theta)/pmax(1, abs(theta.n)))  
  print(sprintf('%02.0f         %.12f      %2.1e', it, Llike, Mre))
  if (Mre< tolerr){break}
  theta = theta.n
  }
return(theta)
}

a = my_EM(y=y, theta=c(1/3, 1/3, 0, 1, 2, 1), maxit=100, tolerr=10^-6)
## [1] "Iteration    Log-likelihood      MRE"
## [1] "---------------------------------------------------------------"
## [1] "01         -1456.539471314819      8.2e-01"
## [1] "02         -905.560402094893      6.4e-01"
## [1] "03         -878.123607290854      1.4e-01"
## [1] "04         -858.351415917442      1.5e-01"
## [1] "05         -837.579549351062      1.6e-01"
## [1] "06         -818.578010842632      1.4e-01"
## [1] "07         -804.290541479803      1.1e-01"
## [1] "08         -795.495207202805      7.3e-02"
## [1] "09         -790.704725288920      4.7e-02"
## [1] "10         -787.994570224335      4.6e-02"
## [1] "11         -786.095111171536      5.3e-02"
## [1] "12         -784.359920393833      5.4e-02"
## [1] "13         -782.477230552150      5.1e-02"
## [1] "14         -780.276333259236      4.7e-02"
## [1] "15         -777.635334978986      4.2e-02"
## [1] "16         -774.449448769995      4.7e-02"
## [1] "17         -770.635720014586      5.0e-02"
## [1] "18         -766.163100739031      5.4e-02"
## [1] "19         -761.097921634787      5.6e-02"
## [1] "20         -755.644141598175      5.5e-02"
## [1] "21         -750.143201825262      5.2e-02"
## [1] "22         -745.003568844040      4.6e-02"
## [1] "23         -740.576021175303      3.8e-02"
## [1] "24         -737.046839715051      2.9e-02"
## [1] "25         -734.415723386703      2.1e-02"
## [1] "26         -732.553835896872      1.4e-02"
## [1] "27         -731.284459483583      9.7e-03"
## [1] "28         -730.440178461126      6.5e-03"
## [1] "29         -729.887309286611      4.3e-03"
## [1] "30         -729.528719046841      2.9e-03"
## [1] "31         -729.297551539763      1.9e-03"
## [1] "32         -729.149186369000      1.3e-03"
## [1] "33         -729.054346766481      8.5e-04"
## [1] "34         -728.993996543412      5.7e-04"
## [1] "35         -728.955815261687      3.9e-04"
## [1] "36         -728.931847941602      3.0e-04"
## [1] "37         -728.916965625119      2.5e-04"
## [1] "38         -728.907865613641      2.0e-04"
## [1] "39         -728.902424546607      1.6e-04"
## [1] "40         -728.899280301449      1.3e-04"
## [1] "41         -728.897561822169      1.1e-04"
## [1] "42         -728.896714512770      8.9e-05"
## [1] "43         -728.896387386926      7.3e-05"
## [1] "44         -728.896360150037      6.1e-05"
## [1] "45         -728.896496115487      5.0e-05"
## [1] "46         -728.896711838693      4.2e-05"
## [1] "47         -728.896957568583      3.5e-05"
## [1] "48         -728.897204695335      2.9e-05"
## [1] "49         -728.897437718863      2.5e-05"
## [1] "50         -728.897649134346      2.1e-05"
## [1] "51         -728.897836196254      1.7e-05"
## [1] "52         -728.897998888749      1.4e-05"
## [1] "53         -728.898138667992      1.2e-05"
## [1] "54         -728.898257695937      9.8e-06"
## [1] "55         -728.898358384999      8.2e-06"
## [1] "56         -728.898443137610      6.8e-06"
## [1] "57         -728.898514206450      5.6e-06"
## [1] "58         -728.898573628108      4.6e-06"
## [1] "59         -728.898623200320      3.8e-06"
## [1] "60         -728.898664484040      3.2e-06"
## [1] "61         -728.898698818790      2.6e-06"
## [1] "62         -728.898727344239      2.2e-06"
## [1] "63         -728.898751023840      1.8e-06"
## [1] "64         -728.898770668161      1.5e-06"
## [1] "65         -728.898786956649      1.2e-06"
## [1] "66         -728.898800457237      1.0e-06"
## [1] "67         -728.898811643618      8.5e-07"
a
## [1]  0.1808087  0.2954496 -1.0990965  1.6807930  4.8491577  1.0050563

(d) Density Histogram using (c)

yn = sort(y)
d = (a[1]*dnorm(yn,mean=a[3],sd=sqrt(a[6])))+(a[2]*dnorm(yn,mean=a[4],
    sd=sqrt(a[6])))+((1-a[1]-a[2])*dnorm(yn,mean=a[5],sd=sqrt(a[6])))         
hist(y, breaks=10, freq=FALSE, main="Density Histogram of Y", col="grey")
lines(yn, d, col="red")

(e) Compute the posterior probability

case.num=seq(1:length(y))
group.num=rep(0,length(y))

for (i in 1:length(y)){
  p1=dnorm(y[i],mean=a[3],sd=sqrt(a[6]))*a[3]
  p2=dnorm(y[i],mean=a[4],sd=sqrt(a[6]))*a[4]
  p3=dnorm(y[i],mean=a[5],sd=sqrt(a[6]))*a[5]
  post.pr=c(p1/(p1+p2+p3),p2/(p1+p2+p3),p3/(p1+p2+p3))
  group.num[i]=which.max(post.pr)  
}

plot(x=case.num, y=group.num, col=c("red","green","blue")[group.num],
     ylim=c(1,3), main="The Classification Plot")

Based on the plot, there is a pattern that a specific range of the number of cases fall in to a specific group. This pattern indicates the point of the convergence by each group.