EM.multi <- function(theta, maxit, tollerr) {
  header = paste0("iteration", " theta", " MRE")
  print(header)
  print(sprintf('%2.0f %12.12f --', 0, theta))
  for (it in 1:maxit) {
    # E-step: obtain E*(x2), the conditional expectation of the split cell count
    Estar.x2 = 1997*(theta/(2 + theta))
    # M-step: obtain theta.new by maximizing the expected complete-data log-likelihood
    theta.new = (Estar.x2 + 32)/(Estar.x2 + 907 + 904 + 32)
    # check convergence using the modified relative error (MRE)
    mre = abs(theta.new - theta)/max(1, abs(theta.new))
    # report progress
    print(sprintf('%2.0f %12.12f %.1e', it, theta.new, mre))
    if (mre < tollerr) {
      break
    }
    theta = theta.new
  } # end for loop using maxit
  return(theta)
}
theta.0 = 0.02
tolerance = 1e-6
estimate = EM.multi(theta.0, maxit=200, tollerr=tolerance)
## [1] "iteration theta MRE"
## [1] " 0 0.020000000000 --"
## [1] " 1 0.027793132773 7.8e-03"
## [1] " 2 0.031742941132 3.9e-03"
## [1] " 3 0.033721123764 2.0e-03"
## [1] " 4 0.034705946241 9.8e-04"
## [1] " 5 0.035194771667 4.9e-04"
## [1] " 6 0.035437045211 2.4e-04"
## [1] " 7 0.035557033560 1.2e-04"
## [1] " 8 0.035616437341 5.9e-05"
## [1] " 9 0.035645841640 2.9e-05"
## [1] "10 0.035660395186 1.5e-05"
## [1] "11 0.035667598091 7.2e-06"
## [1] "12 0.035671162906 3.6e-06"
## [1] "13 0.035672927162 1.8e-06"
## [1] "14 0.035673800302 8.7e-07"
theta.star = (-1657 + sqrt(3728689))/7680 # closed-form MLE: positive root of 3840*theta^2 + 1657*theta - 64 = 0
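To check the implementation, we can verify (a minimal sketch reusing the E- and M-steps from EM.multi above) that theta.star is a fixed point of the EM update map:
EM.update <- function(theta) {
  # one E-step followed by one M-step, exactly as inside EM.multi
  Estar.x2 = 1997*(theta/(2 + theta))
  (Estar.x2 + 32)/(Estar.x2 + 907 + 904 + 32)
}
abs(EM.update(theta.star) - theta.star) # should be numerically zero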
EM.multi.cvratio <- function(theta, maxit, tollerr) {
  T.STAR = (-1657 + sqrt(3728689))/7680
  header = paste0("iteration", " theta", " Lin.Ratio", " Quad.Ratio")
  print(header)
  print(sprintf('%2.0f %12.12f -- -- ', 0, theta))
  for (it in 1:maxit) {
    # E-step: obtain E*(x2)
    Estar.x2 = 1997*(theta/(2 + theta))
    # M-step: obtain theta.new
    theta.new = (Estar.x2 + 32)/(Estar.x2 + 907 + 904 + 32)
    # ratios of successive errors: order 1 (linear) and order 2 (quadratic)
    lin.ratio = abs(theta.new - T.STAR)/abs(theta - T.STAR)
    quad.ratio = abs(theta.new - T.STAR)/abs(theta - T.STAR)^2
    # check convergence
    mre = abs(theta.new - theta)/max(1, abs(theta.new))
    # report progress
    print(sprintf('%2.0f %12.12f %.3e %.3e', it, theta.new, lin.ratio, quad.ratio))
    if (mre < tollerr) {
      break
    }
    theta = theta.new
  } # end for loop using maxit
  return(theta)
}
theta.0 = 0.02
tolerance = 1e-10
ratios = EM.multi.cvratio(theta.0, maxit=200, tolerance)
## [1] "iteration theta Lin.Ratio Quad.Ratio"
## [1] " 0 0.020000000000 -- -- "
## [1] " 1 0.027793132773 5.028e-01 3.208e+01"
## [1] " 2 0.031742941132 4.989e-01 6.329e+01"
## [1] " 3 0.033721123764 4.969e-01 1.264e+02"
## [1] " 4 0.034705946241 4.959e-01 2.538e+02"
## [1] " 5 0.035194771667 4.954e-01 5.114e+02"
## [1] " 6 0.035437045211 4.951e-01 1.032e+03"
## [1] " 7 0.035557033560 4.950e-01 2.083e+03"
## [1] " 8 0.035616437341 4.950e-01 4.208e+03"
## [1] " 9 0.035645841640 4.949e-01 8.501e+03"
## [1] "10 0.035660395186 4.949e-01 1.718e+04"
## [1] "11 0.035667598091 4.949e-01 3.470e+04"
## [1] "12 0.035671162906 4.949e-01 7.012e+04"
## [1] "13 0.035672927162 4.949e-01 1.417e+05"
## [1] "14 0.035673800302 4.949e-01 2.863e+05"
## [1] "15 0.035674232423 4.949e-01 5.785e+05"
## [1] "16 0.035674446281 4.949e-01 1.169e+06"
## [1] "17 0.035674552120 4.949e-01 2.362e+06"
## [1] "18 0.035674604500 4.949e-01 4.772e+06"
## [1] "19 0.035674630423 4.949e-01 9.643e+06"
## [1] "20 0.035674643252 4.949e-01 1.948e+07"
## [1] "21 0.035674649602 4.949e-01 3.937e+07"
## [1] "22 0.035674652744 4.949e-01 7.955e+07"
## [1] "23 0.035674654299 4.949e-01 1.607e+08"
## [1] "24 0.035674655069 4.949e-01 3.248e+08"
## [1] "25 0.035674655450 4.949e-01 6.563e+08"
## [1] "26 0.035674655638 4.949e-01 1.326e+09"
## [1] "27 0.035674655731 4.949e-01 2.679e+09"
The algorithm appears to converge linearly: the linear ratio settles at a fixed positive constant (about 4.949e-01). It does not converge quadratically, since the quadratic ratio grows without bound at every iteration instead of approaching a fixed positive limit.
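This matches the general theory for EM viewed as a fixed-point iteration: near the solution the error contracts at the asymptotic rate \(|M'(\theta^{*})|\), where \(M\) is the EM update map. As a quick numerical check (a sketch reusing the EM.update function defined earlier), a central finite difference at theta.star should reproduce the limiting ratio of about 0.4949:
h = 1e-6
# numerical derivative of the EM map at the MLE: the linear convergence rate
(EM.update(theta.star + h) - EM.update(theta.star - h))/(2*h)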
data = read.csv('ExJ42.txt')
y = data$y
# histogram of y with a kernel density estimate overlaid
hist(y, prob=TRUE, col='black', border='white')
lines(density(y, na.rm=TRUE), col='red', lwd=4)
grid(nx=NA, ny=NULL, lty=1, lwd=1, col='gray')
The EM Algorithm is the following:
- Start with an initial guess \((\alpha, \beta, \mu_1, \mu_2, \mu_3, \sigma^2)\)
- E-step: Compute \(E^{*}(z_{ij}) = \frac{f_j(y_i)\pi_j}{\sum_{k=1}^{3}f_k(y_i)\pi_k}\), where \(f_j\) is the \(N(\mu_j, \sigma^2)\) density and the mixing weights are \(\pi_1 = \alpha\), \(\pi_2 = \beta\), \(\pi_3 = 1 - \alpha - \beta\)
- M-step: Obtain the following:
\[ \hat{\alpha} = \sum_{i=1}^{n}\frac{z_{i1}}{n}\\ \hat{\beta} = \sum_{i=1}^{n}\frac{z_{i2}}{n}\\ \hat{\mu}_1 = \frac{\sum_{i=1}^{n}z_{i1}y_i}{\sum_{i=1}^{n}z_{i1}}\\ \hat{\mu}_2 = \frac{\sum_{i=1}^{n}z_{i2}y_i}{\sum_{i=1}^{n}z_{i2}}\\ \hat{\mu}_3 = \frac{\sum_{i=1}^{n}z_{i3}y_i}{\sum_{i=1}^{n}z_{i3}}\\ \hat{\sigma}^2 = \frac{\sum_{i=1}^n \sum_{j=1}^3 z_{ij}(y_i - \hat{\mu}_j)^2}{\sum_{i=1}^n \sum_{j=1}^3 z_{ij}} \] Replace the current estimates with these updated values and check the MRE for convergence. If converged, stop; otherwise repeat, up to a maximum of 100 iterations.
EM.finitemixture <- function(y, initials, maxit, tolerance) {
  n = length(y)
  header = paste0("iteration", " log.like", " MRE")
  print(header)
  for (it in 1:maxit) {
    # unpack the current parameter values: (alpha, beta, mu1, mu2, mu3, sigma)
    alpha = initials[1]
    beta = initials[2]
    p3 = 1 - alpha - beta
    mu1 = initials[3]
    mu2 = initials[4]
    mu3 = initials[5]
    sigma = initials[6]
    # component densities and mixture-weighted densities
    f1 = dnorm(y, mean=mu1, sd=sigma)
    f2 = dnorm(y, mean=mu2, sd=sigma)
    f3 = dnorm(y, mean=mu3, sd=sigma)
    N1 = f1*alpha
    N2 = f2*beta
    N3 = f3*p3
    D = N1 + N2 + N3
    # E-step: compute E*(z_ij), the posterior component probabilities
    Z.i1 = N1/D
    Z.i2 = N2/D
    Z.i3 = N3/D
    # M-step: update mixing weights, means, and the common variance
    alpha.hat = sum(Z.i1)/n # alpha
    beta.hat = sum(Z.i2)/n # beta
    mu1.hat = sum(Z.i1*y)/sum(Z.i1) # mu1
    mu2.hat = sum(Z.i2*y)/sum(Z.i2) # mu2
    mu3.hat = sum(Z.i3*y)/sum(Z.i3) # mu3
    sigma2.hat = sum(Z.i1*(y - mu1.hat)^2 + Z.i2*(y - mu2.hat)^2 + Z.i3*(y - mu3.hat)^2)/n
    sigma.hat = sqrt(sigma2.hat) # sigma
    # expected complete-data log-likelihood at the current parameters (monitoring only)
    log.like = sum(Z.i1*(log(f1) + log(alpha)) + Z.i2*(log(f2) + log(beta)) + Z.i3*(log(f3) + log(p3)))
    theta.new = c(alpha.hat, beta.hat, mu1.hat, mu2.hat, mu3.hat, sigma.hat)
    # check convergence: maximum modified relative error across the parameters
    mre = max(abs(theta.new - initials)/pmax(1, abs(theta.new)))
    print(sprintf('%2.0f %12.5f %.2e', it, log.like, mre))
    if (mre < tolerance) {
      break
    }
    initials = theta.new
  } # end for loop
  print(initials)
  return(initials)
} # end function
initials = c(1/3, 1/3, 0, 3, 5, 1) #alpha, beta, mu1, mu2, mu3, sigma
est = EM.finitemixture(y, initials, maxit=100, 10^-6)
## [1] "iteration log.like MRE"
## [1] " 1 -795.56493 5.25e-02"
## [1] " 2 -786.56544 2.02e-02"
## [1] " 3 -781.55262 2.37e-02"
## [1] " 4 -775.15912 2.52e-02"
## [1] " 5 -768.21354 2.53e-02"
## [1] " 6 -761.32322 2.43e-02"
## [1] " 7 -754.95722 2.24e-02"
## [1] " 8 -749.42115 1.99e-02"
## [1] " 9 -744.83832 1.73e-02"
## [1] "10 -741.18061 1.47e-02"
## [1] "11 -738.33058 1.24e-02"
## [1] "12 -736.13982 1.04e-02"
## [1] "13 -734.46575 8.63e-03"
## [1] "14 -733.18756 7.18e-03"
## [1] "15 -732.20942 5.96e-03"
## [1] "16 -731.45784 4.94e-03"
## [1] "17 -730.87742 4.09e-03"
## [1] "18 -730.42668 3.39e-03"
## [1] "19 -730.07464 2.80e-03"
## [1] "20 -729.79813 2.31e-03"
## [1] "21 -729.57976 1.91e-03"
## [1] "22 -729.40641 1.58e-03"
## [1] "23 -729.26814 1.30e-03"
## [1] "24 -729.15739 1.07e-03"
## [1] "25 -729.06833 8.83e-04"
## [1] "26 -728.99648 7.28e-04"
## [1] "27 -728.93833 5.99e-04"
## [1] "28 -728.89115 4.94e-04"
## [1] "29 -728.85279 4.06e-04"
## [1] "30 -728.82153 3.35e-04"
## [1] "31 -728.79603 2.75e-04"
## [1] "32 -728.77520 2.27e-04"
## [1] "33 -728.75815 1.87e-04"
## [1] "34 -728.74420 1.53e-04"
## [1] "35 -728.73276 1.26e-04"
## [1] "36 -728.72339 1.04e-04"
## [1] "37 -728.71570 8.55e-05"
## [1] "38 -728.70938 7.03e-05"
## [1] "39 -728.70420 5.79e-05"
## [1] "40 -728.69994 4.76e-05"
## [1] "41 -728.69644 3.92e-05"
## [1] "42 -728.69357 3.22e-05"
## [1] "43 -728.69121 2.65e-05"
## [1] "44 -728.68926 2.18e-05"
## [1] "45 -728.68767 1.79e-05"
## [1] "46 -728.68635 1.47e-05"
## [1] "47 -728.68527 1.21e-05"
## [1] "48 -728.68439 9.98e-06"
## [1] "49 -728.68366 8.21e-06"
## [1] "50 -728.68306 6.75e-06"
## [1] "51 -728.68256 5.55e-06"
## [1] "52 -728.68216 4.57e-06"
## [1] "53 -728.68182 3.76e-06"
## [1] "54 -728.68155 3.09e-06"
## [1] "55 -728.68132 2.54e-06"
## [1] "56 -728.68114 2.09e-06"
## [1] "57 -728.68098 1.72e-06"
## [1] "58 -728.68086 1.42e-06"
## [1] "59 -728.68075 1.16e-06"
## [1] "60 -728.68067 9.58e-07"
## [1] 0.1808628 0.2955776 -1.0994179 1.6819511 4.8500044 1.0016649
Therefore, our estimates are the following: \[ \hat{\alpha} \approx 0.1808628\\ \hat{\beta} \approx 0.2955776\\ \hat{\mu}_1 \approx -1.0994179\\ \hat{\mu}_2 \approx 1.6819511\\ \hat{\mu}_3 \approx 4.8500044\\ \hat{\sigma} \approx 1.0016649 \]
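As a sanity check (a sketch assuming y and est from above are still in the workspace), the observed-data log-likelihood can be evaluated directly from the fitted mixture density. Note that this marginal log-likelihood is a different quantity from the expected complete-data log-likelihood printed during the iterations:
# marginal (observed-data) log-likelihood of the fitted three-component mixture
mix.dens = est[1]*dnorm(y, mean=est[3], sd=est[6]) +
  est[2]*dnorm(y, mean=est[4], sd=est[6]) +
  (1 - est[1] - est[2])*dnorm(y, mean=est[5], sd=est[6])
sum(log(mix.dens))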
sy = sort(y)
# fitted mixture density evaluated at the sorted data values
z = est[1]*dnorm(sy, mean=est[3], sd=est[6]) +
  est[2]*dnorm(sy, mean=est[4], sd=est[6]) +
  (1 - est[1] - est[2])*dnorm(sy, mean=est[5], sd=est[6])
hist(y, prob=TRUE, col='black', border='white')
grid(nx=NA, ny=NULL, lty=1, lwd=1, col='gray')
lines(sy, z, col='red', lwd=4)
case.num = seq_along(y)
group.num = rep(0, length(y))
for (i in seq_along(y)) {
  current.y = y[i]
  # posterior (responsibility) of each component for observation i
  P1 = dnorm(current.y, mean=est[3], sd=est[6])*est[1]
  P2 = dnorm(current.y, mean=est[4], sd=est[6])*est[2]
  P3 = dnorm(current.y, mean=est[5], sd=est[6])*(1 - est[1] - est[2])
  posteriors = c(P1, P2, P3)/(P1 + P2 + P3)
  # assign observation i to the component with the largest posterior probability
  group.num[i] = which.max(posteriors)
}
# map each group number to a plotting color: 1 = red, 2 = green, 3 = blue
dataColor = c('red', 'green', 'blue')[group.num]
plot(case.num, group.num, col=dataColor, xlab='Case Number', ylab='Group Number', ylim = c(1,3), yaxt='n')
axis(2, at=1:3)
The plot shows each case colored by its estimated group: red for group 1, green for group 2, and blue for group 3, so the case-by-case component assignments can be read directly off the figure.
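To summarize the classification (a small sketch using the group.num and est objects from above), the observed group sizes can be tabulated and compared with the counts implied by the estimated mixing proportions:
# cases assigned to each component vs. expected counts under the mixing weights
table(group.num)
round(length(y)*c(est[1], est[2], 1 - est[1] - est[2]))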