GH-4.3

(a)

\[Q(\theta', \theta)=-\frac{n}{2}\Big(p\cdot log(2\pi)+log|\Sigma(\theta')|+trace\{\Sigma^{-1}(\theta')[S^*-\mu(\theta')(\bar{X^*})^T-\bar{X^*}\mu(\theta')^T+\mu(\theta')\mu(\theta')^T]\}\Big)\] where \[\bar{X^*}=E^*\Big(\frac{1}{n}\sum_{i=1}^{n}X_i\Big)=\frac{1}{n}\sum_{i=1}^{n}E^*(X_i)\] and \[S^*=E^*(S)=\frac{1}{n}E^*\Big(\sum_{i=1}^{n}X_iX_i^T \Big)\]

\[dQ(d\mu')=-\frac{n}{2}trace[\Sigma'^{-1}(-\bar{X^*}d\mu'^T-d\mu'\bar{X^*}^T+d\mu'\mu'^T+\mu'd\mu'^T)]\] \[\frac{\partial Q}{\partial\mu'}=-\frac{n}{2}[\Sigma'^{-1}(-2\bar{X^*}+2\mu')]\] \[dQ(d\Sigma')=-\frac{n}{2}trace(\Sigma'^{-1}d\Sigma')-\frac{n}{2}trace(-\Sigma'^{-1}d\Sigma'\Sigma'^{-1}(S^*-\bar{X^*}\mu'^T-\mu'\bar{X^*}^T+\mu'\mu'^T))\] \[=\frac{n}{2}trace(-\Sigma'^{-1}+\Sigma'^{-1}(S^*-\bar{X^*}\mu'^T-\mu'\bar{X^*}^T+\mu'\mu'^T)\Sigma'^{-1})d\Sigma'\] \[ \frac{\partial Q}{\partial\Sigma'}= \begin{cases} \frac{\partial Q}{\partial\sigma'_{ii}}=\frac{n}{2}\{A'\}_{ii}\\ \frac{\partial Q}{\partial\sigma'_{ij}}=n\{A'\}_{ij} \end{cases} \] \[ where\text{ } A'=\Sigma'^{-1}(S^*-\Sigma'+(\mu'-2\bar{X^*})\mu'^T)\Sigma'^{-1}\]

Since \[\frac{\partial Q\{(\mu',\Sigma'),(\mu,\Sigma)\}}{\partial (\mu',\Sigma')}\vert_{(\mu',\Sigma')=(\mu, \Sigma)}=\nabla\ell_{obs}(\mu,\Sigma)\] Therefore, \[\frac{\partial\ell}{\partial\mu}=\frac{\partial Q}{\partial\mu'}|_{(\mu'=\mu,\Sigma'=\Sigma)}=-\frac{n}{2}[\Sigma^{-1}(-2\bar{X}^*+2\mu)]\] \[ \frac{\partial\ell}{\partial\Sigma}=\frac{\partial Q}{\partial\Sigma'}|_{(\mu'=\mu,\Sigma'=\Sigma)}= \begin{cases} \frac{\partial Q}{\partial\sigma'_{ii}}|_{(\mu'=\mu,\Sigma'=\Sigma)}=\frac{n}{2}\{A\}_{ii}\\ \frac{\partial Q}{\partial\sigma'_{ij}}|_{(\mu'=\mu,\Sigma'=\Sigma)}=n\{A\}_{ij} \end{cases} \] \[ where\text{ } A=\Sigma^{-1}(S^*-\Sigma+(\mu-2\bar{X^*})\mu^T)\Sigma^{-1}\]

(b)

# Data
data2 <- read.table(file="C:/RClass/trivariatenormal.txt", header=TRUE)
attach(data2)

# Expected Function  
my_EXP <- function(data, mu, sig){
  n=dim(data)[1]
  p=dim(data)[2]
  sum.ex.x =  matrix(0,p,1)
  sum.ex.xx = matrix(0,p,p)
  for (i in 1:n){
    obs = which(!is.na(data[i,]))
    mis = which(is.na(data[i,]))
    y = as.matrix(data[i,], nrow=1)
    y.o = as.matrix(y[obs])
    mu.o = as.matrix(mu[obs])
    sig.oo = as.matrix(sig[obs, obs])
    if (length(mis)!=0){
    mu.m = as.matrix(mu[mis])
    sig.om = as.matrix(sig[mis, obs])
    sig.mm = as.matrix(sig[mis, mis])
    sig.mo = t(sig.om)
    if (sum(is.na(data[i,])) %% 2 == 0) {t=t(sig.mo)} else {t=sig.mo}
    ym.star = mu.m + (t %*% solve(sig.oo) %*% (y.o-mu.o))
    ex.x = matrix(0,p,1)
    ex.x[obs] = y.o
    ex.x[mis] = ym.star
    ex.xx = matrix(0,p,p)
    ex.xx[obs, obs] = y.o %*% t(y.o)
    ex.xx[mis, obs] = y.o %*% t(ym.star)
    ex.xx[obs, mis] = ym.star %*% t(y.o)
    ex.xx[mis, mis] = sig.mm - (t %*% solve(sig.oo) %*% t(t)) + ym.star %*% t(ym.star)
    }
    else{
    ex.x = y.o
    ex.xx = y.o %*% t(y.o)
    }
    sum.ex.x =  sum.ex.x + ex.x
    sum.ex.xx = sum.ex.xx + ex.xx
  }
list(sum.ex.x=sum.ex.x, sum.ex.xx=sum.ex.xx)   
}

# Matrix -> Vector (Sigma)
sig_fun <- function(A, data){
  n=dim(data)[1]
  p=dim(data)[2]
  V = numeric(p*(p+1)/2)
  cnt = 0
  for(i in 1:nrow(A)){
    for (j in 1:i){
      cnt = cnt + 1    
      V[cnt] = A[i,j]
      if(i==j){V[cnt]=(n/2)*A[i,j]}
      else if(i!=j){V[cnt]=n*A[i,j]}
    }
  }
  V = matrix(V, ncol=1)
  return(V)
}

# EM Function
my_EM <- function(data, mu, sig, maxit){
  header = paste0("It", "     mu1", "      mu2", "      sig11", "     sig13",
                  "    ||gradient||")
  print(header)
  n=dim(data)[1]
  p=dim(data)[2]
  for (it in 1:maxit){
  if (it==1|it==2|it==3|it==33|it==34|it==35){
  print("-----------------------------------------------------------") 
  }  
  com = my_EXP(data=data ,mu=mu, sig=sig)
  sum.ex.x = com$sum.ex.x
  sum.ex.xx = com$sum.ex.xx  
  xbstar = (1/n)*sum.ex.x
  sstar = (1/n)*sum.ex.xx 
  mu.h = xbstar
  sig.h = sstar - xbstar%*%t(xbstar)
  gradm = (n/2)*(solve(sig)%*%(2*xbstar-2*mu))
  gs = solve(sig)%*%(sstar-sig+(mu-2*xbstar)%*%t(mu))%*%solve(sig)
  grads = sig_fun(A=gs, data=data)
  gradms = matrix(c(gradm, grads), ncol=1)  
  gradnorm = norm(gradms, type="2")
  if (it==1|it==2|it==3|it==33|it==34|it==35){
  print(sprintf('%02.0f  %.6f  %.6f  %.6f  %.6f    %2.1e',
                it, mu.h[1], mu.h[3], sig.h[1,1], sig.h[1,3], gradnorm))
  }
  if (gradnorm < 10^-6){break}
  mu = mu.h
  sig = sig.h
  }
 return(list(mu=mu, sig=sig))
}  

mu_ini = c(0,0,0)
sig_ini = diag(1,3,3)
a <- my_EM(data=data2, mu=mu_ini, sig=sig_ini, maxit=50)
## [1] "It     mu1      mu2      sig11     sig13    ||gradient||"
## [1] "-----------------------------------------------------------"
## [1] "01  0.596458  7.686042  1.414481  0.514963    2.0e+03"
## [1] "-----------------------------------------------------------"
## [1] "02  0.800612  8.834786  1.353736  0.933478    2.6e+01"
## [1] "-----------------------------------------------------------"
## [1] "03  0.849187  8.976123  1.341558  1.137180    5.8e+01"
## [1] "-----------------------------------------------------------"
## [1] "33  0.878601  9.025745  1.413132  1.318472    3.7e-06"
## [1] "-----------------------------------------------------------"
## [1] "34  0.878601  9.025745  1.413132  1.318472    1.7e-06"
## [1] "-----------------------------------------------------------"
## [1] "35  0.878601  9.025745  1.413132  1.318472    8.4e-07"
a
## $mu
##          [,1]
## [1,] 0.878601
## [2,] 2.850157
## [3,] 9.025745
## 
## $sig
##          [,1]      [,2]      [,3]
## [1,] 1.413132 1.0015800 1.3184716
## [2,] 1.001580 0.7779349 0.7043226
## [3,] 1.318472 0.7043226 2.5224430