\[Q(\theta', \theta)=-\frac{n}{2}\Big(p\cdot log(2\pi)+log|\Sigma(\theta')|+trace\{\Sigma^{-1}(\theta')[S^*-\mu(\theta')(\bar{X^*})^T-\bar{X^*}\mu(\theta')^T+\mu(\theta')\mu(\theta')^T]\}\Big)\] where \[\bar{X^*}=E^*\Big(\frac{1}{n}\sum_{i=1}^{n}X_i\Big)=\frac{1}{n}\sum_{i=1}^{n}E^*(X_i)\] and \[S^*=E^*(S)=\frac{1}{n}E^*\Big(\sum_{i=1}^{n}X_iX_i^T \Big)\]
\[dQ(d\mu')=-\frac{n}{2}trace[\Sigma'^{-1}(-\bar{X^*}d\mu'^T-d\mu'\bar{X^*}^T+d\mu'\mu'^T+\mu'd\mu'^T)]\] \[\frac{\partial Q}{\partial\mu'}=-\frac{n}{2}[\Sigma'^{-1}(-2\bar{X^*}+2\mu')]\] \[dQ(d\Sigma')=-\frac{n}{2}trace(\Sigma'^{-1}d\Sigma')-\frac{n}{2}trace(-\Sigma'^{-1}d\Sigma'\Sigma'^{-1}(S^*-\bar{X^*}\mu'^T-\mu'\bar{X^*}^T+\mu'\mu'^T))\] \[=\frac{n}{2}trace(-\Sigma'^{-1}+\Sigma'^{-1}(S^*-\bar{X^*}\mu'^T-\mu'\bar{X^*}^T+\mu'\mu'^T)\Sigma'^{-1})d\Sigma'\] \[ \frac{\partial Q}{\partial\Sigma'}= \begin{cases} \frac{\partial Q}{\partial\sigma'_{ii}}=\frac{n}{2}\{A'\}_{ii}\\ \frac{\partial Q}{\partial\sigma'_{ij}}=n\{A'\}_{ij} \end{cases} \] \[ where\text{ } A'=\Sigma'^{-1}(S^*-\Sigma'+(\mu'-2\bar{X^*})\mu'^T)\Sigma'^{-1}\]
Since \[\frac{\partial Q\{(\mu',\Sigma'),(\mu,\Sigma)\}}{\partial (\mu',\Sigma')}\vert_{(\mu',\Sigma')=(\mu, \Sigma)}=\nabla\ell_{obs}(\mu,\Sigma)\] Therefore, \[\frac{\partial\ell}{\partial\mu}=\frac{\partial Q}{\partial\mu'}|_{(\mu'=\mu,\Sigma'=\Sigma)}=-\frac{n}{2}[\Sigma^{-1}(-2\bar{X}^*+2\mu)]\] \[ \frac{\partial\ell}{\partial\Sigma}=\frac{\partial Q}{\partial\Sigma'}|_{(\mu'=\mu,\Sigma'=\Sigma)}= \begin{cases} \frac{\partial Q}{\partial\sigma'_{ii}}|_{(\mu'=\mu,\Sigma'=\Sigma)}=\frac{n}{2}\{A\}_{ii}\\ \frac{\partial Q}{\partial\sigma'_{ij}}|_{(\mu'=\mu,\Sigma'=\Sigma)}=n\{A\}_{ij} \end{cases} \] \[ where\text{ } A=\Sigma^{-1}(S^*-\Sigma+(\mu-2\bar{X^*})\mu^T)\Sigma^{-1}\]
# Data
data2 <- read.table(file="C:/RClass/trivariatenormal.txt", header=TRUE)
attach(data2)
# Expected Function
my_EXP <- function(data, mu, sig){
n=dim(data)[1]
p=dim(data)[2]
sum.ex.x = matrix(0,p,1)
sum.ex.xx = matrix(0,p,p)
for (i in 1:n){
obs = which(!is.na(data[i,]))
mis = which(is.na(data[i,]))
y = as.matrix(data[i,], nrow=1)
y.o = as.matrix(y[obs])
mu.o = as.matrix(mu[obs])
sig.oo = as.matrix(sig[obs, obs])
if (length(mis)!=0){
mu.m = as.matrix(mu[mis])
sig.om = as.matrix(sig[mis, obs])
sig.mm = as.matrix(sig[mis, mis])
sig.mo = t(sig.om)
if (sum(is.na(data[i,])) %% 2 == 0) {t=t(sig.mo)} else {t=sig.mo}
ym.star = mu.m + (t %*% solve(sig.oo) %*% (y.o-mu.o))
ex.x = matrix(0,p,1)
ex.x[obs] = y.o
ex.x[mis] = ym.star
ex.xx = matrix(0,p,p)
ex.xx[obs, obs] = y.o %*% t(y.o)
ex.xx[mis, obs] = y.o %*% t(ym.star)
ex.xx[obs, mis] = ym.star %*% t(y.o)
ex.xx[mis, mis] = sig.mm - (t %*% solve(sig.oo) %*% t(t)) + ym.star %*% t(ym.star)
}
else{
ex.x = y.o
ex.xx = y.o %*% t(y.o)
}
sum.ex.x = sum.ex.x + ex.x
sum.ex.xx = sum.ex.xx + ex.xx
}
list(sum.ex.x=sum.ex.x, sum.ex.xx=sum.ex.xx)
}
# Matrix -> Vector (Sigma)
sig_fun <- function(A, data){
n=dim(data)[1]
p=dim(data)[2]
V = numeric(p*(p+1)/2)
cnt = 0
for(i in 1:nrow(A)){
for (j in 1:i){
cnt = cnt + 1
V[cnt] = A[i,j]
if(i==j){V[cnt]=(n/2)*A[i,j]}
else if(i!=j){V[cnt]=n*A[i,j]}
}
}
V = matrix(V, ncol=1)
return(V)
}
# EM Function
my_EM <- function(data, mu, sig, maxit){
header = paste0("It", " mu1", " mu2", " sig11", " sig13",
" ||gradient||")
print(header)
n=dim(data)[1]
p=dim(data)[2]
for (it in 1:maxit){
if (it==1|it==2|it==3|it==33|it==34|it==35){
print("-----------------------------------------------------------")
}
com = my_EXP(data=data ,mu=mu, sig=sig)
sum.ex.x = com$sum.ex.x
sum.ex.xx = com$sum.ex.xx
xbstar = (1/n)*sum.ex.x
sstar = (1/n)*sum.ex.xx
mu.h = xbstar
sig.h = sstar - xbstar%*%t(xbstar)
gradm = (n/2)*(solve(sig)%*%(2*xbstar-2*mu))
gs = solve(sig)%*%(sstar-sig+(mu-2*xbstar)%*%t(mu))%*%solve(sig)
grads = sig_fun(A=gs, data=data)
gradms = matrix(c(gradm, grads), ncol=1)
gradnorm = norm(gradms, type="2")
if (it==1|it==2|it==3|it==33|it==34|it==35){
print(sprintf('%02.0f %.6f %.6f %.6f %.6f %2.1e',
it, mu.h[1], mu.h[3], sig.h[1,1], sig.h[1,3], gradnorm))
}
if (gradnorm < 10^-6){break}
mu = mu.h
sig = sig.h
}
return(list(mu=mu, sig=sig))
}
mu_ini = c(0,0,0)
sig_ini = diag(1,3,3)
a <- my_EM(data=data2, mu=mu_ini, sig=sig_ini, maxit=50)
## [1] "It mu1 mu2 sig11 sig13 ||gradient||"
## [1] "-----------------------------------------------------------"
## [1] "01 0.596458 7.686042 1.414481 0.514963 2.0e+03"
## [1] "-----------------------------------------------------------"
## [1] "02 0.800612 8.834786 1.353736 0.933478 2.6e+01"
## [1] "-----------------------------------------------------------"
## [1] "03 0.849187 8.976123 1.341558 1.137180 5.8e+01"
## [1] "-----------------------------------------------------------"
## [1] "33 0.878601 9.025745 1.413132 1.318472 3.7e-06"
## [1] "-----------------------------------------------------------"
## [1] "34 0.878601 9.025745 1.413132 1.318472 1.7e-06"
## [1] "-----------------------------------------------------------"
## [1] "35 0.878601 9.025745 1.413132 1.318472 8.4e-07"
a
## $mu
## [,1]
## [1,] 0.878601
## [2,] 2.850157
## [3,] 9.025745
##
## $sig
## [,1] [,2] [,3]
## [1,] 1.413132 1.0015800 1.3184716
## [2,] 1.001580 0.7779349 0.7043226
## [3,] 1.318472 0.7043226 2.5224430