(a)

Square Root of Matrix

sqrtm <- function(A){
  a = eigen(A)
  sqm = a$vectors %*% diag(sqrt(a$values)) %*% t(a$vectors)
  sqm = (sqm+t(sqm))/2
}

Generating Data Function

gen <- function(n, p, mu, sig, seed = 2022){
  set.seed(seed)
  x = matrix(rnorm(n*p), n, p)
  datan = x %*% sqrtm(sig) + matrix(mu, n, p, byrow=TRUE)
  datan
}

Vector <=> Matrix

MtoV <- function(M){
  V = numeric(p*(p+1)/2)
  cnt = 0
  for(i in 1:nrow(M)){
    for (j in 1:i){
      cnt = cnt + 1    
      V[cnt] = M[i,j]
    }
  }
  V = matrix(V, ncol=1)
  return(V)
}

VtoM <- function(V){
  p = (-1 + sqrt(1+8*nrow(V)))/2
  M = matrix(0,p,p)
  cnt = 0
  for(i in 1:nrow(M)){
    for (j in 1:i){
      cnt = cnt + 1
      M[i,j] = V[cnt,1]
      M[j,i] = M[i,j]  
    }
  }  
  return(M) 
}

Sig Function

sig_fun <- function(A){
  V = numeric(p*(p+1)/2)
  cnt = 0
  for(i in 1:nrow(A)){
    for (j in 1:i){
      cnt = cnt + 1    
      V[cnt] = A[i,j]
  if(i==j){V[cnt]=(-1/2)*A[i,j]}
  else if(i!=j){V[cnt]=-A[i,j]}
  }
  }
  V = matrix(V, ncol=1)
  return(V)
}

Gradient / Loglikelihood

Llike <- function (x, mu, sig) {
  a = dim(x)
  n = a[1]
  p = a[2]
  if (any(eigen(sig)$values < 0)) { 
    l = NA
    gradms = NA
    gradnorm = NA
  } else {
    siginv = solve(sig)
    C = matrix(0,p,p)
    sxm = matrix(0,p,1)
    for (i in 1:n){
      xm = x[i,] - mu
      sxm = sxm + xm
      C = C + xm %*% t(xm)
    }
    gradm = siginv %*% sxm
    A = n*siginv - (siginv %*% (C %*% siginv))
    grads = sig_fun(A)
    gradms = matrix(c(gradm, grads), ncol=1) 
    log_det_sig <- log(det(sig))
    l = -(n*p*log(2*pi)+n*log_det_sig + sum(siginv*C))/2
    gradnorm = norm(gradms, type='2')
  }
  list(l = l, gradms = gradms, gradnorm = gradnorm)
}

Hessian

Hess <- function(x, mu, sig){
  a = dim(x)
  n = a[1]
  p = a[2]
  siginv = solve(sig)   
  hess.mu.lt = -n*siginv
  hess = matrix(0,p+(p*(p+1)/2),p+(p*(p+1)/2))
  hess.sigsig.rb = matrix(0,p*(p+1)/2,p*(p+1)/2)
  hess.musig.rt = matrix(0,p,p*(p+1)/2)
  hess.sigmu.lb = matrix(0,p*(p+1)/2,p)
  C = matrix(0,p,p)
  for (i in 1:n){
    xm = x[i,] - mu
    C = C + xm %*% t(xm)
  }
  Z = (n/2)*siginv-siginv%*%C%*%siginv 
  rn = 0
  cn = 0
  for(i in 1:p){
    for(j in 1:i){
      rn = rn + 1 
      cn = 0 
      for(k in 1:p){
        for(l in 1:k){ 
          cn = cn + 1
          if(i==j && k==l){
            hess.sigsig.rb[rn,cn] = Z[k,i]*siginv[i,k]
          }
          else if (i!=j && k==l){
            hess.sigsig.rb[rn,cn]=Z[k,i]*siginv[j,k]+Z[k,j]*siginv[i,k]
          }
          else if(i==j && k!=l){hess.sigsig.rb[rn,cn]=Z[l,i]*siginv[i,k]+Z[k,i]*
                                                      siginv[i,l]
          }
          else if(i!=j && k!=l){
            hess.sigsig.rb[rn,cn]=Z[k,i]*siginv[j,l]+Z[l,j]*siginv[i,k]+Z[k,j]*
                                  siginv[i,l]+Z[l,i]*siginv[j,k]
          }
        } 
      }
    }
  }
  sxm = matrix(0,p,1)
  for (i in 1:n){
    xm = x[i,] -mu
    sxm = sxm + xm
    D = -siginv%*%sxm 
  }
  rn = 0
  cn = 0
  for(i in 1:p){
    rn = rn + 1 
    cn = 0 
    for(k in 1:p){
      for(l in 1:k){ 
        cn = cn+1 
        if(k==l){
          hess.musig.rt[rn,cn] = siginv[i,k]*D[k,]
        }
        else if (k!=l){hess.musig.rt[rn,cn]=siginv[i,k]*D[l,]+siginv[i,l]*D[k,]} 
      }
    }
  }
  hess=cbind(rbind(hess.mu.lt,t(hess.musig.rt)),rbind(hess.musig.rt,hess.sigsig.rb))
  hess=.5*(hess+t(hess))
  return(hess)  
}

Steepest Ascent Method

optmvn.sa <- function(x, mu, sig, tolerr, tolgrad, maxit, method){
  header = paste0("Iteration", "        Halving", "       log-likelihood", 
                  "          ||gradient||")
  print(header)
  n = nrow(x) 
  p = ncol(x)
  for(it in 1:maxit) {
    theta = matrix(c(mu, MtoV(sig)), ncol=1)
    a = Llike(x=datan, mu=mu, sig=sig)
    print("-----------------------------------------------------------------")
    print(sprintf('%2.0f              --              %.4f               %.1e',
                  it,  a$l, a$gradnorm))
    if(method == "SA") dir <- a$gradms
    thetan = theta + dir
    mu1 = thetan[1:p,]
    sig1 = thetan[(p+1):nrow(thetan), ]     
    atmp = Llike(x=datan, mu=matrix(mu1,ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
    if (is.na(atmp$l)) {
      atmp_new = -Inf
    } else {
      atmp_new = atmp$l
    }   
    halve = 0;
    print(sprintf('%2.0f              %2.0f              %.4f               %.1e', 
                  it, halve, atmp$l, atmp$gradnorm))
    while(atmp_new < a$l & halve < 20) {
      halve = halve + 1
      thetan = theta + dir/(2^halve)
      mu1 = thetan[1:p,]
      sig1 = thetan[(p+1):nrow(thetan), ]
      atmp = Llike(x=datan, mu=matrix(mu1, ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
        print(sprintf('%2.0f              %2.0f              %.4f               %.1e', 
                      it, halve, atmp$l, atmp$gradnorm))
      if (is.na(atmp$l)) {
        atmp_new = -Inf
      } else {
        atmp_new = atmp$l
      }       
    } 
    thetan1 = thetan
    if (halve >= 20) break
    Mre = max(abs(thetan1 - theta)/pmax(1, abs(thetan1)))
    gradnorm = atmp$gradnorm
    if (Mre < tolerr && gradnorm < tolgrad){ break }
    mu1 = thetan1[1:p, ]
    sig1 = thetan1[(p+1):nrow(thetan1), ]
    mu = matrix(mu1, ncol=1) 
    sig = VtoM(matrix(sig1,ncol=1)) 
  }
  return(list(mu = mu, sig = sig))
} 

Newton’s Method

optmvn.new <- function (x, mu, sig, tolerr, tolgrad, maxit, method){
  header = paste0("Iteration", "        Halving", "       log-likelihood", 
                  "        ||gradient||")
  print(header)
  n = nrow(x)
  p = ncol(x)
  for(it in 1:maxit) {
    theta = matrix(c(mu, MtoV(sig)), ncol=1)
    a = Llike(x=datan, mu=mu, sig=sig)
    if (it <= maxit) {
      print("-----------------------------------------------------------------")
      print(sprintf('%2.0f               --             %.4f                   %.1e', 
                    it,  a$l, a$gradnorm))
    }
    b <- Hess(x=datan, mu=mu, sig=sig)
    hessinv  = solve(b)
    if(method == "Newton") dir <- -1*hessinv %*% a$gradms 
    thetan = theta + dir
    mu1 = thetan[1:p,]
    sig1 = thetan[(p+1):nrow(thetan), ]     
    atmp = Llike(x=datan, mu=matrix(mu1,ncol=1), sig=VtoM(matrix(sig1,ncol=1))) 
    if (is.na(atmp$l)) {
      atmp_new = -Inf
    } else {
      atmp_new = atmp$l
    }   
    halve = 0
    print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                  it, halve, atmp$l, atmp$gradnorm))    
    while(atmp_new < a$l & halve <= 20) {
      halve = halve + 1
      thetan = theta + dir/(2^halve)
      mu1 = thetan[1:p,]
      sig1 = thetan[(p+1):nrow(thetan), ]
      atmp = Llike(x=datan, mu=matrix(mu1, ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
      print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                    it, halve, atmp$l, atmp$gradnorm))
      if (is.na(atmp$l)) {
        atmp_new = -Inf
      } else {
        atmp_new = atmp$l
      }       
    } 
    thetan1 = thetan
    if (halve >= 20) { break }
    Mre = max(abs(thetan1 - theta)/pmax(1, abs(thetan1)))
    gradnorm = atmp$gradnorm
    if (Mre < tolerr && gradnorm < tolgrad){ break }
    mu1 = thetan1[1:p, ]
    sig1 = thetan1[(p+1):nrow(thetan1), ]
    mu = matrix(mu1, ncol=1) 
    sig = VtoM(matrix(sig1,ncol=1)) 
  }
  return(list(mu = mu, sig = sig))
}

Fisher-Scoring Algorithm

optmvn.fis <- function (x, mu, sig, tolerr, tolgrad, maxit, method){
  header = paste0("Iteration", "        Halving", "       log-likelihood", 
                  "          ||gradient||")
  print(header)
  n = nrow(x)
  p = ncol(x)
  for(it in 1:maxit) {
    theta = matrix(c(mu, MtoV(sig)), ncol=1)
    a = Llike(x=datan, mu=mu, sig=sig)
    if (it <= maxit) {
      print("-----------------------------------------------------------------")
      print(sprintf('%2.0f               --             %.4f                   %.1e', 
                    it,  a$l, a$gradnorm))
    }
    if(method == "Fisher") dir <- solve(FIM(x=datan, mu=mu, sig=sig)) %*% a$gradms 
    thetan = theta + dir
    mu1 = thetan[1:p,]
    sig1 = thetan[(p+1):nrow(thetan), ]     
    atmp = Llike(x=datan, mu=matrix(mu1,ncol=1), sig=VtoM(matrix(sig1,ncol=1))) 
    if (is.na(atmp$l)) {
      atmp_new = -Inf
    } else {
      atmp_new = atmp$l
    }   
    halve = 0
    print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                  it, halve, atmp$l, atmp$gradnorm)) 
    while(atmp_new < a$l & halve <= 20) {
      halve = halve + 1
      thetan = theta + dir/(2^halve)
      mu1 = thetan[1:p,]
      sig1 = thetan[(p+1):nrow(thetan), ]
      atmp = Llike(x=datan, mu=matrix(mu1, ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
      print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                    it, halve, atmp$l, atmp$gradnorm))
      if (is.na(atmp$l)) {
        atmp_new = -Inf
      } else {
        atmp_new = atmp$l
      }       
    } 
    thetan1 = thetan
    if (halve >= 20) { break }
    Mre = max(abs(thetan1 - theta)/pmax(1, abs(thetan1)))
    gradnorm = atmp$gradnorm
    if (Mre < tolerr && gradnorm < tolgrad){ break }
    mu1 = thetan1[1:p, ]
    sig1 = thetan1[(p+1):nrow(thetan1), ]
    mu = matrix(mu1, ncol=1) 
    sig = VtoM(matrix(sig1,ncol=1)) 
  }
  return(list(mu = mu, sig = sig))
}

(b)

Generating Data

gen <- function(n, p, mu, sig, seed = 2022){
  set.seed(seed)
  x = matrix(rnorm(n*p), n, p)
  datan = x %*% sqrtm(sig) + matrix(mu, n, p, byrow=TRUE)
  datan
}

n <- 200
p <- 3
mu.d <- matrix(c(-1, 1, 2), 3, 1)
sig.d <- matrix(c(1, 0.7, 0.7, 0.7, 1, 0.7, 0.7, 0.7, 1), nrow=3, ncol=3)
datan <- gen(n, p, mu=mu.d, sig=sig.d, seed = 2022)
head(datan, n=3)  
##            [,1]       [,2]      [,3]
## [1,]  0.1947111  1.9078866  3.153741
## [2,] -2.8172598 -0.2811456 -0.352587
## [3,] -2.4052730 -1.0723173  1.237333

(c)

Estimating Mu and Sigma using Steepest Ascent Algorithm

optmvn.sa <- function(x, mu, sig, tolerr, tolgrad, maxit, method){
  header = paste0("Iteration", "        Halving", "       log-likelihood", 
                  "          ||gradient||")
  print(header)
  n = nrow(x) 
  p = ncol(x)
  for(it in 1:maxit) {
    theta = matrix(c(mu, MtoV(sig)), ncol=1)
    a = Llike(x=datan, mu=mu, sig=sig)
    if (it==1 |it==2 | it==257 | it==258) {
      print("-----------------------------------------------------------------")
      print(sprintf('%2.0f              --              %.4f               %.1e', 
                    it,  a$l, a$gradnorm))
    }
    if(method == "SA") dir <- a$gradms
    thetan = theta + dir
    mu1 = thetan[1:p,]
    sig1 = thetan[(p+1):nrow(thetan), ]     
    atmp = Llike(x=datan, mu=matrix(mu1,ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
    if (is.na(atmp$l)) {
      atmp_new = -Inf
    } else {
      atmp_new = atmp$l
    }   
    halve = 0;
    if (it==1 |it==2 | it==257 | it==258) {
    print(sprintf('%2.0f              %2.0f              %.4f               %.1e', 
                  it, halve, atmp$l, atmp$gradnorm))
    }
      while(atmp_new < a$l & halve < 20) {
      halve = halve + 1
      thetan = theta + dir/(2^halve)
      mu1 = thetan[1:p,]
      sig1 = thetan[(p+1):nrow(thetan), ]
      atmp = Llike(x=datan, mu=matrix(mu1, ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
      if (it==1 |it==2 | it==257 | it==258) {
      print(sprintf('%2.0f              %2.0f              %.4f               %.1e', 
                      it, halve, atmp$l, atmp$gradnorm))
      }
      if (is.na(atmp$l)) {
        atmp_new = -Inf
      } else {
        atmp_new = atmp$l
      }       
    } 
    thetan1 = thetan
    if (halve >= 20) break
    Mre = max(abs(thetan1 - theta)/pmax(1, abs(thetan1)))
    gradnorm = atmp$gradnorm
    if (Mre < tolerr && gradnorm < tolgrad){ break }
    mu1 = thetan1[1:p, ]
    sig1 = thetan1[(p+1):nrow(thetan1), ]
    mu = matrix(mu1, ncol=1) 
    sig = VtoM(matrix(sig1,ncol=1)) 
  }
  return(list(mu = mu, sig = sig))
}  

mu0 <- matrix(c(0, 0, 0), 3, 1)
sig0 <- matrix(c(1, 0, 0, 0, 1, 0, 0, 0, 1), nrow=3, ncol=3)
tolerr <- 10^(-6)
tolgrad <- 10^(-5)
result.c <- optmvn.sa(x=datan, mu=mu0, sig=sig0, tolerr=10^(-6), tolgrad=10^(-5), 
                      maxit=500, method="SA")
## [1] "Iteration        Halving       log-likelihood          ||gradient||"
## [1] "-----------------------------------------------------------------"
## [1] " 1              --              -1449.3804               8.7e+02"
## [1] " 1               0              NA               NA"
## [1] " 1               1              NA               NA"
## [1] " 1               2              NA               NA"
## [1] " 1               3              NA               NA"
## [1] " 1               4              NA               NA"
## [1] " 1               5              NA               NA"
## [1] " 1               6              NA               NA"
## [1] " 1               7              NA               NA"
## [1] " 1               8              NA               NA"
## [1] " 1               9              -932.6414               1.9e+02"
## [1] "-----------------------------------------------------------------"
## [1] " 2              --              -932.6414               1.9e+02"
## [1] " 2               0              NA               NA"
## [1] " 2               1              NA               NA"
## [1] " 2               2              NA               NA"
## [1] " 2               3              NA               NA"
## [1] " 2               4              NA               NA"
## [1] " 2               5              NA               NA"
## [1] " 2               6              NA               NA"
## [1] " 2               7              -1345.8250               9.1e+03"
## [1] " 2               8              -873.5158               2.6e+02"
## [1] "-----------------------------------------------------------------"
## [1] "257              --              -692.5871               6.7e-05"
## [1] "257               0              -692.5871               1.4e-01"
## [1] "257               1              -692.5871               7.0e-02"
## [1] "257               2              -692.5871               3.5e-02"
## [1] "257               3              -692.5871               1.7e-02"
## [1] "257               4              -692.5871               8.7e-03"
## [1] "257               5              -692.5871               4.3e-03"
## [1] "257               6              -692.5871               2.1e-03"
## [1] "257               7              -692.5871               1.0e-03"
## [1] "257               8              -692.5871               4.8e-04"
## [1] "257               9              -692.5871               2.1e-04"
## [1] "257              10              -692.5871               7.0e-05"
## [1] "-----------------------------------------------------------------"
## [1] "258              --              -692.5871               7.0e-05"
## [1] "258               0              -692.5871               1.5e-01"
## [1] "258               1              -692.5871               7.3e-02"
## [1] "258               2              -692.5871               3.7e-02"
## [1] "258               3              -692.5871               1.8e-02"
## [1] "258               4              -692.5871               9.1e-03"
## [1] "258               5              -692.5871               4.5e-03"
## [1] "258               6              -692.5871               2.2e-03"
## [1] "258               7              -692.5871               1.1e-03"
## [1] "258               8              -692.5871               5.0e-04"
## [1] "258               9              -692.5871               2.2e-04"
## [1] "258              10              -692.5871               7.4e-05"
## [1] "258              11              -692.5871               7.1e-06"
result.c
## $mu
##            [,1]
## [1,] -1.0402715
## [2,]  0.9439508
## [3,]  1.9896595
## 
## $sig
##           [,1]      [,2]      [,3]
## [1,] 1.0807579 0.7093078 0.7800266
## [2,] 0.7093078 0.9189939 0.6881350
## [3,] 0.7800266 0.6881350 1.0484694

(d)

Estimating Mu and Sigma using Newton’s Method

optmvn.new <- function (x, mu, sig, tolerr, tolgrad, maxit, method){
  header = paste0("Iteration", "        Halving", "       log-likelihood", 
                  "        ||gradient||")
  print(header)
  n = nrow(x)
  p = ncol(x)
  for(it in 1:maxit) {
    theta = matrix(c(mu, MtoV(sig)), ncol=1)
    a = Llike(x=datan, mu=mu, sig=sig)
    if (it <= maxit) {
      print("-----------------------------------------------------------------")
      print(sprintf('%2.0f               --             %.4f                   %.1e', 
                    it,  a$l, a$gradnorm))
    }
    b <- Hess(x=datan, mu=mu, sig=sig)
    hessinv  = solve(b)
    if(method == "Newton") dir <- -1*hessinv %*% a$gradms 
    thetan = theta + dir
    mu1 = thetan[1:p,]
    sig1 = thetan[(p+1):nrow(thetan), ]     
    atmp = Llike(x=datan, mu=matrix(mu1,ncol=1), sig=VtoM(matrix(sig1,ncol=1))) 
    if (is.na(atmp$l)) {
      atmp_new = -Inf
    } else {
      atmp_new = atmp$l
    }   
    halve = 0
    print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                  it, halve, atmp$l, atmp$gradnorm))    
    while(atmp_new < a$l & halve <= 20) {
      halve = halve + 1
      thetan = theta + dir/(2^halve)
      mu1 = thetan[1:p,]
      sig1 = thetan[(p+1):nrow(thetan), ]
      atmp = Llike(x=datan, mu=matrix(mu1, ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
      print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                    it, halve, atmp$l, atmp$gradnorm))
      if (is.na(atmp$l)) {
        atmp_new = -Inf
      } else {
        atmp_new = atmp$l
      }       
    } 
    thetan1 = thetan
    if (halve >= 20) { break }
    Mre = max(abs(thetan1 - theta)/pmax(1, abs(thetan1)))
    gradnorm = atmp$gradnorm
    if (Mre < tolerr && gradnorm < tolgrad){ break }
    mu1 = thetan1[1:p, ]
    sig1 = thetan1[(p+1):nrow(thetan1), ]
    mu = matrix(mu1, ncol=1) 
    sig = VtoM(matrix(sig1,ncol=1)) 
  }
  return(list(mu = mu, sig = sig))
}

mu0 <- matrix(c(-1.5, 1.5, 2.3), 3, 1)
sig0 <- matrix(c(1, 0.5, 0.5, 0.5, 1, 0.5, 0.5, 0.5, 1), nrow=3, ncol=3)
tolerr <- 10^(-7)
tolgrad <- 10^(-7)
result.d <- optmvn.new(x=datan, mu=mu0, sig=sig0, tolerr=10^(-7), tolgrad=10^(-7), 
                       maxit=500, method="Newton")
## [1] "Iteration        Halving       log-likelihood        ||gradient||"
## [1] "-----------------------------------------------------------------"
## [1] " 1               --             -836.6364                   3.7e+02"
## [1] " 1               0              NA                   NA"
## [1] " 1               1              NA                   NA"
## [1] " 1               2              NA                   NA"
## [1] " 1               3              NA                   NA"
## [1] " 1               4              -789.2027                   2.4e+03"
## [1] "-----------------------------------------------------------------"
## [1] " 2               --             -789.2027                   2.4e+03"
## [1] " 2               0              -843.2673                   3.4e+03"
## [1] " 2               1              -749.4449                   1.5e+03"
## [1] "-----------------------------------------------------------------"
## [1] " 3               --             -749.4449                   1.5e+03"
## [1] " 3               0              -712.0011                   5.7e+02"
## [1] "-----------------------------------------------------------------"
## [1] " 4               --             -712.0011                   5.7e+02"
## [1] " 4               0              -697.5056                   2.0e+02"
## [1] "-----------------------------------------------------------------"
## [1] " 5               --             -697.5056                   2.0e+02"
## [1] " 5               0              -693.2310                   5.7e+01"
## [1] "-----------------------------------------------------------------"
## [1] " 6               --             -693.2310                   5.7e+01"
## [1] " 6               0              -692.6078                   9.1e+00"
## [1] "-----------------------------------------------------------------"
## [1] " 7               --             -692.6078                   9.1e+00"
## [1] " 7               0              -692.5871                   3.5e-01"
## [1] "-----------------------------------------------------------------"
## [1] " 8               --             -692.5871                   3.5e-01"
## [1] " 8               0              -692.5871                   5.5e-04"
## [1] "-----------------------------------------------------------------"
## [1] " 9               --             -692.5871                   5.5e-04"
## [1] " 9               0              -692.5871                   1.4e-09"
## [1] "-----------------------------------------------------------------"
## [1] "10               --             -692.5871                   1.4e-09"
## [1] "10               0              -692.5871                   7.6e-13"
result.d
## $mu
##            [,1]
## [1,] -1.0402715
## [2,]  0.9439508
## [3,]  1.9896595
## 
## $sig
##           [,1]      [,2]      [,3]
## [1,] 1.0807577 0.7093077 0.7800265
## [2,] 0.7093077 0.9189938 0.6881349
## [3,] 0.7800265 0.6881349 1.0484692

(e)

Fisher Informaion Matrix

FIM <- function(x, mu, sig){
  a = dim(x)
  n = a[1]
  p = a[2]
  siginv = solve(sig)
  FI = matrix(0,p+(p*(p+1)/2),p+(p*(p+1)/2))
  FI.sigsig.rb = matrix(0,p*(p+1)/2,p*(p+1)/2)
  FI.musig.rt = matrix(0,p,p*(p+1)/2)
  FI.sigmu.lb = matrix(0,p*(p+1)/2,p)
  FI.mumu.lt = n*siginv
  rn = 0
  cn = 0
  for(i in 1:p){
    for(j in 1:i){
      rn = rn + 1 
      cn = 0
      for(k in 1:p){
        for(l in 1:k){ 
          cn = cn + 1
          if(i==j && k==l){
            FI.sigsig.rb[rn,cn] = (n/2)*siginv[i,k]%*%siginv[k,i]
          }
          else if (i!=j && k==l){
            FI.sigsig.rb[rn,cn]=(n/2)*(siginv[k,i]%*%siginv[j,k]+siginv[k,j]%*%
                                siginv[i,k])
          }
          else if(i==j && k!=l){
            FI.sigsig.rb[rn,cn]=(n/2)*(siginv[l,i]%*%siginv[i,k]+siginv[k,i]%*%
                                siginv[i,l])
          }
          else if(i!=j && k!=l){
            FI.sigsig.rb[rn,cn]=(n/2)*(siginv[k,i]%*%siginv[j,l]+siginv[l,j]%*%
                siginv[i,k]+siginv[k,j]%*%siginv[i,l]+siginv[l,i]%*%siginv[j,k])
          }
        } 
      }
    }
  }
  FIM=cbind(rbind(FI.mumu.lt,t(FI.musig.rt)),rbind(FI.musig.rt,FI.sigsig.rb))
  FIM=(FIM+t(FIM))/2
  return(FIM)    
}

Estimating Mu and Sigma using Fisher-scoring Method

optmvn.fis <- function (x, mu, sig, tolerr, tolgrad, maxit, method){
  header = paste0("Iteration", "        Halving", "       log-likelihood", 
                  "          ||gradient||")
  print(header)
  n = nrow(x)
  p = ncol(x)
  for(it in 1:maxit) {
    theta = matrix(c(mu, MtoV(sig)), ncol=1)
    a = Llike(x=datan, mu=mu, sig=sig)
    if (it <= maxit) {
      print("-----------------------------------------------------------------")
      print(sprintf('%2.0f               --             %.4f                   %.1e', 
                    it,  a$l, a$gradnorm))
    }
    if(method == "Fisher") dir <- solve(FIM(x=datan, mu=mu, sig=sig)) %*% a$gradms 
    thetan = theta + dir
    mu1 = thetan[1:p,]
    sig1 = thetan[(p+1):nrow(thetan), ]     
    atmp = Llike(x=datan, mu=matrix(mu1,ncol=1), sig=VtoM(matrix(sig1,ncol=1))) 
    if (is.na(atmp$l)) {
      atmp_new = -Inf
    } else {
      atmp_new = atmp$l
    }   
    halve = 0
    print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                  it, halve, atmp$l, atmp$gradnorm)) 
    while(atmp_new < a$l & halve <= 20) {
      halve = halve + 1
      thetan = theta + dir/(2^halve)
      mu1 = thetan[1:p,]
      sig1 = thetan[(p+1):nrow(thetan), ]
      atmp = Llike(x=datan, mu=matrix(mu1, ncol=1), sig=VtoM(matrix(sig1,ncol=1)))
      print(sprintf('%2.0f              %2.0f              %.4f                   %.1e', 
                    it, halve, atmp$l, atmp$gradnorm))
      if (is.na(atmp$l)) {
        atmp_new = -Inf
      } else {
        atmp_new = atmp$l
      }       
    } 
    thetan1 = thetan
    if (halve >= 20) { break }
    Mre = max(abs(thetan1 - theta)/pmax(1, abs(thetan1)))
    gradnorm = atmp$gradnorm
    if (Mre < tolerr && gradnorm < tolgrad){ break }
    mu1 = thetan1[1:p, ]
    sig1 = thetan1[(p+1):nrow(thetan1), ]
    mu = matrix(mu1, ncol=1) 
    sig = VtoM(matrix(sig1,ncol=1)) 
  }
  return(list(mu = mu, sig = sig))
}

mu0 <- matrix(c(-1.5, 1.5, 2.3), 3, 1)
sig0 <- matrix(c(1, 0.5, 0.5, 0.5, 1, 0.5, 0.5, 0.5, 1), nrow=3, ncol=3)
tolerr <- 10^(-7)
tolgrad <- 10^(-7)
result.e <- optmvn.fis(x=datan, mu=mu0, sig=sig0, tolerr=10^(-7), tolgrad=10^(-7), 
                       maxit=500, method="Fisher")
## [1] "Iteration        Halving       log-likelihood          ||gradient||"
## [1] "-----------------------------------------------------------------"
## [1] " 1               --             -836.6364                   3.7e+02"
## [1] " 1               0              -736.8183                   9.6e+01"
## [1] "-----------------------------------------------------------------"
## [1] " 2               --             -736.8183                   9.6e+01"
## [1] " 2               0              -692.5871                   3.8e-13"
## [1] "-----------------------------------------------------------------"
## [1] " 3               --             -692.5871                   3.8e-13"
## [1] " 3               0              -692.5871                   3.3e-13"
result.e
## $mu
##            [,1]
## [1,] -1.0402715
## [2,]  0.9439508
## [3,]  1.9896595
## 
## $sig
##           [,1]      [,2]      [,3]
## [1,] 1.0807577 0.7093077 0.7800265
## [2,] 0.7093077 0.9189938 0.6881349
## [3,] 0.7800265 0.6881349 1.0484692