Question 2: Log-likelihood surface for trinomial probabilities

#load libraries
library(ggplot2)
library(geometry)
## Loading required package: magic
## Loading required package: abind
library(klaR)
## Loading required package: MASS
library(RColorBrewer)
library(ggtern)
## 
## Attaching package: 'ggtern'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, %+replace%, aes, calc_element, geom_segment,
##     ggplot_build, ggplot_gtable, ggsave, opts, theme, theme_bw,
##     theme_classic, theme_get, theme_gray, theme_grey,
##     theme_minimal, theme_set, theme_update
library(reshape2)
library(grid)

#to view how the log-likelihood surface changes, we want to output a log-likelihood value for every combination of the 3 values of phi.
phi_grid <- expand.grid(Pa=seq(0,1,by=0.01),Pb=seq(0,1,by=0.01))
phi_grid <- phi_grid[phi_grid$Pa > 0 & phi_grid$Pb > 0 & phi_grid$Pa + phi_grid$Pb < 1,]
phi_grid$Po <- 1 - (phi_grid$Pa + phi_grid$Pb)

#Y is vector of observed data (phenotype counts), phi our initial guess of parameter values
Y <- c(186,38,13,284)
phi <- c(1/3,1/3,1/3)

#log-likelihood of observed data
log_like <- function(Y,phi){
  tmp = dmultinom(x=Y,prob=c(phi[1]^2 + 2*phi[1]*phi[3],phi[2]^2 + 2*phi[2]*phi[3],2*phi[1]*phi[2],phi[3]^2),log=TRUE)
  return(tmp)
}

#attach the log-likelihood to each combination
ll_grid <- apply(phi_grid,1,function(x) log_like(Y=Y,phi=unlist(x)))
phi_grid$ll <- ll_grid

#Values of phi that maximize likelihood function
phi_grid[phi_grid$ll==max(phi_grid$ll),]
##       Pa   Pb   Po        ll
## 527 0.21 0.05 0.74 -8.409508

Contour plot of log-likelihood surface Here is a simple 2D plot of the log-likelihood surface, where log-likelihood is expressed as contour levels. Although we can see that the value of \(\pi_{A}\) and \(\pi_{B}\) that maximize the likelihood function are .21 and .05, respectively, we cannot see clearly what the relationship is to \(\pi_{O}\).

ggplot(data=phi_grid) + stat_contour(aes(x=Pa,y=Pb,z=ll,colour=..level..),binwidth=50,size=1.05) + scale_color_gradient(low="green",high="red") + theme_bw() + labs(colour="Log-likelihood",x=expression(~pi[A]),y=expression(~pi[B]),title="Contour Plot of Log-likelihood Surface") + theme(title=element_text(size=14),legend.title=element_text(size=10),axis.title=element_text(size=18))

Plot the trinomial log-likelihood surface in barycentric coordinates When we display the trinomial log-likelihood surface from 3D cartesian to barycentric coordinates we are able to preserve the original coordinate space; we simply project it onto the triangle definied by the simplex for an equilateral triangle. Here it is clear that \(\pi_{O}\) must take a relatively large value to maximize the likelihood function, \(\pi_{A}\) somewhat smaller and \(\pi_{B}\) will be a very small value.

ggtern(data=phi_grid,(aes(x=Pa,y=Pb,z=Po))) + geom_interpolate_tern(aes(value=ll,fill=..level..),colour="black",method="lm",binwidth=100,buffer=1.5,n=300) + theme_bw() + theme(legend.position=c(0,1),legend.justification=c(0,1)) + scale_fill_gradient(low="green",high="red") + labs(fill="Log-likelihood",colour="Log-likelihood",title="Trinomial Log-likelihood Surface \n Barycentric Coordinates") + Llab(expression(paste(pi[A],",0,0"))) + Tlab(expression(paste("0,",pi[B],",0"))) + Rlab(expression(paste("0,0,",pi[O]))) + theme(axis.tern.title=element_text(size=16),axis.tern.arrow.text=element_blank(),title=element_text(size=13),axis.tern.text=element_text(size=11.5))

Question 4: Software implementation of EM algorithm. The EM-algorithm takes 4 arguments. The first argument is the observed phenotype counts, the second argument is our starting guesses for the allele frequencies, the third argument is the tolerance for the stopping rule (smaller values produce a more conservative stopping rule), and the last argument will print out status at each iteration.

#E-step of algorithm
e_step <- function(Y,phi){
  Xaa = Y[1]*(phi[1]^2 / (phi[1]^2 + 2*phi[1]*phi[3]))
  Xao = Y[1]*(2*phi[1]*phi[3] / (phi[1]^2 + 2*phi[1]*phi[3]))
  Xbb = Y[2]*(phi[2]^2 / (phi[2]^2 + 2*phi[2]*phi[3]))
  Xbo = Y[2]*(2*phi[2]*phi[3] / (phi[2]^2 + 2*phi[2]*phi[3]))
  Xab = Y[3]
  Xoo = Y[4]
  return(c(Xaa,Xao,Xbb,Xbo,Xab,Xoo))
}

#M-step of algorithm
m_step <- function(e_out,n){
  Pa = (2*e_out[1]+e_out[2]+e_out[5]) / (2*n)
  Pb = (2*e_out[3]+e_out[4]+e_out[5]) / (2*n)
  Po = (e_out[2]+e_out[4]+2*e_out[6]) / (2*n)
  return(c(Pa,Pb,Po))
}

#EM algorithm wrapper with stopping criteria
EM_abo <- function(Y,phi,stop_rule=1e-10,status=TRUE){
  
  #prepare state variables
  phi_init <- phi
  n<-sum(Y)
  iter<-0
  tolerance<-1
  loglike_init <- log_like(Y=Y,phi=phi_init)
  loglike_lag <- log_like(Y=Y,phi=phi_init)
  track<-list()
  
  #loop until convergence
  while(TRUE){
    
    #E and M-steps
    est_X = e_step(Y=Y,phi=phi)
    phi_new = m_step(e_out=est_X,n=n)
    
    #observed data log-likelihood
    loglike_current <- log_like(Y=c(sum(est_X[1:2]),sum(est_X[3:4]),est_X[5],est_X[6]),phi=phi_new)
    
    #tolerance via change in phi estimates
    tolerance = max(abs(phi_new[1]-phi[1]),abs(phi_new[2]-phi[2]),abs(phi_new[3]-phi[3]))
    
    #check for convergence
    if(tolerance <= stop_rule & abs(loglike_current - loglike_lag) < (abs(loglike_lag) + abs(loglike_current)) * stop_rule) break
    
    #update iteration and list of phi and log-likelihood
    phi <- phi_new
    iter <- iter+1
    loglike_lag <- loglike_current
    track[[iter]] <- c(phi_new,loglike_current)
    #print status at each iteration
    if(status==TRUE){
    print(paste("Iteration",iter,"Allele Frequencies: Pa=",round(phi_new[1],10),"Pb=",round(phi_new[2],10),"Po=",round(phi_new[3],10)))
    }
  }
  return(matrix(c(phi_init,loglike_init,unlist(track)),ncol=4,byrow=TRUE,dimnames=list(NULL,c("Pa","Pb","Po","loglike"))))
}

Question 5: Application of EM algorithm.

#Running the algorithm
EM_result <- EM_abo(Y=Y,phi=phi,stop_rule=1e-36)
## [1] "Iteration 1 Allele Frequencies: Pa= 0.2504798464 Pb= 0.0611004479 Po= 0.6884197057"
## [1] "Iteration 2 Allele Frequencies: Pa= 0.2184543643 Pb= 0.0504939376 Po= 0.7310516981"
## [1] "Iteration 3 Allele Frequencies: Pa= 0.2141823338 Pb= 0.0501617336 Po= 0.7356559326"
## [1] "Iteration 4 Allele Frequencies: Pa= 0.2136619451 Pb= 0.0501466687 Po= 0.7361913862"
## [1] "Iteration 5 Allele Frequencies: Pa= 0.2135994448 Pb= 0.050145474 Po= 0.7362550812"
## [1] "Iteration 6 Allele Frequencies: Pa= 0.2135919575 Pb= 0.0501453459 Po= 0.7362626966"
## [1] "Iteration 7 Allele Frequencies: Pa= 0.213591061 Pb= 0.0501453309 Po= 0.7362636081"
## [1] "Iteration 8 Allele Frequencies: Pa= 0.2135909537 Pb= 0.0501453291 Po= 0.7362637172"
## [1] "Iteration 9 Allele Frequencies: Pa= 0.2135909408 Pb= 0.0501453289 Po= 0.7362637303"
## [1] "Iteration 10 Allele Frequencies: Pa= 0.2135909393 Pb= 0.0501453289 Po= 0.7362637318"
## [1] "Iteration 11 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 12 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 13 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 14 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 15 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 16 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 17 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 18 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
## [1] "Iteration 19 Allele Frequencies: Pa= 0.2135909391 Pb= 0.0501453289 Po= 0.736263732"
#Graphical summaries of results
gg_data <- data.frame(cbind(iter=seq_len(nrow(EM_result)),EM_result))
gg_data1 <- melt(gg_data,id.vars="iter",measure.vars=c("Pa","Pb","Po"))
p1 <- ggplot(data=gg_data1,aes(x=iter,y=value))+geom_line(aes(colour=variable),size=1.05)+guides(colour=guide_legend("Allele Frequencies"),linetype=guide_legend("Allele Frequencies"))+scale_color_discrete(labels=c(expression(~pi[A]),expression(~pi[B]),expression(~pi[O])))+labs(title="EM Algorithm",x="Iteration",y="Frequency")+theme(legend.position="bottom",axis.title=element_text(size=12),title=element_text(size=12),legend.title=element_text(size=11),legend.text=element_text(size=15))

gg_data2 <- melt(gg_data,id.vars="iter",measure.vars="loglike")
p2 <- ggplot(data=gg_data2,aes(x=iter,y=value))+geom_line(size=1.05,colour="#FF9999")+labs(title="EM Algorithm",x="Iteration",y="Log-Likelihood")+theme(legend.position="bottom",axis.title=element_text(size=12),title=element_text(size=12))

#Comparison with Nelder-Mead optimization via optim
#function to be maximized
iter <- 0
trace <- list()
like_trace <- list()
NM_minimize <- function(phi){
  iter <<- iter+1
  trace[[iter]] <<- phi
  Pa <- phi[1]
  Pb <- phi[2]
  Po <- phi[3]
  if(Pa <= 0 || Pa >= 1 || Pb <= 0 || Pb >= 1 || Po <= 0 || Po >= 1) return(-Inf)
  tmp <- log_like(Y=c(186,38,13,284),phi=phi)
  like_trace[[iter]] <<- tmp
  return(tmp)
}

#Running the Nelder-Mead optimization algorithm
optim(par=phi,fn=NM_minimize,method="Nelder-Mead",control=list(fnscale=-1,trace=TRUE))
##   Nelder-Mead direct search function minimizer
## function value for initial parameters = 386.455100
##   Scaled convergence tolerance is 5.75863e-06
## Stepsize computed as 0.033333
## BUILD              4 414.515263 352.029284
## EXTENSION          6 395.556462 307.004820
## EXTENSION          8 386.455100 259.737945
## EXTENSION         10 352.029284 172.666464
## EXTENSION         12 307.004820 72.485265
## REFLECTION        14 259.737945 50.152991
## HI-REDUCTION      16 172.666464 50.152991
## REFLECTION        18 167.214613 39.708811
## HI-REDUCTION      20 99.403771 39.708811
## HI-REDUCTION      22 72.485265 39.708811
## EXTENSION         24 70.929607 24.480800
## REFLECTION        26 50.152991 9.765373
## LO-REDUCTION      28 39.708811 9.765373
## LO-REDUCTION      30 24.480800 9.765373
## HI-REDUCTION      32 15.527634 9.765373
## HI-REDUCTION      34 14.396957 9.765373
## HI-REDUCTION      36 10.870265 9.765373
## HI-REDUCTION      38 10.676396 9.201381
## REFLECTION        40 10.468644 8.978360
## HI-REDUCTION      42 9.765373 8.942434
## REFLECTION        44 9.201381 8.459513
## LO-REDUCTION      46 8.978360 8.426242
## HI-REDUCTION      48 8.942434 8.426242
## LO-REDUCTION      50 8.516199 8.386578
## HI-REDUCTION      52 8.459513 8.386578
## LO-REDUCTION      54 8.426242 8.386578
## HI-REDUCTION      56 8.420708 8.386578
## REFLECTION        58 8.394087 8.382627
## REFLECTION        60 8.389906 8.376334
## HI-REDUCTION      62 8.386578 8.374330
## LO-REDUCTION      64 8.382627 8.374330
## HI-REDUCTION      66 8.376334 8.374330
## REFLECTION        68 8.375460 8.373809
## HI-REDUCTION      70 8.375311 8.373161
## HI-REDUCTION      72 8.374330 8.372861
## HI-REDUCTION      74 8.373809 8.372861
## HI-REDUCTION      76 8.373161 8.372861
## HI-REDUCTION      78 8.373074 8.372754
## HI-REDUCTION      80 8.372936 8.372743
## HI-REDUCTION      82 8.372861 8.372709
## HI-REDUCTION      84 8.372754 8.372661
## LO-REDUCTION      86 8.372743 8.372661
## REFLECTION        88 8.372709 8.372659
## LO-REDUCTION      90 8.372680 8.372652
## HI-REDUCTION      92 8.372661 8.372633
## REFLECTION        94 8.372659 8.372631
## LO-REDUCTION      96 8.372652 8.372631
## Exiting from Nelder Mead minimizer
##     98 function evaluations used
## $par
## [1] 0.17549914 0.04120578 0.60498111
## 
## $value
## [1] -8.372631
## 
## $counts
## function gradient 
##       98       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
#Graphical summary of results
gg_data3 <- matrix(unlist(trace),ncol=3,byrow=TRUE,dimnames=list(NULL,c("Pa","Pb","Po")))
gg_data3 <- data.frame(cbind(iter=seq_len(iter),gg_data3))
gg_data4 <- melt(gg_data3,id.vars="iter")
p3 <- ggplot(data=gg_data4,aes(x=iter,y=value,colour=variable))+geom_line(size=1.05)+labs(title="Nelder-Mead",x="Iteration",y="Frequency")+guides(colour=guide_legend("Allele Frequencies"))+scale_color_discrete(labels=c(expression(~pi[A]),expression(~pi[B]),expression(~pi[O])))+theme(legend.position="bottom",axis.title=element_text(size=12),title=element_text(size=12),legend.title=element_text(size=11),legend.text=element_text(size=15))

gg_data5 <- data.frame(cbind(iter=1:length(unlist(like_trace)),unlist(like_trace)))
p4 <- ggplot(data=gg_data5,aes(x=iter,y=V2))+geom_line(size=1.05,colour="#FF9999")+labs(title="Nelder-Mead",x="Iteration",y="Log-Likelihood")+theme(legend.position="bottom",axis.title=element_text(size=12),title=element_text(size=12),legend.title=element_text(size=11))

Graphical Summaries and Comparison of EM Algorithm vs. Nelder-Mead Optimization The EM-algorithm is very robust to different starting values; when \(\phi=(\pi_{A}=\frac{1}{3},\pi_{B}=\frac{1}{3},\pi_{O}=\frac{1}{3})\) and the stopping rule tolerance is set to 1e^{-36} the algorithm converges after 20 iterations. If we set \(\phi=(\pi_{A}=.01,\pi_{B}=.01,\pi_{O}=.98)\), the algorithm converges after 18 iterations, and gives the same answer for allele frequencies. In comparison the Nelder-Mead optimization function takes 98 iterations for convergence and gives results for \(\phi\) that are very inaccurate compared to the EM-algorithm. A graphical comparison of the two methods is shown below. Note the difference in y and x-axis scales.

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  plots <- c(list(...), plotlist)
  numPlots = length(plots)
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols, nrow = ceiling(numPlots/cols))
  }
 if (numPlots==1) {
    print(plots[[1]])
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

multiplot(p1,p3,cols=2)

multiplot(p2,p4,cols=2)