Theme Song
# install.packages("remotes")
library('BBmisc')
# library('rmsfuns')
#remotes::install_github("rstudio/sass")
lib('sass')
## sass
## TRUE
/* https://stackoverflow.com/a/66029010/3806250 */
h1 { color: #002C54; }
h2 { color: #2F496E; }
h3 { color: #375E97; }
h4 { color: #556DAC; }
h5 { color: #92AAC7; }
/* ----------------------------------------------------------------- */
/* https://gist.github.com/himynameisdave/c7a7ed14500d29e58149#file-broken-gradient-animation-less */
.hover01 {
/* color: #FFD64D; */
background: linear-gradient(155deg, #EDAE01 0%, #FFEB94 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #EDAE01 20%, #FFEB94 80%);
}
}
.hover02 {
color: #FFD64D;
background: linear-gradient(155deg, #002C54 0%, #4CB5F5 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #002C54 20%, #4CB5F5 80%);
}
}
.hover03 {
color: #FFD64D;
background: linear-gradient(155deg, #A10115 0%, #FF3C5C 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #A10115 20%, #FF3C5C 80%);
}
}
## https://stackoverflow.com/a/36846793/3806250
options(width = 999)
knitr::opts_chunk$set(class.source = 'hover01', class.output = 'hover02', class.error = 'hover03')
if(!suppressPackageStartupMessages(require('BBmisc'))) {
install.packages('BBmisc', dependencies = TRUE, INSTALL_opts = '--no-lock')
}
suppressPackageStartupMessages(require('BBmisc'))
# suppressPackageStartupMessages(require('rmsfuns'))
pkgs <- c('devtools', 'knitr', 'kableExtra', 'tidyr',
'readr', 'lubridate', 'reprex', 'magrittr',
'timetk', 'plyr', 'dplyr', 'stringr',
'tdplyr', 'tidyverse', 'formattable',
'echarts4r', 'paletteer')
suppressAll(lib(pkgs))
# load_pkg(pkgs)
## Set the timezone but not change the datetime
Sys.setenv(TZ = 'Asia/Tokyo')
## options(knitr.table.format = 'html') will set all kableExtra tables to be 'html', otherwise need to set the parameter on every single table.
options(warn = -1, knitr.table.format = 'html')#, digits.secs = 6)
## https://stackoverflow.com/questions/39417003/long-vectors-not-supported-yet-abnor-in-rmd-but-not-in-r-script
knitr::opts_chunk$set(message = FALSE, warning = FALSE)#,
#cache = TRUE, cache.lazy = FALSE)
rm(pkgs)
Please submit the assignment promptly.
The submission deadline is June 21, 15:59 JST, but if possible submit it one or two days early. Submitting early increases the chance of getting reviews from other learners in time.
In week 2, you considered the problem faced by a biologist interested in characterizing the number of eggs laid by a particular bird species. The data consisted of a sample of \(n=300\) nests on a site in Southern California, contained in the file nestsize.csv:
At the time we visually compared the empirical distribution of the data against a Poisson distribution whose parameter was set to its maximum likelihood estimator, as justification for using a mixture model of the form \(f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\) where \(\delta_0(x)\) represents the degenerate distribution placing all of its mass at zero.
You are asked to build on the EM algorithm you constructed in week 2 to compute the BIC associated with this model and contrast it against the BIC for the simpler Poisson model.
You will be asked to provide (1) code to compute the BIC for the zero-inflated mixture (which requires an EM algorithm), (2) code to compute the BIC for the Poisson model (which does not require an EM algorithm), (3) numerical results for the BIC for each of the two models, and (4) an interpretation of the results in terms of the original problem.
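For reference, every computation below instantiates the same definition of the BIC: for a model with \(k\) free parameters fitted to \(n\) observations with maximized likelihood \(L(\hat{\theta})\),
\[\mathrm{BIC} = -2 \log L(\hat{\theta}) + k \log n,\]
so the penalty term is \(\log n\) for the one-parameter Poisson model (\(k=1\)) and \(2 \log n\) for the zero-inflated mixture (\(k=2\)).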
Derive the formula for the BIC associated with the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
x <- read.csv(file = "data/nestsize.csv", header = FALSE)$V1
n <- length(x) # number of samples
mle = mean(x)
bic_base = 0
for (i in 1:n) {
bic_base = bic_base-2*log(dpois(x[i],mle))
}
bic_base = bic_base+log(n)
cat(paste('mle =', mle, '\nbic_base =', bic_base))
## mle = 1.84333333333333
## bic_base = 1272.1748691699
Derive the formula for the BIC associated with the zero-inflated mixture model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
x <- read.csv("data/nestsize.csv", header = F)
n <- nrow(x)
x <- as.matrix(x)
## Run the actual EM algorithm
## Initialize the parameters
w <- 1/2 #Assign equal weight to each component to start with
lambda <- mean(x) #lambda set as sample mean
s <- 0
sw <- FALSE
QQ <- -Inf
QQ.out <- NULL
epsilon <- 10^(-5)
while(!sw){
## E step
v <- array(0, dim = c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] <- w
v[i,2] <- (1-w)*dpois(x[i], lambda)
v[i,] <- v[i,]/sum(v[i,])
}else{
v[i,1] <- 0
v[i,2] <- 1
}
}
## M step
# parameter
w <- mean(v[,1])
lambda <- sum(x)/sum(v[,2])
#convergence
QQn <- 0
for(i in 1:n){
if(x[i]==0){
QQn <- QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn <- QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw <- TRUE
}
QQ <- QQn
QQ.out <- c(QQ.out, QQ)
s <- s + 1
print(paste(s, QQn,w,lambda))
}
## [1] "1 -576.591134395629 0.368359448807043 2.91832645933181"
## [1] "2 -560.13333821497 0.390492555379738 3.02429994843094"
## [1] "3 -555.36650787464 0.396587326296866 3.05484689610648"
## [1] "4 -554.153118451655 0.398118096298951 3.06261630728294"
## [1] "5 -553.854920699012 0.398493040612906 3.06452536344982"
## [1] "6 -553.78227912419 0.398584303453195 3.06499039502518"
## [1] "7 -553.764621578937 0.398606483018032 3.06510343274719"
## [1] "8 -553.760331674038 0.398611871289296 3.06513089522601"
bic_mix <- 0
for(i in 1:n) {
if(x[i]==0){
bic_mix <- -2*log(w+(1-w)*dpois(x[i],lambda))+bic_mix
} else {
bic_mix <- -2*log((1-w)*dpois(x[i],lambda))+bic_mix
}
}
bic_mix <- bic_mix+2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
cat(paste('bic_mix =', bic_mix, '\nbic_base =', bic_base))
## bic_mix = 1056.84484815141
## bic_base = 1272.1748691699
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
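A one-line justification of this standard fact: setting the derivative of the log-likelihood to zero gives
\[\frac{d}{d\lambda} \log L(\lambda) = \sum_{i=1}^{n} \left( \frac{x_i}{\lambda} - 1 \right) = 0 \quad \Longrightarrow \quad \hat{\lambda} = \frac{1}{n} \sum_{i=1}^{n} x_i = \bar{x}.\]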
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
Twice the negative log-likelihood in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R.
Is the numerical value of the BIC correct for the dataset nestsize.csv?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
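In formula form, the E step of the draft below computes the responsibility of the point mass for each observation,
\[v_{i,1} = \frac{w \, \delta_0(x_i)}{w \, \delta_0(x_i) + (1-w) \, e^{-\lambda} \lambda^{x_i} / x_i!} = \begin{cases} \dfrac{w}{w + (1-w) e^{-\lambda}}, & x_i = 0, \\ 0, & x_i > 0, \end{cases} \qquad v_{i,2} = 1 - v_{i,1},\]
which is why the code only needs to treat the \(x_i = 0\) case explicitly.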
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initialize parameters
w = 0.1 #Initial weight for the zero component
lambda = mean(x) #Initialize the Poisson rate at the sample mean
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model?
In this case
\[-2 \log L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negative log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
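For instance, a vectorized equivalent of the loop above (a minimal sketch, assuming x, w and lambda hold the data and the fitted values from the EM run):
## Vectorized alternative: add the point mass only where x == 0
dens = (1 - w)*dpois(x, lambda) + w*(x == 0)
twicenegloglik = -2*sum(log(dens))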
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega = 0.40\) and \(\lambda = 3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places).
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
rm(list = ls())
x <- read.csv(file = "data/nestsize.csv", header = FALSE)$V1
n <- length(x) # number of samples
mle = mean(x)
bic_base = 0
for (i in 1:n) {
bic_base = bic_base-2*log(dpois(x[i],mle))
}
bic_base = bic_base+log(n)
cat(paste('mle =', mle, '\nbic_base =', bic_base))
## mle = 1.84333333333333
## bic_base = 1272.1748691699
Derive the formula for the BIC associated with the zero-inflated mixture model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
x = read.csv("data/nestsize.csv", header = F)
n = nrow(x)
x = as.matrix(x)
## Run the actual EM algorithm
## Initialize the parameters
w = 1/2 #Assign equal weight to each component to start with
lambda = mean(x) #lambda set as sample mean
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim = c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = w
v[i,2] = (1-w)*dpois(x[i], lambda)
v[i,] = v[i,]/sum(v[i,])
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# parameter
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
#convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn,w,lambda))
}
## [1] "1 -576.591134395629 0.368359448807043 2.91832645933181"
## [1] "2 -560.13333821497 0.390492555379738 3.02429994843094"
## [1] "3 -555.36650787464 0.396587326296866 3.05484689610648"
## [1] "4 -554.153118451655 0.398118096298951 3.06261630728294"
## [1] "5 -553.854920699012 0.398493040612906 3.06452536344982"
## [1] "6 -553.78227912419 0.398584303453195 3.06499039502518"
## [1] "7 -553.764621578937 0.398606483018032 3.06510343274719"
## [1] "8 -553.760331674038 0.398611871289296 3.06513089522601"
bic_mix = 0
for(i in 1:n) {
if(x[i]==0){
bic_mix = -2*log(w+(1-w)*dpois(x[i],lambda))+bic_mix
} else {
bic_mix = -2*log((1-w)*dpois(x[i],lambda))+bic_mix
}
}
bic_mix = bic_mix+2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
# Yes: the BIC for the mixture model is 1056.845, compared with a BIC of 1272.175 for the base model.
# Despite the mixture model having more parameters, its BIC is much lower, suggesting a better fit.
cat(paste('bic_mix =', bic_mix, '\nbic_base =', bic_base))
## bic_mix = 1056.84484815141
## bic_base = 1272.1748691699
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
Twice the negative log-likelihood in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R.
Is the numerical value of the BIC correct for the dataset nestsize.csv?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initialize parameters
w = 0.1 #Initial weight for the zero component
lambda = mean(x) #Initialize the Poisson rate at the sample mean
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model?
In this case
\[-2 \log L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negative log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega = 0.40\) and \(\lambda = 3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places).
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
rm(list = ls())
num_eggs <- read.csv(file = "data/nestsize.csv", header = FALSE)$V1
x <- num_eggs
n <- length(x) # number of samples
#For Poisson:
lambda_p = sum(num_eggs)/n
QQp = 0
for(i in 1:n){
QQp = QQp+ ((dpois(num_eggs[i], lambda=lambda_p, log=TRUE)))
}
BICPoisson = -2*QQp + log(n)
print(paste("Poisson", BICPoisson))
## [1] "Poisson 1272.1748691699"
Derive the formula for the BIC associated with the zero-inflated mixture model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
dat = read.csv(file = "data/nestsize.csv", header = FALSE)
num_eggs = dat$V1
## Run the actual EM algorithm
## Initialize the parameters
KK = 2 # number of components
n = length(num_eggs) # number of samples
v = array(0, dim=c(n,KK))
v[,1] = 0.5*(num_eggs==0) #Assign half weight for nests with 0 eggs to first component
v[,2] = 1-v[,1] #Assign all of the remaining nests to the second component
w = mean(v[,1]) #weight of the first component
lambda = sum(num_eggs*v[,2])/sum(v[,2]) #parameter (mean) of the second component
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
##Checking convergence of the algorithm
while(!sw){
## E step
v = array(0, dim=c(n,KK))
v[,1] = (w/(w+(1-w)*exp(-lambda)))*(num_eggs==0) #first component is non-zero only for the zero-egg nests
v[,2] = 1-v[,1] #second component gets rest of the weights
## M step
# Weight
w = mean(v[,1])
# lambda for the second component
lambda = sum(v[,2]*num_eggs)/sum(v[,2])
print(paste(s, w, lambda))
##Check convergence
QQn = log(w)*sum(v[,1]) #First component
for(i in 1:n){
QQn = QQn + (v[i,2]*(log(1-w)+dpois(num_eggs[i], lambda=lambda, log=TRUE)))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "0 0.315100320355003 2.6913917294994"
## [1] "1 -610.870768261843"
## [1] "1 0.371874558663767 2.93465797120389"
## [1] "2 -574.065781405036"
## [1] "2 0.391518963077006 3.02940144635373"
## [1] "3 -559.337740076761"
## [1] "3 0.396849411917633 3.05617431161587"
## [1] "4 -555.159222811549"
## [1] "4 0.398182559616356 3.06294435760827"
## [1] "5 -554.101877806045"
## [1] "5 0.398508747299135 3.06460538712117"
## [1] "6 -553.842420453926"
## [1] "6 0.398588121593604 3.06500985350962"
## [1] "7 -553.779239532299"
## [1] "7 0.398607410647733 3.06510816057561"
## [1] "8 -553.763883046672"
## [1] "8 0.398612096629121 3.06513204372942"
## [1] "9 -553.760152266609"
QQ=0
for(i in 1:n){
if (num_eggs[i]==0) {
QQ = QQ + log(w+(1-w)*dpois(num_eggs[i], lambda=lambda))
} else {
QQ = QQ + log((1-w)*dpois(num_eggs[i], lambda=lambda))
}
}
BICzeroInflated = -2*QQ + 2*log(n) #2 parameters
print(paste("Zero Inflated BIC=",BICzeroInflated))
## [1] "Zero Inflated BIC= 1056.84484814988"
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
#Poisson BIC = 1272.1748691699, Zero Inflated BIC = 1056.84484814988. Yes, the mixture model is better.
cat(paste(
"Poisson =", BICPoisson,
"\nZero Inflated BIC =",BICzeroInflated
))
## Poisson = 1272.1748691699
## Zero Inflated BIC = 1056.84484814988
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
Twice the negative log-likelihood in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R.
Is the numerical value of the BIC correct for the dataset nestsize.csv?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initialize parameters
w = 0.1 #Initial weight for the zero component
lambda = mean(x) #Initialize the Poisson rate at the sample mean
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model?
In this case
\[-2 \log L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negative log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega = 0.40\) and \(\lambda = 3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places).
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
rm(list = ls())
nestsize <- read.csv('data/nestsize.csv', header=FALSE)
x = nestsize$V1
n = nrow(nestsize)
BIC = -2*sum(dpois(x, mean(x), log = TRUE)) + 2*log(n) # note: the penalty for the one-parameter Poisson model should be log(n), not 2*log(n)
#BIC = 1272.713
paste('BIC =', BIC)
## [1] "BIC = 1277.87865164456"
nestsize <- read.csv('data/nestsize.csv') # header = TRUE by default: the first value becomes the column name 'X4' and one observation is lost
x = nestsize$X4
n = nrow(nestsize) # n is now 299 rather than 300
BIC = -2*sum(dpois(x, mean(x), log = TRUE)) + 2*log(n)
#BIC = 1272.713
paste('BIC =', BIC)
## [1] "BIC = 1272.71335370079"
Derive the formula for the BIC associated with the zero-inflated mixture model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
## Run the actual EM algorithm
## Initialize the parameters
set.seed(81196)
x = nestsize$X4
w.hat = 1/2 #Assign equal weight to each component to start with
lambda.hat = mean(x) #Initialize lambda at the sample mean
KK = 2
n = nrow(nestsize)
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,KK))
v[,1] = w.hat*factor/(w.hat*factor+(1-w.hat)*dpois(x, lambda.hat)) # 'factor' is never defined, which triggers the error below
v[,2] = (1-w.hat)*dpois(x, lambda.hat)/(w.hat*factor+(1-w.hat)*dpois(x, lambda.hat))
## M step
# Weights
w.hat = sum(v[,1])/(sum(v[,1]) + sum(v[,2]))
lambda.hat = sum(v[,2] * x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
QQn = QQn + v[i,1]*log(w.hat) + v[i,2]*(log(1-w.hat) + dpois(x[i], lambda.hat, log = TRUE))
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## Error in w.hat * factor: non-numeric argument to binary operator
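A plausible repair of the failing E step (an assumption about the author's intent: factor was presumably meant to be the point-mass indicator \(\delta_0(x)\)):
## Hypothetical fix: define the zero indicator before using it in the E step
factor = as.numeric(x == 0)
v[,1] = w.hat*factor/(w.hat*factor + (1-w.hat)*dpois(x, lambda.hat))
v[,2] = 1 - v[,1]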
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
BIC = -2*log(w.hat*exp(QQn)) + 2*log(n) # errors because QQn was never created (the EM loop above failed)
## Error in eval(expr, envir, enclos): object 'QQn' not found
#BIC = 1116.189
paste('BIC =', BIC)
## [1] "BIC = 1272.71335370079"
俄
## Error in eval(expr, envir, enclos): object '俄' not found
#Yes
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
Twice the negative log-likelihood in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R.
Is the numerical value of the BIC correct for the dataset nestsize.csv?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initialize parameters
w = 0.1 #Initial weight for the zero component
lambda = mean(x) #Initialize the Poisson rate at the sample mean
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -663.532874569727"
## [1] "2 -625.456990639752"
## [1] "3 -581.884961223092"
## [1] "4 -560.423572991847"
## [1] "5 -553.774092953591"
## [1] "6 -552.043417906285"
## [1] "7 -551.614760706599"
## [1] "8 -551.509920486443"
## [1] "9 -551.484358356205"
## [1] "10 -551.478130525885"
## [1] "11 -551.476613488737"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model?
In this case
\[-2 \log L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negative log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega = 0.40\) and \(\lambda = 3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places).
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
x <- read.csv(file = "data/nestsize.csv", header = FALSE)$V1
n <- length(x) # number of samples
#x = dat$V1
#n = length(x)
# Poisson model
lambda_pois = mean(x)
BIC_poiss = -2 * sum(dpois(x, lambda_pois, log=TRUE)) + log(n)
# Results in BIC_poiss = 1272.175
## Initialize the parameters
w = sum(x == 0) / length(x)
lambda = mean(x[x > 0])
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
while(!sw){
## E step
v = array(0, dim=c(n,2))
v[x == 0, 1] = w
v[x == 0, 2] = (1 - w) * exp(-lambda)
v[x > 0, 2] = 1
v = v / rowSums(v)
## M step
# Weights
w = mean(v[,1])
lambda = sum(x) / sum(v[,2])
##Check convergence
QQn = sum(v[,1] * log(w) + v[,2] * (log(1 - w) + dpois(x, lambda, log=TRUE)))
if(abs(QQn-QQ)/abs(QQn) < epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -548.759553436548"
## [1] "2 -552.578963342725"
## [1] "3 -553.474276670754"
## [1] "4 -553.689918276775"
## [1] "5 -553.742192217913"
## [1] "6 -553.75488372352"
## [1] "7 -553.757966235998"
log_likelihood = 0
for(i in 1:n){
if(x[i]==0){
log_likelihood = log_likelihood + log(w + (1-w)*dpois(x[i], lambda))
} else {
log_likelihood = log_likelihood + log(1-w) + dpois(x[i], lambda, log=TRUE)
}
}
BIC = -2 * log_likelihood + 2 * log(n)
#Results in BIC = 1056.845
BIC
## [1] 1056.845
Derive the formula for the BIC associated with the zero-inflated mixture model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv:
#Yes, as the BIC for the mixture model (1056.845) is smaller than the BIC for the Poisson model (1272.175)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
Twice the negative log-likelihood in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R.
Is the numerical value of the BIC correct for the dataset nestsize.csv?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initialize parameters
w = 0.1 #Initial weight for the zero component
lambda = mean(x) #Initialize the Poisson rate at the sample mean
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model?
In this case
\[-2 \log L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negative log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega = 0.40\) and \(\lambda = 3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places).
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
The likelihood function in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R
Is the numerical value for the dataset nestsize.csv
· BIC correct?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initiatlize parameters
w = 0.1 #Assign equal weight to each component to start with
lambda = mean(x) #Random cluster centers randomly spread over the support of the data
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model.
In this case
\[-2 L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negatie log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega =0.40\) and \(3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places)..
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
The likelihood function in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R
Is the numerical value for the dataset nestsize.csv
· BIC correct?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initiatlize parameters
w = 0.1 #Assign equal weight to each component to start with
lambda = mean(x) #Random cluster centers randomly spread over the support of the data
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model.
In this case
\[-2 L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negatie log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega =0.40\) and \(3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places)..
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
The likelihood function in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R
Is the numerical value for the dataset nestsize.csv
· BIC correct?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initiatlize parameters
w = 0.1 #Assign equal weight to each component to start with
lambda = mean(x) #Random cluster centers randomly spread over the support of the data
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model.
In this case
\[-2 L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negatie log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega =0.40\) and \(3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places)..
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
(Note that you can think about this model as a mixture with a single component).
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Derive the formula for the BIC associated with the the Poisson model
\[f(x) = w \delta_0(x) + (1-w) \frac{e^{-\lambda} \lambda^x}{x!} \quad \quad x \in \{0,1,2,\ldots\}\]
Then, provide code to compute the BIC and use it to evaluate it for the dataset nestsize.csv
:
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Is the number of effective parameters correct?
This model has only 1 parameter (the rate \(\lambda\)).
Is the MLE computed correctly for the Poisson model?
The MLE for the Poisson model can be obtained analytically and does not require the use of any numerical optimization algorithm. In this case \(\hat{\lambda}=\bar{x}\), the average of the observations, is the MLE.
lambdahat = mean(x)
n = length(x)
Is the negative log-likelihood evaluated at the MLE computed correctly for the Poisson model?
The likelihood function in this case is
\[-2\log L(\lambda) = 2n \bar{x} - 2n\bar{x} \log \bar{x} + 2 \sum_{i=1}^{n} \log x_i !\]
The following R code performs the evaluation:
twicenegloglik = 2*n*lambdahat - 2*n*lambdahat*log(lambdahat) + 2*sum(lfactorial(x))
Alternatively, you can use the built in functions to evaluate the density of the Poisson distribution,
twicenegloglik = -2*sum(dpois(x,lambdahat,log=T))
When grading, please remember that there are many ways to do the same thing in R
Is the numerical value for the dataset nestsize.csv
· BIC correct?
The MLE in this case is \(\hat{\lambda} = 1.843333\), which leads to a value of the BIC equal to \(1272.2\).
BIC1 = twicenegloglik + log(n)
Is the number of effective parameters correct?
In this case there are two parameters, \(\omega\) and \(\lambda\).
Is the EM algorithm for fitting the mixture correct?
You already derived the EM algorithm for this model in week 2 of the course. A draft version of the algorithm is provided below.
## Setup controls for the algorithm
s = 0
sw = FALSE
QQ = -Inf
QQ.out = NULL
epsilon = 10^(-5)
## Initiatlize parameters
w = 0.1 #Assign equal weight to each component to start with
lambda = mean(x) #Random cluster centers randomly spread over the support of the data
## Repeat E and M steps until convergence
while(!sw){
## E step
v = array(0, dim=c(n,2))
for(i in 1:n){
if(x[i]==0){
v[i,1] = log(w)
v[i,2] = log(1-w) + dpois(x[i], lambda, log=TRUE)
v[i,] = exp(v[i,] - max(v[i,]))/sum(exp(v[i,] - max(v[i,])))
}else{
v[i,1] = 0
v[i,2] = 1
}
}
## M step
# Weights
w = mean(v[,1])
lambda = sum(x)/sum(v[,2])
##Check convergence
QQn = 0
for(i in 1:n){
if(x[i]==0){
QQn = QQn + v[i,1]*log(w) + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}else{
QQn = QQn + v[i,2]*(log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
if(abs(QQn-QQ)/abs(QQn)<epsilon){
sw=TRUE
}
QQ = QQn
QQ.out = c(QQ.out, QQ)
s = s + 1
print(paste(s, QQn))
}
## [1] "1 -665.770652745491"
## [1] "2 -627.28059416914"
## [1] "3 -583.751999411541"
## [1] "4 -562.533341454046"
## [1] "5 -556.003801253986"
## [1] "6 -554.31143569587"
## [1] "7 -553.893589555972"
## [1] "8 -553.791684738297"
## [1] "9 -553.766907034284"
## [1] "10 -553.760886876408"
## [1] "11 -553.75942443985"
Is the negative log-likelihood evaluated at the MLE computed correctly for the mixture model.
In this case
\[-2 L \left( \hat{w}, \hat{\lambda} \right) = -2 \sum_{i=1}^{n} \log \left\{ \hat{w}\delta_0(x_i) + (1-\hat{w}) \frac{e^{-\hat{\lambda}} \hat{\lambda}^{x_i}}{x_i!} \right\}\]
Because the two components have different supports, it is preferable to consider two cases when computing the log-likelihood for each observation. For example
## Compute twice the negatie log-likelihood for the model
twicenegloglik = 0
for(i in 1:n){
if(x[i]==0){
twicenegloglik = twicenegloglik - 2*log(w + (1-w)*dpois(x[i], lambda))
}else{
twicenegloglik = twicenegloglik - 2*( log(1-w) + dpois(x[i], lambda, log=TRUE))
}
}
However, remember that there are many ways to achieve the same thing in R.
Is the numerical value for the BIC correct?
In this case, the MLEs are (rounded to 2 decimal places) \(\omega =0.40\) and \(3.07\) (you had already obtained these values in week 2). This leads to a value of the BIC equal to \(1056.85\) (rounded to two decimal places)..
BIC2 = twicenegloglik + 2*log(n)
Does the BIC provide evidence for the mixture model in the nestsize.csv
dataset?
Yes, the BIC for the mixture model is smaller than the BIC for the simpler Poisson model. This supports our original observation based on graphical evidence that the mixture model is more appropriate for this data than the simpler Poisson model.
It’s useful to record some information about how your file was created.
suppressMessages(require('dplyr', quietly = TRUE))
suppressMessages(require('magrittr', quietly = TRUE))
suppressMessages(require('formattable', quietly = TRUE))
suppressMessages(require('knitr', quietly = TRUE))
suppressMessages(require('kableExtra', quietly = TRUE))
sys1 <- devtools::session_info()$platform %>%
unlist %>% data.frame(Category = names(.), session_info = .)
rownames(sys1) <- NULL
sys2 <- data.frame(Sys.info()) %>%
dplyr::mutate(Category = rownames(.)) %>% .[2:1]
names(sys2)[2] <- c('Sys.info')
rownames(sys2) <- NULL
if (nrow(sys1) == 9 && nrow(sys2) == 8) {
sys2 %<>% rbind(., data.frame(
Category = 'Current time',
Sys.info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
} else {
sys1 %<>% rbind(., data.frame(
Category = 'Current time',
session_info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
}
sys <- cbind(sys1, sys2) %>%
kbl(caption = 'Additional session information:') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
row_spec(0, background = 'DimGrey', color = 'yellow') %>%
column_spec(1, background = 'CornflowerBlue', color = 'red') %>%
column_spec(2, background = 'grey', color = 'black') %>%
column_spec(3, background = 'CornflowerBlue', color = 'blue') %>%
column_spec(4, background = 'grey', color = 'white') %>%
row_spec(9, bold = TRUE, color = 'yellow', background = '#D7261E')
rm(sys1, sys2)
sys
| Category | session_info | Category | Sys.info |
|---|---|---|---|
| version | R version 4.1.0 (2021-05-18) | sysname | Linux |
| os | Ubuntu 20.04.2 LTS | release | 5.8.0-54-generic |
| system | x86_64, linux-gnu | version | #61~20.04.1-Ubuntu SMP Thu May 13 00:05:49 UTC 2021 |
| ui | X11 | nodename | Scibrokes-Trading |
| language | en | machine | x86_64 |
| collate | en_US.UTF-8 | login | englianhu |
| ctype | en_US.UTF-8 | user | englianhu |
| tz | Asia/Tokyo | effective_user | englianhu |
| date | 2021-05-31 | Current time | 2021-05-31 22:41:36 JST🗾 |