informationEntropy <- function(p){
-1*sum(sapply(p, function(pi){ pi*log(pi) }))
}
informationEntropy(c(0.3,0.7))
## [1] 0.6108643
informationEntropy(c(0.2,0.25,0.25,0.4))
## [1] 1.381551
informationEntropy(rep(1/3,3))
## [1] 1.098612
All estimate test set error from the training set by adding a penalty for complexity
AIC: needs flat prior or lot of data, multivariate gaussion distribution, n>>p BIC: incorporates informative priors, otherwise equal to AIC WAIC: pointwise estimate, no need for gaussian distribution
Comparisons: 1. From most general to least general: WAIC, DIC, AIC. 2. When posterior predictive mean is a good representation of the posterior predictive distribution, WAIC and DIC will tend to agree. 3. When priors are effectively flat or overwhelmed by the amount of data, the DIC and AIC will tend to agree.
library(rethinking)
data(Howell1)
d <- Howell1
d$age <- (d$age - mean(d$age))/sd(d$age)
set.seed( 1000 )
i <- sample(1:nrow(d),size=nrow(d)/2)
d1 <- d[ i , ]
d2 <- d[ -i , ]
f1 <- alist(
height ~ dnorm(mean = mu, sd = sigma),
mu <- alpha + beta.1*age,
c(alpha, beta.1) ~ dnorm(mean = 0, sd = 100),
sigma ~ dunif(min = 0, max = 50)
)
f2 <- alist(
height ~ dnorm(mean = mu, sd = sigma),
mu <- alpha + beta.1*age + beta.2*age^2,
c(alpha, beta.1, beta.2) ~ dnorm(mean = 0, sd = 100),
sigma ~ dunif(min = 0, max = 50)
)
f3 <- alist(
height ~ dnorm(mean = mu, sd = sigma),
mu <- alpha + beta.1*age + beta.2*age^2 + beta.3*age^3,
c(alpha, beta.1, beta.2, beta.3) ~ dnorm(mean = 0, sd = 100),
sigma ~ dunif(min = 0, max = 50)
)
f4 <- alist(
height ~ dnorm(mean = mu, sd = sigma),
mu <- alpha + beta.1*age + beta.2*age^2 + beta.3*age^3 + beta.4*age^4,
c(alpha, beta.1, beta.2, beta.3, beta.4) ~ dnorm(mean = 0, sd = 100),
sigma ~ dunif(min = 0, max = 50)
)
f5 <- alist(
height ~ dnorm(mean = mu, sd = sigma),
mu <- alpha + beta.1*age + beta.2*age^2 + beta.3*age^3 + beta.4*age^4 + beta.5*age^5,
c(alpha, beta.1, beta.2, beta.3, beta.4, beta.5) ~ dnorm(mean = 0, sd = 100),
sigma ~ dunif(min = 0, max = 50)
)
f6 <- alist(
height ~ dnorm(mean = mu, sd = sigma),
mu <- alpha + beta.1*age + beta.2*age^2 + beta.3*age^3 + beta.4*age^4 + beta.5*age^5 + beta.6*age^6,
c(alpha, beta.1, beta.2, beta.3, beta.4, beta.5, beta.6) ~ dnorm(mean = 0, sd = 100),
sigma ~ dunif(min = 0, max = 50)
)
## pick some sensible starting values for alpha and sigma
alpha.start <- mean(d$height)
sigma.start <- sd(d$height)
## fit our models
m1 <- map(flist = f1, data = d1, start = list(alpha = alpha.start, sigma = sigma.start, beta.1 = 0))
m2 <- map(flist = f2, data = d1, start = list(alpha = alpha.start, sigma = sigma.start, beta.1 = 0, beta.2 = 0))
m3 <- map(flist = f3, data = d1, start = list(alpha = alpha.start, sigma = sigma.start, beta.1 = 0, beta.2 = 0, beta.3 = 0))
m4 <- map(flist = f4, data = d1, start = list(alpha = alpha.start, sigma = sigma.start, beta.1 = 0, beta.2 = 0, beta.3 = 0, beta.4 = 0))
m5 <- map(flist = f5, data = d1, start = list(alpha = alpha.start, sigma = sigma.start, beta.1 = 0, beta.2 = 0, beta.3 = 0, beta.4 = 0, beta.5 = 0))
m6 <- map(flist = f6, data = d1, start = list(alpha = alpha.start, sigma = sigma.start, beta.1 = 0, beta.2 = 0, beta.3 = 0, beta.4 = 0, beta.5 = 0, beta.6 = 0))
# compare the models
compare(m1, m2, m3, m4, m5, m6)
## WAIC pWAIC dWAIC weight SE dSE
## m4 1926.1 5.7 0.0 0.58 25.51 NA
## m5 1927.7 6.5 1.6 0.26 25.50 1.12
## m6 1928.6 7.8 2.5 0.17 24.99 3.12
## m3 1952.3 5.4 26.2 0.00 24.21 10.87
## m2 2150.0 5.2 223.8 0.00 22.62 26.81
## m1 2395.3 3.3 469.2 0.00 22.94 31.12
n.trials <- 1e4
age.seq <- seq(from = -2, to = 3.5, length.out = 58)
prediction.data <- data.frame(age = age.seq)
computeMu <- function(model, data, n.trials) {
mu <- link(fit = model, data = data, n = n.trials)
return(mu)
}
computeMuMean <- function(mu) {
mu.mean <- apply(X = mu, MARGIN = 2, FUN = mean)
return(mu.mean)
}
computeMuPI <- function(mu) {
mu.PI <- apply(X = mu, MARGIN = 2, FUN = PI, prob = .97)
return(mu.PI)
}
simulateHeights <- function(model, prediction.data) {
simulated.heights <- sim(fit = model, data = prediction.data)
return(simulated.heights)
}
plotResults <- function(model, prediction.data, original.data, n.trials) {
mu <- computeMu(model, prediction.data, n.trials)
mu.mean <- computeMuMean(mu)
mu.PI <- computeMuPI(mu)
simulated.heights <- simulateHeights(model = model, prediction.data = prediction.data)
simulated.heights.PI <- apply(X = simulated.heights, MARGIN = 2, FUN = PI)
plot(height ~ age, data = original.data, col = rangi2, main = length(model@coef)-2 )
lines(x = prediction.data$age, y = mu.mean, lty = 2)
lines(x = prediction.data$age, y = mu.PI[1,], lty = 2)
lines(x = prediction.data$age, y = mu.PI[2,], lty = 2)
shade(object = simulated.heights.PI, lim = prediction.data$age)
}
## plot results
plotResults(model = m1, prediction.data = prediction.data, original.data = d1, n.trials = n.trials)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
plotResults(model = m2, prediction.data = prediction.data, original.data = d1, n.trials = n.trials)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
plotResults(model = m3, prediction.data = prediction.data, original.data = d1, n.trials = n.trials)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
plotResults(model = m4, prediction.data = prediction.data, original.data = d1, n.trials = n.trials)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
plotResults(model = m5, prediction.data = prediction.data, original.data = d1, n.trials = n.trials)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
plotResults(model = m6, prediction.data = prediction.data, original.data = d1, n.trials = n.trials)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
modelList <- list(m1,m2,m3,m4,m5,m6)
names(modelList) <- c("m1","m2","m3","m4","m5","m6")
comparison <- compare(m1, m2, m3, m4, m5, m6)
weights <- comparison@output
simulated.heights.av <- lapply(names(modelList), function(modelname){
mm <- modelList[[modelname]]
simulated.heights <- simulateHeights(model = mm, prediction.data = prediction.data)
simulated.heights * weights[modelname,]$weight
})
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
simulated.heights.av <- Reduce('+', simulated.heights.av)
simulated.heights.PI <- apply(simulated.heights.av, 2, PI)
plot(height ~ age, data = d1, col = rangi2, main = "Ensembl" )
shade(object = simulated.heights.PI, lim = prediction.data$age, col=col.alpha("blue"))
Narrower than M4?
calculateDeviance <- function(mm, data=d2){
coefs <- coef(mm)
mu_no <- sapply(seq(1,length(coefs)-2,1), function(beta_index){
coefs[beta_index+2]*data$age^beta_index
})
mu <- rowSums(mu_no)+coefs[1]
log.likelihood <- sum( dnorm(x = data$height, mean = mu, sd = coefs[2], log = TRUE) )
-2*log.likelihood
}
modelDeviances <- sapply(modelList, calculateDeviance)
plot(y=modelDeviances,x=weights[names(modelList),]$WAIC)
f7 <- alist(
height ~ dnorm(mean = mu, sd = sigma),
mu <- alpha + beta.1*age + beta.2*age^2 + beta.3*age^3 + beta.4*age^4 + beta.5*age^5 + beta.6*age^6,
alpha ~ dnorm(mean=0,sd=100),
c(beta.1, beta.2, beta.3, beta.4, beta.5, beta.6) ~ dnorm(mean = 0, sd = 5),
sigma ~ dunif(min = 0, max = 50)
)
m_reg <- map(flist = f7, data = d1, start = list(alpha = alpha.start, sigma = sigma.start, beta.1 = 0, beta.2 = 0, beta.3 = 0, beta.4 = 0, beta.5 = 0, beta.6 = 0))
plotResults(model = m_reg, prediction.data = prediction.data, original.data = d1, n.trials = n.trials)
## [ 1000 / 10000 ]
[ 2000 / 10000 ]
[ 3000 / 10000 ]
[ 4000 / 10000 ]
[ 5000 / 10000 ]
[ 6000 / 10000 ]
[ 7000 / 10000 ]
[ 8000 / 10000 ]
[ 9000 / 10000 ]
[ 10000 / 10000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
plot(precis(m_reg))
plot(precis(m6))
compare(m1,m2,m3,m4,m5,m6,m_reg)
## WAIC pWAIC dWAIC weight SE dSE
## m4 1926.6 5.9 0.0 0.48 25.54 NA
## m5 1928.0 6.7 1.4 0.24 25.43 1.12
## m6 1928.1 7.4 1.5 0.23 24.93 3.05
## m_reg 1931.2 7.0 4.6 0.05 25.60 2.34
## m3 1952.7 5.6 26.2 0.00 24.24 10.93
## m2 2150.1 5.3 223.5 0.00 22.63 26.87
## m1 2395.4 3.4 468.9 0.00 22.89 31.14
calculateDeviance(m_reg)
## [1] 1875.476