library(rethinking)
data(tulips)
d <- tulips
str(d)
## 'data.frame': 27 obs. of 4 variables:
## $ bed : Factor w/ 3 levels "a","b","c": 1 1 1 1 1 1 1 1 1 2 ...
## $ water : int 1 1 1 2 2 2 3 3 3 1 ...
## $ shade : int 1 2 3 1 2 3 1 2 3 1 ...
## $ blooms: num 0 0 111 183.5 59.2 ...
# center variables
d$water.centered <- d$water - mean(d$water)
d$shade.centered <- d$shade - mean(d$shade)
d$bed.b <- ifelse(d$bed == "b", 1, 0)
d$bed.c <- ifelse(d$bed == "c", 1, 0)
# fit model with interaction term plus main effects for unique levels of `bed`
m7H1 <- map(
alist(
blooms ~ dnorm( mu , sigma ) ,
mu <- alpha + beta.water*water.centered + beta.shade*shade.centered + beta.water.shade*water*shade + beta.bed.b*bed.b + beta.bed.c*bed.c,
c(alpha, beta.water, beta.shade, beta.water.shade, beta.bed.b, beta.bed.c) ~ dnorm( 0, 100 ),
sigma ~ dunif( 0 , 100 )
),
data=d,
start=list(alpha=mean(d$blooms), beta.water=0, beta.shade=0, beta.water.shade=0, beta.bed.b=0, beta.bed.c=0, sigma=sd(d$blooms)),
control=list(maxit=1e4)
)
m7H2 <- map(
alist(
blooms ~ dnorm( mu , sigma ) ,
mu <- alpha + beta.water*water.centered + beta.shade*shade.centered + beta.water.shade*water*shade,
alpha ~ dnorm(130, 100),
c(beta.water, beta.shade, beta.water.shade) ~ dnorm( 0, 100 ),
sigma ~ dunif( 0 , 100 )
),
data=d,
start=list(alpha=mean(d$blooms), beta.water=0, beta.shade=0, beta.water.shade=0, sigma=sd(d$blooms)),
control=list(maxit=1e4)
)
compare(m7H1, m7H2)
## WAIC pWAIC dWAIC weight SE dSE
## m7H1 296.4 9.3 0 0.62 10.76 NA
## m7H2 297.4 6.5 1 0.38 10.18 8.97
posterior.samples <- extract.samples(m7H1)
hist(posterior.samples$beta.bed.b)
hist(posterior.samples$beta.bed.c)
hist(posterior.samples$alpha)
plot(precis(m7H1))
data(rugged)
d <- rugged
d <- d[complete.cases(d$rgdppc_2000),]
d$log_gdp <- log(d$rgdppc_2000)
d$rugged.centered <- d$rugged - mean(d$rugged)
# a.
## fit model with all data
m.7H3.all <- map(
alist(
log_gdp ~ dnorm( mu, sigma ),
mu <- alpha + beta.rugged*rugged + beta.cont_africa*cont_africa + beta.rugged.cont_africa*rugged*cont_africa,
alpha ~ dnorm(8.5, 5),
c(beta.rugged, beta.cont_africa, beta.rugged.cont_africa) ~ dnorm(0, 10),
sigma ~ dunif(0, 10)
), data=d
)
## fit model with all data except that of the Seychelles
m.7H3.except.seychelles <- map(
alist(
log_gdp ~ dnorm( mu, sigma ),
mu <- alpha + beta.rugged*rugged + beta.cont_africa*cont_africa + beta.rugged.cont_africa*rugged*cont_africa,
alpha ~ dnorm(8.5, 5),
c(beta.rugged, beta.cont_africa, beta.rugged.cont_africa) ~ dnorm(0, 10),
sigma ~ dunif(0, 10)
), data=d[(d$country != "Seychelles"),]
)
## create triptych plot to examine whether the effect of ruggedness on log-GDP still does depend on continent
plotTriptych <- function(model) {
par(mfrow=c(1,2))
rugged.seq <- 0:8
# loop over values of africa and plot predictions
for ( is.africa in 0:1 ) {
dt <- d[d$cont_africa==is.africa,]
plot( log_gdp ~ rugged , data=dt , col=rangi2 ,
main=paste("cont_africa =", is.africa) , xlim=c(0, 8) , ylim=c(5.5,10) ,
xlab="rugged" )
mu <- link( model , data=data.frame(cont_africa=is.africa, rugged=rugged.seq) )
mu.mean <- apply( mu , 2 , mean )
mu.PI <- apply( mu , 2 , PI , prob=0.97 )
lines( rugged.seq , mu.mean )
lines( rugged.seq , mu.PI[1,] , lty=2 )
lines( rugged.seq , mu.PI[2,] , lty=2 )
}
}
plotTriptych(model = m.7H3.all)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
plotTriptych(model = m.7H3.except.seychelles)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## compute MAP values for the interaction coefficient at cont_africa=1 for each of the models
coef.m.7H3.all <- coef(m.7H3.all)
coef.m.7H3.except.seychelles <- coef(m.7H3.except.seychelles)
coef.m.7H3.all["beta.rugged"] + coef.m.7H3.all["beta.rugged.cont_africa"]*1
## beta.rugged
## 0.1902087
coef.m.7H3.except.seychelles["beta.rugged"] + coef.m.7H3.except.seychelles["beta.rugged.cont_africa"]*1
## beta.rugged
## 0.09453917
d.except.seychelles <- d[(d$country != "Seychelles"),]
m.7H3.c.1 <- map(
alist(
log_gdp ~ dnorm( mu, sigma ),
mu <- alpha + beta.rugged*rugged.centered,
alpha ~ dnorm(8.5, 5),
beta.rugged ~ dnorm(0, 100),
sigma ~ dunif(0, 10)
), data=d.except.seychelles
)
m.7H3.c.2 <- map(
alist(
log_gdp ~ dnorm( mu, sigma ),
mu <- alpha + beta.rugged*rugged.centered + beta.cont_africa*cont_africa,
alpha ~ dnorm(8.5, 5),
c(beta.rugged, beta.cont_africa) ~ dnorm(0, 100),
sigma ~ dunif(0, 10)
), data=d.except.seychelles
)
m.7H3.c.3 <- map(
alist(
log_gdp ~ dnorm( mu, sigma ),
mu <- alpha + beta.rugged*rugged.centered + beta.cont_africa*cont_africa + beta.rugged.cont_africa*rugged.centered*cont_africa,
alpha ~ dnorm(8.5, 5),
c(beta.rugged, beta.cont_africa, beta.rugged.cont_africa) ~ dnorm(0, 100),
sigma ~ dunif(0, 10)
), data=d.except.seychelles
)
compare(m.7H3.c.1, m.7H3.c.2, m.7H3.c.3)
## WAIC pWAIC dWAIC weight SE dSE
## m.7H3.c.3 463.8 4.9 0.0 0.79 15.42 NA
## m.7H3.c.2 466.4 4.1 2.6 0.21 14.34 3.99
## m.7H3.c.1 536.1 2.7 72.4 0.00 13.38 15.98
## generate WAIC-averaged predictions
rugged.seq <- seq(from = 0, to = 8, by = .1)
rugged.seq.centered <- rugged.seq - mean(rugged.seq)
prediction.data = data.frame(rugged.centered = rugged.seq.centered, cont_africa = 1)
mu.ensemble <- ensemble(m.7H3.c.1, m.7H3.c.2, m.7H3.c.3, data = prediction.data)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply(X = mu.ensemble$link, MARGIN = 2, FUN = mean)
mu.PI <- apply(X = mu.ensemble$link, MARGIN = 2, FUN = PI)
data.plot <- d[(d$cont_africa == 1),]
plot(log_gdp ~ rugged.centered, data=data.plot, pch=ifelse(data.plot$country == "Seychelles", 16, 1), col="slateblue")
lines( rugged.seq.centered, mu.mean )
lines( rugged.seq.centered, mu.PI[1,], lty=2 )
lines( rugged.seq.centered, mu.PI[2,], lty=2 )
C
data(nettle)
d <- nettle
d$lang.per.cap <- d$num.lang / d$k.pop
d$log.lang.per.cap <- log(d$lang.per.cap)
d$log.area <- log(d$area)
d$mean.growing.season.c <- scale(d$mean.growing.season)
d$sd.growing.season.c <- scale(d$sd.growing.season)
m <- map(
alist(
log.lang.per.cap ~ dnorm( mu, sigma ),
mu <- alpha + beta.mean.growing.season*mean.growing.season.c + beta.sd.growing.season*sd.growing.season.c + beta.mgs.sdgs*mean.growing.season.c*sd.growing.season.c + beta.log.area*log.area,
c(alpha, beta.mean.growing.season, beta.sd.growing.season, beta.mgs.sdgs, beta.log.area) ~ dnorm( 0, 50 ),
sigma ~ dunif(0, 25)
), data = d
)
precis(m)
## Mean StdDev 5.5% 94.5%
## alpha -5.37 2.06 -8.66 -2.08
## beta.mean.growing.season 0.36 0.19 0.05 0.66
## beta.sd.growing.season -0.36 0.19 -0.66 -0.06
## beta.mgs.sdgs -0.36 0.16 -0.62 -0.11
## beta.log.area -0.01 0.16 -0.26 0.25
## sigma 1.31 0.11 1.13 1.48
dens(d$mean.growing.season.c)
dens(d$sd.growing.season.c)
par(mfrow=c(2,2))
mean.growing.season.seq = seq(-3,2,0.1)
d$sd.growing.season.groups <- cut(d$sd.growing.season.c, breaks=quantile(d$sd.growing.season.c, probs=c(0, 0.25,0.5,0.75,1)),include.lowest=T, dig.lab=2)
# loop over values of africa and plot predictions
for ( sd.group in levels(d$sd.growing.season.groups) ){
dt <- d[d$sd.growing.season.groups==sd.group,]
plot( log.lang.per.cap ~ mean.growing.season.c , data=dt , col=rangi2 ,
main=paste("sd growing season =", sd.group) , xlim=c(-4, 3) , ylim=c(-10,0) ,
xlab="mean growing season" )
mu <- link( m , data=data.frame(sd.growing.season.c = mean(dt$sd.growing.season.c), mean.growing.season.c=mean.growing.season.seq, log.area=mean(d$log.area)) )
mu.mean <- apply( mu , 2 , mean )
mu.PI <- apply( mu , 2 , PI , prob=0.97 )
lines( mean.growing.season.seq , mu.mean )
lines( mean.growing.season.seq , mu.PI[1,] , lty=2 )
lines( mean.growing.season.seq , mu.PI[2,] , lty=2 )
}
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mean season is positively associated at low sd, but this effect decreases as sd increases.