Easy

11E1

In an ordered categorical variable the categories have a natural ordering (A > B > C > D), but the distances between adjacent categories need not be equal; developmental stages are an example. An unordered categorical variable has no such structure, for example types of furniture.
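A small R illustration of the two types (the labels are made up):

# ordered categorical: the levels have a natural order
stage <- factor(c("egg", "larva", "pupa", "adult"),
                levels = c("egg", "larva", "pupa", "adult"), ordered = TRUE)
# unordered categorical: no ordering among the levels
furniture <- factor(c("chair", "table", "sofa"))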

11E2

The cumulative log-odds of a value is the log-odds of that value or any smaller value, log( Pr(y <= k) / (1 - Pr(y <= k)) ). An ordinary log-odds refers to that particular value only, log( Pr(y = k) / (1 - Pr(y = k)) ).
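For example, with some made-up category probabilities:

p <- c(0.1, 0.2, 0.3, 0.4)        # Pr(y = k) for k = 1..4
log(p / (1 - p))                  # ordinary log-odds of each particular value
cum_p <- cumsum(p)                # Pr(y <= k)
log(cum_p / (1 - cum_p))          # cumulative log-odds; the last one is Inf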

11E3

If zero-inflation is ignored, the model attributes all of the zeroes to the count process, even though many of them arise from a separate process. It therefore believes a zero outcome is more likely than it really is whenever the count process is active, and the estimated rate of that process is biased downward.
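A quick simulation with made-up parameters shows the consequence: the plain Poisson estimate of the rate is pulled toward zero.

set.seed(11)
p_zero <- 0.3                     # probability the process never runs (extra zeroes)
lambda <- 5                       # true rate when the process does run
z <- rbinom(1e4, 1, p_zero)
y <- (1 - z) * rpois(1e4, lambda)
mean(y)                           # about (1 - p_zero) * lambda = 3.5, well below the true rate of 5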

11E4

Over-dispersion: the counts show more variance than the pure count distribution allows, typically because some key measurement that drives much of the variance is missing from the model. For example, counting events across heterogeneous groups without recording which group each count came from will produce over-dispersed counts.
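A minimal example: mix counts from two groups with different (made-up) rates and ignore the grouping variable.

set.seed(11)
y <- c(rpois(5e3, 2), rpois(5e3, 20))  # the grouping variable is the "missing measurement"
mean(y)                                 # around 11
var(y)                                  # far larger than the mean -> over-dispersed for a Poisson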

Medium

11M1

nr_employees <- c(12, 36, 7, 41, 0) # counts for ratings 1-4; append a zero so the final cumulative odds are (total)/0 = Inf

# cumulative log-odds of rating <= i: log( Pr(rating <= i) / Pr(rating > i) )
for(i in 1:4){
  print(log(sum(nr_employees[1:i]) / sum(nr_employees[(i+1):5])))
}
## [1] -1.94591
## [1] 0
## [1] 0.2937611
## [1] Inf
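The same numbers fall out directly from the cumulative proportions:

cum_pr <- cumsum(c(12, 36, 7, 41)) / 96   # cumulative proportion with rating <= k
qlogis(cum_pr)                             # logit of the cumulative proportions: -1.9459, 0, 0.2938, Inf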

11M2

# cumulative proportion of employees with rating <= i
cum_prop <- nr_employees / sum(nr_employees)
cum_prop <- sapply(1:4, function(i){ sum(cum_prop[1:i]) })
plot(y = cum_prop, x = 1:4, type = "b", ylim = c(0, 1))
segments(1:4, 0, 1:4, cum_prop) # cumulative probability of each rating
# offset blue segments show the discrete probability of each individual rating
for(i in 1:4){ segments(i + 0.05, c(0, cum_prop)[i], i + 0.05, cum_prop[i], col = "blue") }

11M3

Likelihood for the binomial: Pr(x | n, p) = (n choose x) * p^x * (1 - p)^(n - x), where x is the number of successes in n trials.

For the zero-inflated binomial, a zero can arise either because the process does not run (probability p_not_work) or because it runs but produces no successes, so:

Pr(0) = p_not_work + (1 - p_not_work) * (1 - p_success)^n_trials

Pr(x) = (1 - p_not_work) * (n_trials choose x) * p_success^x * (1 - p_success)^(n_trials - x), for x > 0
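A minimal R sketch of this likelihood (the argument names simply mirror the notation above):

zibinom_lik <- function(x, n_trials, p_not_work, p_success) {
  if (x == 0) {
    # zero either because the process never ran, or it ran and produced no successes
    p_not_work + (1 - p_not_work) * (1 - p_success)^n_trials
  } else {
    # non-zero counts can only come from the process actually running
    (1 - p_not_work) * dbinom(x, size = n_trials, prob = p_success)
  }
}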

Hard

11H1

library(rethinking)
data("Hurricanes")

# Poisson regression of deaths on femininity
m11H1.map <- map(
  alist(
    deaths ~ dpois( lambda ),
    log(lambda) <- a + bF*femininity,
    a ~ dnorm(0,10),
    bF ~ dnorm(0,5)
  ) ,
  data=Hurricanes)

# intercept-only Poisson model for comparison
m11H1.map2 <- map(
  alist(
    deaths ~ dpois( lambda ),
    log(lambda) <- a ,
    a ~ dnorm(0,10)
  ) ,
  data=Hurricanes)

compare(m11H1.map,m11H1.map2)
##              WAIC pWAIC dWAIC weight      SE    dSE
## m11H1.map  4398.5 130.8   0.0      1  990.72     NA
## m11H1.map2 4447.3  78.1  48.8      0 1074.47 147.31
y <- sim(m11H1.map)
y.mean <- colMeans(y)
y.PI <- apply(y, 2, PI)


# plot the model's predicted deaths vs. the observed deaths for each hurricane
plot(y=Hurricanes$deaths, x=Hurricanes$femininity, col=rangi2, ylab="deaths", xlab="femininity", pch=16)
points(y=y.mean, x=Hurricanes$femininity, pch=1)
segments(x0=Hurricanes$femininity, x1= Hurricanes$femininity, y0=y.PI[1,], y1=y.PI[2,])

lines(y=y.mean[order(Hurricanes$femininity)], x=sort(Hurricanes$femininity))
lines(y=y.PI[1,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2)
lines(y=y.PI[2,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2)

The relationship doesn't look very strong, but the deadliest hurricanes do seem to have more feminine names.

11H2

library(rethinking)
data("Hurricanes")

# gamma-Poisson (negative-binomial) model of deaths on femininity
m11H2 <- map(
  alist(
    deaths ~ dgampois( mu, scale ),
    log(mu) <- a + bF*femininity,
    a ~ dnorm(0,10),
    bF ~ dnorm(0,5),
    scale ~ dcauchy(0, 2)
  ) ,
  data=Hurricanes,
  start=list(scale=2))

precis(m11H2)
##        Mean StdDev  5.5% 94.5%
## scale 42.46   8.44 28.96 55.95
## a      2.92   0.27  2.49  3.35
## bF     0.01   0.03 -0.04  0.06
y <- sim(m11H2)
y.mean <- colMeans(y)
y.PI <- apply(y, 2, PI)


# plot the model's predicted deaths vs. the observed deaths for each hurricane
plot(y=Hurricanes$deaths, x=Hurricanes$femininity, col=rangi2, ylab="deaths", xlab="femininity", pch=16)
points(y=y.mean, x=Hurricanes$femininity, pch=1)
segments(x0=Hurricanes$femininity, x1= Hurricanes$femininity, y0=y.PI[1,], y1=y.PI[2,])

lines(y=y.mean[order(Hurricanes$femininity)], x=sort(Hurricanes$femininity))
lines(y=y.PI[1,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2)
lines(y=y.PI[2,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2)

Now the model captures the extra dispersion, and femininity is essentially useless as a predictor (bF is centered on zero).

11H3

data("Hurricanes")

Hurricanes <- Hurricanes[complete.cases(Hurricanes),]

# standardize the continuous predictors
Hurricanes$damage_norm <- scale(Hurricanes$damage_norm)
Hurricanes$min_pressure <- scale(Hurricanes$min_pressure)

# femininity + min_pressure + their interaction
m11H2_2 <- map(
  alist(
    deaths ~ dgampois( mu, scale ),
    log(mu) <- a + bF*femininity +bP*min_pressure + bPF*min_pressure*femininity,
    a ~ dnorm(0,10),
    c(bF,bP,bPF) ~ dnorm(0,5),
    scale ~ dcauchy(0, 2)
  ) ,
  data=Hurricanes,
  start=list(scale=2, bF=0, bP=0, bPF=0))

# femininity + damage_norm + their interaction
m11H2_3 <- map(
  alist(
    deaths ~ dgampois( mu, scale ),
    log(mu) <- a + bF*femininity +bD*damage_norm + bDF*damage_norm*femininity,
    a ~ dnorm(0,10),
    c(bF,bD,bDF) ~ dnorm(0,5),
    scale ~ dcauchy(0, 2)
  ) ,
  data=Hurricanes,
  start=list(scale=2, bF=0, bD=0, bDF=0))

# both predictors plus both interactions with femininity
m11H2_4 <- map(
  alist(
    deaths ~ dgampois( mu, scale ),
    log(mu) <- a + bF*femininity +bD*damage_norm + bDF*damage_norm*femininity +bP*min_pressure + bPF*min_pressure*femininity,
    a ~ dnorm(0,10),
    c(bF,bD,bDF,bP,bPF) ~ dnorm(0,5),
    scale ~ dcauchy(0, 2)
  ) ,
  data=Hurricanes,
  start=list(scale=2, bF=0, bD=0, bDF=0, bP=0, bPF=0))

# both predictors, interaction only with min_pressure
m11H2_5 <- map(
  alist(
    deaths ~ dgampois( mu, scale ),
    log(mu) <- a + bF*femininity +bD*damage_norm + bP*min_pressure + bPF*min_pressure*femininity,
    a ~ dnorm(0,10),
    c(bF,bD,bP,bPF) ~ dnorm(0,5),
    scale ~ dcauchy(0, 2)
  ) ,
  data=Hurricanes,
  start=list(scale=2, bF=0, bD=0, bP=0, bPF=0))

# both predictors, interaction only with damage_norm
m11H2_6 <- map(
  alist(
    deaths ~ dgampois( mu, scale ),
    log(mu) <- a + bF*femininity +bD*damage_norm + bP*min_pressure + bDF*damage_norm*femininity,
    a ~ dnorm(0,10),
    c(bF,bD,bDF,bP) ~ dnorm(0,5),
    scale ~ dcauchy(0, 2)
  ) ,
  data=Hurricanes,
  start=list(scale=2, bF=0, bD=0, bDF=0, bP=0))

precis(m11H2)
##        Mean StdDev  5.5% 94.5%
## scale 42.46   8.44 28.96 55.95
## a      2.92   0.27  2.49  3.35
## bF     0.01   0.03 -0.04  0.06
precis(m11H2_2)
##        Mean StdDev  5.5% 94.5%
## scale 26.62   5.23 18.26 34.98
## bF     0.00   0.03 -0.05  0.05
## bP    -0.46   0.22 -0.81 -0.11
## bPF   -0.03   0.03 -0.08  0.01
## a      2.76   0.25  2.36  3.16
precis(m11H2_3)
##        Mean StdDev  5.5% 94.5%
## scale 28.19   5.35 19.65 36.74
## bF     0.02   0.03 -0.03  0.07
## bD     0.37   0.13  0.16  0.58
## bDF    0.01   0.02 -0.01  0.04
## a      2.69   0.26  2.28  3.10
compare(m11H2,m11H2_2,m11H2_3, m11H2_4, m11H2_5, m11H2_6)
##          WAIC pWAIC dWAIC weight    SE   dSE
## m11H2_6 681.0  10.2   0.0   0.66 37.91    NA
## m11H2_5 683.1  11.4   2.1   0.23 38.97  3.20
## m11H2_4 684.9  12.4   3.9   0.09 38.65  2.02
## m11H2_2 688.5   8.5   7.6   0.02 38.19  9.70
## m11H2_3 689.5   7.4   8.6   0.01 35.63 13.12
## m11H2   714.3   4.9  33.4   0.00 34.57 15.49

For simplicity, use only the fifth model (m11H2_5) here.

# counterfactual predictions: hold min_pressure and damage_norm at fixed values and vary femininity
counterfactual_plot <- function(mdl=m11H2_5, minpres=0, damnorm=0){

  fake_data <- data.frame(min_pressure=rep(minpres,11),
                          damage_norm=rep(damnorm,11),
                          log_damage_norm=rep(damnorm,11), # added for the next question
                          femininity=1:11)

  y <- sim(mdl, data=fake_data)

  y.mean <- colMeans(y)
  y.PI <- apply(y, 2, PI)

  # plot the model predictions for `y` on the scale of actual deaths
  ttl <- paste0("minpres=",minpres," - damnorm=",damnorm)
  plot(y=y.mean, x=fake_data$femininity, col=rangi2, ylab="deaths", xlab="femininity",
       pch=16, ylim=c(0,max(Hurricanes$deaths)), main=ttl)

  lines(y=y.mean, x=fake_data$femininity)
  lines(y=y.PI[1,], x=fake_data$femininity, lty=2)
  lines(y=y.PI[2,], x=fake_data$femininity, lty=2)

  # predicted death densities for the least and most feminine names
  dens(y[,1], main=ttl, xlab="deaths")
  dens(y[,11], add=T, col="red")
  legend("topright", legend=c("fem=1","fem=11"), fill=c("black","red"))
}

vargrid <- expand.grid(minpres=-2:2,damnorm=0)

par(mfrow=c(2,2))
for(i in 1:nrow(vargrid)){
  counterfactual_plot(minpres=vargrid$minpres[i], damnorm=vargrid$damnorm[i])
}

At very low values of min_pressure (i.e. the strongest storms), there seems to be some effect of femininity, but such storms are rare.

11H4

data("Hurricanes")
Hurricanes<-Hurricanes[complete.cases(Hurricanes),]

Hurricanes$log_damage_norm <- scale(log(Hurricanes$damage_norm))
Hurricanes$damage_norm <- scale(Hurricanes$damage_norm)
Hurricanes$min_pressure <- scale(Hurricanes$min_pressure)

m11H2_5_log <- map(
  alist(
    deaths ~ dgampois( mu, scale ),
    log(mu) <- a + bF*femininity +bD*log_damage_norm + bP*min_pressure + bPF*min_pressure*femininity,
    a ~ dnorm(0,10),
    c(bF,bD,bP,bPF) ~ dnorm(0,5),
    scale ~ dcauchy(0, 2)
  ) ,
  data=Hurricanes,
  start=list(scale=2, bF=0, bD=0, bP=0, bPF=0))

compare(m11H2_5,m11H2_5_log)
##              WAIC pWAIC dWAIC weight    SE   dSE
## m11H2_5_log 676.0  12.3   0.0   0.97 37.06    NA
## m11H2_5     683.2  11.6   7.2   0.03 39.31 10.55

The model with log(damage_norm) gets almost all of the WAIC weight, which makes sense since damage spans several orders of magnitude.
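A quick way to see why the log scale helps is to compare the shapes of the two standardized predictors:

par(mfrow = c(1, 2))
dens(as.numeric(Hurricanes$damage_norm), xlab = "damage_norm (standardized)")      # strongly right-skewed
dens(as.numeric(Hurricanes$log_damage_norm), xlab = "log damage_norm (standardized)") # much more symmetric
par(mfrow = c(1, 1))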

pdf("Chapter11_H3_counterfactual_logs.pdf",width=6, height = 20)
par(mfrow=c(5,2))
for(i in 1:nrow(vargrid)){
  counterfactual_plot(mdl = m11H2_5_log, minpres=vargrid$minpres[i], damnorm=vargrid$damnorm[i])
}
dev.off()
## png 
##   2

11H5

library(rethinking)
data(Trolley)
?Trolley

#m11.3 was the best model in the text
m11.3 <- map(
  alist(
    response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
    phi <- bA*action + bI*intention + bC*contact +
      bAI*action*intention + bCI*contact*intention ,
    c(bA,bI,bC,bAI,bCI) ~ dnorm(0,10),
    c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
  ) ,
  data=Trolley ,
  start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )

# add a main effect of male
m11.3_2 <- map(
  alist(
    response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
    phi <- bA*action + bI*intention + bC*contact + bM*male +
      bAI*action*intention + bCI*contact*intention ,
    c(bA,bI,bC,bAI,bCI,bM) ~ dnorm(0,10),
    c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
  ) ,
  data=Trolley ,
  start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )

# add male plus a male x contact interaction
m11.3_3 <- map(
  alist(
    response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
    phi <- bA*action + bI*intention + bC*contact +
      bAI*action*intention + bCI*contact*intention + bM*male + bMC*male*contact,
    c(bA,bI,bC,bAI,bCI,bM,bMC) ~ dnorm(0,10),
    c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
  ) ,
  data=Trolley ,
  start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )

compare(m11.3,m11.3_2,m11.3_3)
##            WAIC pWAIC dWAIC weight    SE   dSE
## m11.3_3 36670.2  13.0   0.0   0.88 87.37    NA
## m11.3_2 36674.1  12.2   3.9   0.12 86.95  4.71
## m11.3   36929.6  11.2 259.4   0.00 81.18 31.67
plot(precis(m11.3_3))

Including the male*contact interaction does improve the WAIC score. bM is positive, suggesting that men rate the scenarios as more morally permissible at baseline; the interaction bMC is negative, suggesting that the negative effect of contact is stronger for men.

Plot the predicted cumulative probabilities for women (red) and men (blue):

## R code 11.17
post <- extract.samples( m11.3_3 )
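As a quick check on the sign of the interaction (output not reproduced here):

mean(post$bMC < 0)   # share of posterior samples in which the male x contact interaction is negative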

## R code 11.19
plotResult <- function(kA=0, kC=0:1, kI=1){

  # kA: value for action
  # kC: values for contact (plotted on the x-axis)
  # kI: value for intention
  # kM is set inside the function: 0 (women, red) and 1 (men, blue)

  plot( 1 , 1 , type="n" , xlab="contact" , ylab="probability" ,
        xlim=c(0,1) , ylim=c(0,1) , xaxp=c(0,1,1) , yaxp=c(0,1,2) )

  # cumulative probabilities for women (kM = 0)
  kM <- 0
  for ( s in 1:100 ) {
    p <- post[s,]
    ak <- as.numeric(p[1:6])
    phi <- p$bA*kA + p$bI*kI + p$bC*kC + p$bAI*kA*kI + p$bCI*kC*kI + p$bM*kM + p$bMC*kM*kC
    pk <- pordlogit( 1:6 , a=ak , phi=phi )
    for ( i in 1:6 )
      lines( kC , pk[,i] , col=col.alpha("red",0.1) )
  }

  # cumulative probabilities for men (kM = 1)
  kM <- 1
  for ( s in 1:100 ) {
    p <- post[s,]
    ak <- as.numeric(p[1:6])
    phi <- p$bA*kA + p$bI*kI + p$bC*kC + p$bAI*kA*kI + p$bCI*kC*kI + p$bM*kM + p$bMC*kM*kC
    pk <- pordlogit( 1:6 , a=ak , phi=phi )
    for ( i in 1:6 )
      lines( kC , pk[,i] , col=col.alpha(rangi2,0.1) )
  }

  mtext( concat( "action=",kA,", intention=",kI ) )
}


plotResult(kA=0, kI=0)

plotResult(kA=1, kI=0)

plotResult(kA=0, kI=1)

plotResult(kA=1, kI=1)

This confirms the idea that men rate the scenarios as more morally permissible at baseline. As a result, there is also more room for a steeper decline in permissibility when contact is involved.

11H6

data(Fish)

# compute the offset: log of exposure (hours the group spent in the park);
# it enters log(lambda) with an implicit coefficient of 1, so lambda = hours * exp(al + ...)
Fish$log_hours = log(Fish$hours)

# p -> probability of a structural zero, i.e. the group did not fish at all
# lambda -> expected number of fish caught when a group does fish

# use all predictors in both linear models, although camper arguably matters mostly
# for whether the group fishes at all

m11H6 <- map(
  alist(
    fish_caught ~ dzipois( p , lambda ),
    logit(p) <- ap + bpLB*livebait + bpC*camper + bpP*persons + bpCh*child,
    log(lambda) <- log_hours + al + blLB*livebait + blC*camper + blP*persons + blCh*child,
    ap ~ dnorm(0,1),
    al ~ dnorm(0,10),
    c(bpLB,bpC,bpP,bpCh,blLB,blC,blP,blCh) ~ dnorm(0,5)
  ) ,
  data=Fish, start=list(bpLB=0,bpC=0,bpP=0,bpCh=0,blLB=0,blC=0,blP=0,blCh=0) )
plot(precis(m11H6))

l <- link(m11H6) # posterior values of p and lambda (not used further below)
y=sim(m11H6)
y.mean <- colMeans(y)

# observed counts (filled points) vs mean posterior predictions (open circles), sorted by observed catch
plot(sort(Fish$fish_caught), pch=20, ylim=c(0,20))
points(y.mean[order(Fish$fish_caught)])

# observed vs simulated distribution of fish caught
dens(Fish$fish_caught, xlim=c(0,50))
dens(as.vector(y), xlim=c(0,50), col="red", lty=2, add=T)