Ordered A>B>C>D but the distances between each category can differ. For example, developmental stages. For unordered categoricals there is no such structure, for example furniture types.
cumulative log-odds give the odds of that value or any smaller value. Normal log-odds represent the odds of a particular value.
it will think the outcome of the process modeled is zero more often than is the case, as the zeroes could be arising from a different process.
Overdispersion: missing some key measurement that causes a lot of the variance in the data
nr_employees <- c(12,36,7,41,0) # add zero so that the final odds will be 1/0
for(i in 1:4){
print(log(sum(nr_employees[1:i])/sum(nr_employees[(i+1):5])))
}
## [1] -1.94591
## [1] 0
## [1] 0.2937611
## [1] Inf
cum_prop=nr_employees/sum(nr_employees)
cum_prop=sapply(1:4,function(i){sum(cum_prop[1:i])})
plot(y=cum_prop,x=1:4,type="b", ylim=c(0,1))
segments(1:4,0,1:4,cum_prop)
for(i in 1:4){segments(i+0.05,c(0,cum_prop)[i],i+0.05,cum_prop[i], col = "blue")}
Likelyhood for binomial: Pr(y | N, p) = px(1-p)n-x where x is number of successes.
(1 - p_not_work) * p_success^n_success (1-p_success)^n_trials-n_success
library(rethinking)
data("Hurricanes")
m11H1.map <- map(
alist(
deaths ~ dpois( lambda ),
log(lambda) <- a + bF*femininity,
a ~ dnorm(0,10),
bF ~ dnorm(0,5)
) ,
data=Hurricanes)
m11H1.map2 <- map(
alist(
deaths ~ dpois( lambda ),
log(lambda) <- a ,
a ~ dnorm(0,10)
) ,
data=Hurricanes)
compare(m11H1.map,m11H1.map2)
## WAIC pWAIC dWAIC weight SE dSE
## m11H1.map 4398.5 130.8 0.0 1 990.72 NA
## m11H1.map2 4447.3 78.1 48.8 0 1074.47 147.31
y <- sim(m11H1.map)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
y.mean <- colMeans(y)
y.PI <- apply(y, 2, PI)
# plot the model predictions for `y` vs. the actual number of successes for each case
plot(y=Hurricanes$deaths, x=Hurricanes$femininity, col=rangi2, ylab="deaths", xlab="femininity", pch=16)
points(y=y.mean, x=Hurricanes$femininity, pch=1)
segments(x0=Hurricanes$femininity, x1= Hurricanes$femininity, y0=y.PI[1,], y1=y.PI[2,])
lines(y= y.mean[order(Hurricanes$femininity)], x=sort(Hurricanes$femininity))
lines( y.PI[1,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2 )
lines( y.PI[2,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2 )
It looks like the relationship isn’t very strong, but the deadlies hurricanes seem to be more feminine in name
library(rethinking)
data("Hurricanes")
m11H2 <- map(
alist(
deaths ~ dgampois( mu, scale ),
log(mu) <- a + bF*femininity,
a ~ dnorm(0,10),
bF ~ dnorm(0,5),
scale ~ dcauchy(0, 2)
) ,
data=Hurricanes,
start=list(scale=2))
precis(m11H2)
## Mean StdDev 5.5% 94.5%
## scale 42.46 8.44 28.96 55.95
## a 2.92 0.27 2.49 3.35
## bF 0.01 0.03 -0.04 0.06
y <- sim(m11H2)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
y.mean <- colMeans(y)
y.PI <- apply(y, 2, PI)
# plot the model predictions for `y` vs. the actual number of successes for each case
plot(y=Hurricanes$deaths, x=Hurricanes$femininity, col=rangi2, ylab="deaths", xlab="femininity", pch=16)
points(y=y.mean, x=Hurricanes$femininity, pch=1)
segments(x0=Hurricanes$femininity, x1= Hurricanes$femininity, y0=y.PI[1,], y1=y.PI[2,])
lines(y= y.mean[order(Hurricanes$femininity)], x=sort(Hurricanes$femininity))
lines( y.PI[1,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2 )
lines( y.PI[2,order(Hurricanes$femininity)], x=sort(Hurricanes$femininity), lty=2 )
Now the model captures this dispersion, although it is still useless.
data("Hurricanes")
Hurricanes<-Hurricanes[complete.cases(Hurricanes),]
Hurricanes$damage_norm <- scale(Hurricanes$damage_norm)
Hurricanes$min_pressure <- scale(Hurricanes$min_pressure)
m11H2_2 <- map(
alist(
deaths ~ dgampois( mu, scale ),
log(mu) <- a + bF*femininity +bP*min_pressure + bPF*min_pressure*femininity,
a ~ dnorm(0,10),
c(bF,bP,bPF) ~ dnorm(0,5),
scale ~ dcauchy(0, 2)
) ,
data=Hurricanes,
start=list(scale=2, bF=0, bP=0, bPF=0))
m11H2_3 <- map(
alist(
deaths ~ dgampois( mu, scale ),
log(mu) <- a + bF*femininity +bD*damage_norm + bDF*damage_norm*femininity,
a ~ dnorm(0,10),
c(bF,bD,bDF) ~ dnorm(0,5),
scale ~ dcauchy(0, 2)
) ,
data=Hurricanes,
start=list(scale=2, bF=0, bD=0, bDF=0))
m11H2_4 <- map(
alist(
deaths ~ dgampois( mu, scale ),
log(mu) <- a + bF*femininity +bD*damage_norm + bDF*damage_norm*femininity +bP*min_pressure + bPF*min_pressure*femininity,
a ~ dnorm(0,10),
c(bF,bD,bDF,bP,bPF) ~ dnorm(0,5),
scale ~ dcauchy(0, 2)
) ,
data=Hurricanes,
start=list(scale=2, bF=0, bD=0, bDF=0, bP=0, bPF=0))
m11H2_5 <- map(
alist(
deaths ~ dgampois( mu, scale ),
log(mu) <- a + bF*femininity +bD*damage_norm + bP*min_pressure + bPF*min_pressure*femininity,
a ~ dnorm(0,10),
c(bF,bD,bP,bPF) ~ dnorm(0,5),
scale ~ dcauchy(0, 2)
) ,
data=Hurricanes,
start=list(scale=2, bF=0, bD=0, bP=0, bPF=0))
m11H2_6 <- map(
alist(
deaths ~ dgampois( mu, scale ),
log(mu) <- a + bF*femininity +bD*damage_norm + bP*min_pressure + bDF*damage_norm*femininity,
a ~ dnorm(0,10),
c(bF,bD,bDF,bP) ~ dnorm(0,5),
scale ~ dcauchy(0, 2)
) ,
data=Hurricanes,
start=list(scale=2, bF=0, bD=0, bDF=0, bP=0))
precis(m11H2)
## Mean StdDev 5.5% 94.5%
## scale 42.46 8.44 28.96 55.95
## a 2.92 0.27 2.49 3.35
## bF 0.01 0.03 -0.04 0.06
precis(m11H2_2)
## Mean StdDev 5.5% 94.5%
## scale 26.62 5.23 18.26 34.98
## bF 0.00 0.03 -0.05 0.05
## bP -0.46 0.22 -0.81 -0.11
## bPF -0.03 0.03 -0.08 0.01
## a 2.76 0.25 2.36 3.16
precis(m11H2_3)
## Mean StdDev 5.5% 94.5%
## scale 28.19 5.35 19.65 36.74
## bF 0.02 0.03 -0.03 0.07
## bD 0.37 0.13 0.16 0.58
## bDF 0.01 0.02 -0.01 0.04
## a 2.69 0.26 2.28 3.10
compare(m11H2,m11H2_2,m11H2_3, m11H2_4, m11H2_5, m11H2_6)
## WAIC pWAIC dWAIC weight SE dSE
## m11H2_6 681.0 10.2 0.0 0.66 37.91 NA
## m11H2_5 683.1 11.4 2.1 0.23 38.97 3.20
## m11H2_4 684.9 12.4 3.9 0.09 38.65 2.02
## m11H2_2 688.5 8.5 7.6 0.02 38.19 9.70
## m11H2_3 689.5 7.4 8.6 0.01 35.63 13.12
## m11H2 714.3 4.9 33.4 0.00 34.57 15.49
For simplicity, use only the 5th model here
# Make data with av
counterfactual_plot <- function(mdl=m11H2_5 ,minpres=0, damnorm=0){
fake_data=data.frame(min_pressure=rep(minpres,11), damage_norm=rep(damnorm,11), log_damage_norm=rep(damnorm,11), femininity=c(1:11))
# added in the log_damage_norm for the next question
y <- sim(mdl, data = fake_data)
y.mean <- colMeans(y)
y.PI <- apply(y, 2, PI)
# plot the model predictions for `y` on the scale of actual deaths.
ttl <- paste0("minpres=",minpres," - damnorm=",damnorm)
plot(y=y.mean, x=fake_data$femininity, col=rangi2, ylab="deaths", xlab="femininity", pch=16, ylim=c(0,max(Hurricanes$deaths)), main=ttl)
lines(y= y.mean, x=fake_data$femininity)
lines( y.PI[1,], x=fake_data$femininity, lty=2 )
lines( y.PI[2,], x=fake_data$femininity, lty=2 )
dens(y[,1], main="low pressure", xlab="deaths")
dens(y[,11], add=T, col="red")
legend("topright", legend=c("fem=1","fem=11"), fill = c("black","red"))
}
vargrid <- expand.grid(minpres=-2:2,damnorm=0)
par(mfrow=c(2,2))
for(i in 1:nrow(vargrid)){
counterfactual_plot(minpres=vargrid$minpres[i], damnorm=vargrid$damnorm[i])
}
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
At very low values of min_pressure, there seems to be some effect of femininity, but these storms are very rare.
data("Hurricanes")
Hurricanes<-Hurricanes[complete.cases(Hurricanes),]
Hurricanes$log_damage_norm <- scale(log(Hurricanes$damage_norm))
Hurricanes$damage_norm <- scale(Hurricanes$damage_norm)
Hurricanes$min_pressure <- scale(Hurricanes$min_pressure)
m11H2_5_log <- map(
alist(
deaths ~ dgampois( mu, scale ),
log(mu) <- a + bF*femininity +bD*log_damage_norm + bP*min_pressure + bPF*min_pressure*femininity,
a ~ dnorm(0,10),
c(bF,bD,bP,bPF) ~ dnorm(0,5),
scale ~ dcauchy(0, 2)
) ,
data=Hurricanes,
start=list(scale=2, bF=0, bD=0, bP=0, bPF=0))
compare(m11H2_5,m11H2_5_log)
## WAIC pWAIC dWAIC weight SE dSE
## m11H2_5_log 676.0 12.3 0.0 0.97 37.06 NA
## m11H2_5 683.2 11.6 7.2 0.03 39.31 10.55
model with the log gets almost all the weight.
pdf("Chapter11_H3_counterfactual_logs.pdf",width=6, height = 20)
par(mfrow=c(5,2))
for(i in 1:nrow(vargrid)){
counterfactual_plot(mdl = m11H2_5_log, minpres=vargrid$minpres[i], damnorm=vargrid$damnorm[i])
}
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
dev.off()
## png
## 2
library(rethinking)
data(Trolley)
?Trolley
#m11.3 was the best model in the text
m11.3 <- map(
alist(
response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
phi <- bA*action + bI*intention + bC*contact +
bAI*action*intention + bCI*contact*intention ,
c(bA,bI,bC,bAI,bCI) ~ dnorm(0,10),
c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
) ,
data=Trolley ,
start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )
# add some terms
m11.3_2 <- map(
alist(
response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
phi <- bA*action + bI*intention + bC*contact + bM*male +
bAI*action*intention + bCI*contact*intention ,
c(bA,bI,bC,bAI,bCI,bM) ~ dnorm(0,10),
c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
) ,
data=Trolley ,
start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )
m11.3_3 <- map(
alist(
response ~ dordlogit( phi , c(a1,a2,a3,a4,a5,a6) ) ,
phi <- bA*action + bI*intention + bC*contact +
bAI*action*intention + bCI*contact*intention + bM*male + bMC*male*contact,
c(bA,bI,bC,bAI,bCI,bM,bMC) ~ dnorm(0,10),
c(a1,a2,a3,a4,a5,a6) ~ dnorm(0,10)
) ,
data=Trolley ,
start=list(a1=-1.9,a2=-1.2,a3=-0.7,a4=0.2,a5=0.9,a6=1.8) )
compare(m11.3,m11.3_2,m11.3_3)
## WAIC pWAIC dWAIC weight SE dSE
## m11.3_3 36670.2 13.0 0.0 0.88 87.37 NA
## m11.3_2 36674.1 12.2 3.9 0.12 86.95 4.71
## m11.3 36929.6 11.2 259.4 0.00 81.18 31.67
plot(precis(m11.3_3))
Including male*contact does improve the WAIC scores. bM is positive, suggesting that males find anything more morally permissive at baseline. the interaction is negative, suggesting that there is a stronger negative effect of contact in males.
plot the difference in scores:
## R code 11.17
post <- extract.samples( m11.3_3 )
## R code 11.18
## R code 11.19
plotResult <- function(kA=0, kC=0:1, kI=1){
# kA # value for action
# kC # value for contact
# kI # value for intention
# kM # values of male to calculate over
plot( 1 , 1 , type="n" , xlab="contact" , ylab="probability" ,
xlim=c(0,1) , ylim=c(0,1) , xaxp=c(0,1,1) , yaxp=c(0,1,2) )
kM=0
for ( s in 1:100 ) {
p <- post[s,]
ak <- as.numeric(p[1:6])
phi <- p$bA*kA + p$bI*kI + p$bC*kC + p$bAI*kA*kI + p$bCI*kC*kI +p$bM*kM + p$bMC*kM*kC
pk <- pordlogit( 1:6 , a=ak , phi=phi )
for ( i in 1:6 )
lines( kC , pk[,i] , col=col.alpha("red",0.1) )
}
kM=1
for ( s in 1:100 ) {
p <- post[s,]
ak <- as.numeric(p[1:6])
phi <- p$bA*kA + p$bI*kI + p$bC*kC + p$bAI*kA*kI + p$bCI*kC*kI +p$bM*kM + p$bMC*kM*kC
pk <- pordlogit( 1:6 , a=ak , phi=phi )
for ( i in 1:6 )
lines( kC , pk[,i] , col=col.alpha(rangi2,0.1) )
}
mtext( concat( "action=",kA,", intention=",kI ) )
}
plotResult(kA=0, kI=0)
plotResult(kA=1, kI=0)
plotResult(kA=0, kI=1)
plotResult(kA=1, kI=1)
This confirms the idea that men score for more morally permissive at baseline. As a result, there is also room for a steeper decline in permissability.
data(Fish)
# compute offset:
Fish$log_hours = log(Fish$hours)
# p > are these people going fishing?
# lambda -> how many fish do people catch if they fish?
# use all variables in both formulas, although maybe a camper affects mostly whether they are going fishing?
m11H6 <- map(
alist(
fish_caught ~ dzipois( p , lambda ),
logit(p) <- ap + bpLB*livebait + bpC*camper + bpP*persons + bpC*child,
log(lambda) <- log_hours + al + blLB*livebait + blC*camper + blP*persons + blC*child,
ap ~ dnorm(0,1),
al ~ dnorm(0,10),
c(bpLB,bpC,bpP,bpC,blLB,blC,blP,blC) ~ dnorm(0,5)
) ,
data=Fish, start=list(bpLB=0,bpC=0,bpP=0,bpC=0,blLB=0,blC=0,blP=0,blC=0) )
plot(precis(m11H6))
l=link(m11H6)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
y=sim(m11H6)
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
[ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
y.mean <- colMeans(y)
plot(sort(Fish$fish_caught), pch=20, ylim=c(0,20))
points(y.mean[order(Fish$fish_caught)])
dens(Fish$fish_caught, xlim=c(0,50))
dens(y, xlim=c(0,50), col="red", lty=2, add=T)