library(rethinking)
## Loading required package: rstan
## Loading required package: Rcpp
## Loading required package: ggplot2
## rstan (Version 2.8.0, packaged: 2015-09-19 14:48:38 UTC, GitRev: 05c3d0058b6a)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## Loading required package: parallel
## rethinking (Version 1.57)
mu_prior <- rnorm( 1e4 , 0 , 10 )
sigma_prior <- runif( 1e4 , 0, 10 )
## R code 3.2
h_sim <- rnorm( 1e4 , mu_prior , sigma_prior )
dens( h_sim )
## R code 3.3
f <- alist(
y ~ dnorm( mu , sigma ),
mu ~ dnorm( 0 , 10 ),
sigma ~ dunif( 0 , 10 )
)
library(rethinking)
data(Howell1)
d <- Howell1
d2 <- d[d$age>=18,]
head(d2)
## height weight age male
## 1 151.765 47.82561 63 1
## 2 139.700 36.48581 63 0
## 3 136.525 31.86484 65 0
## 4 156.845 53.04191 41 1
## 5 145.415 41.27687 51 0
## 6 163.830 62.99259 35 1
##
library(corrplot)
M <- cor(d2, method="spearman", use="pairwise.complete.obs")
corrplot.mixed(M, order = "hclust")
m <- map(
alist(
height ~ dnorm( mu , sigma ) ,
mu <- a + b*weight ,
a ~ dnorm( 100 , 100 ) ,
b ~ dnorm( 0 , 10 ) ,
sigma ~ dunif( 0 , 50 )
) ,
data=d2 )
## R code 3.5
post <- extract.samples( m )
str(post)
## 'data.frame': 10000 obs. of 3 variables:
## $ a : num 116 114 114 111 115 ...
## $ b : num 0.87 0.899 0.895 0.963 0.891 ...
## $ sigma: num 5.09 4.9 5.18 5.47 4.95 ...
a1 <- mean(post$a)
b1 <- mean(post$b)
plot(d2$height ~ d2$weight,data=d2,type="p")
abline(a=a1,b=b1,col=2)
## R code 3.6
(ww <- mean(d2$weight))
## [1] 44.99049
y <- rnorm( 1e5 , post$a + post$b*ww , post$sigma )
mean(y)
## [1] 154.5967
HPDI(y,prob=0.90)
## |0.9 0.9|
## 146.3533 163.0771
## R code 3.7
library(rethinking)
data(Howell1)
d <- Howell1
d3 <- d[ d$age < 18 , ]
#str(d3)
head(d3)
## height weight age male
## 19 121.92 19.61785 12.0 1
## 20 105.41 13.94795 8.0 0
## 21 86.36 10.48931 6.5 0
## 24 129.54 23.58678 13.0 1
## 25 109.22 15.98912 7.0 0
## 29 137.16 27.32892 17.0 1
## R code 3.8
m <- map(
alist(
height ~ dnorm( mu , sigma ) ,
mu <- a + b*weight ,
a ~ dnorm( 100 , 100 ),
b ~ dnorm( 0 , 10 ) ,
sigma ~ dunif( 0 , 50 )
) ,
data=d3 )
precis(m)
## Mean StdDev 5.5% 94.5%
## a 58.24 1.40 56.01 60.47
## b 2.72 0.07 2.61 2.83
## sigma 8.44 0.43 7.75 9.13
## R code 3.9
post <- extract.samples( m )
w.seq <- seq(from=1,to=45,length.out=50)
mu <- sapply( w.seq , function(z)
mean( post$a + post$b*z ) )
head(mu)
## [1] 60.96361 63.40528 65.84694 68.28861 70.73027 73.17194
mu.ci <- sapply( w.seq , function(z)
HPDI( post$a + post$b*z , prob=0.9 ) )
head(mu.ci)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## |0.9 58.80998 61.28538 63.84124 66.35848 68.87907 71.41458 73.89697
## 0.9| 63.18100 65.47898 67.85694 70.20748 72.56393 74.94198 77.26969
## [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## |0.9 76.41050 79.03251 81.56502 84.00060 86.53342 89.06721 91.58083
## 0.9| 79.62988 82.09177 84.47189 86.76417 89.15075 91.55789 93.96001
## [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## |0.9 94.03917 96.52129 98.98023 101.4345 103.9091 106.3254 108.7236
## 0.9| 96.31869 98.70820 101.09538 103.4919 105.9268 108.3197 110.7218
## [,22] [,23] [,24] [,25] [,26] [,27] [,28]
## |0.9 111.2327 113.6369 116.0530 118.5021 120.8546 123.3190 125.7185
## 0.9| 113.2511 115.6895 118.1616 120.6789 123.1249 125.6947 128.2149
## [,29] [,30] [,31] [,32] [,33] [,34] [,35]
## |0.9 128.0627 130.3917 132.7737 135.1784 137.5317 139.9134 142.2818
## 0.9| 130.6856 133.1452 135.6766 138.2301 140.7473 143.2857 145.8247
## [,36] [,37] [,38] [,39] [,40] [,41] [,42]
## |0.9 144.5963 146.9832 149.3453 151.7021 154.0637 156.3358 158.7022
## 0.9| 148.3023 150.8529 153.3904 155.9168 158.4634 160.9208 163.4615
## [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## |0.9 161.1128 163.4883 165.8642 168.1818 170.5847 172.9368 175.2930
## 0.9| 166.0682 168.6243 171.1862 173.6957 176.2899 178.8280 181.3803
## [,50]
## |0.9 177.6439
## 0.9| 183.9275
pred.ci <- sapply( w.seq , function(z)
HPDI( rnorm(10000,post$a + post$b*z,post$sigma) , 0.9 ) )
head(pred.ci)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## |0.9 46.88479 50.11814 51.22570 54.97390 57.08727 59.15942 62.16241
## 0.9| 74.87655 78.02192 79.30295 82.73892 85.09861 87.37907 90.30810
## [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## |0.9 64.47044 66.64285 69.15322 71.45639 73.57257 76.27791 78.65279
## 0.9| 92.16292 94.48768 96.82511 99.10752 101.40155 104.00481 106.56446
## [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## |0.9 81.43655 83.32075 85.84946 88.9576 91.30465 93.7136 96.91173
## 0.9| 109.20492 111.25570 113.61771 116.9466 118.89678 121.7126 124.17047
## [,22] [,23] [,24] [,25] [,26] [,27] [,28]
## |0.9 98.22256 100.1222 103.3715 105.2940 108.0488 110.4498 113.7086
## 0.9| 126.18514 127.9306 130.8362 133.5393 135.9815 138.3335 141.6915
## [,29] [,30] [,31] [,32] [,33] [,34] [,35]
## |0.9 116.0197 117.5983 120.6138 122.8215 124.9461 127.4873 130.2748
## 0.9| 143.6873 145.6608 148.3188 150.4747 152.9400 155.1841 158.3051
## [,36] [,37] [,38] [,39] [,40] [,41] [,42]
## |0.9 132.4239 134.6475 136.7337 139.7483 141.6374 145.0663 147.3486
## 0.9| 160.8195 162.7424 164.7221 168.0971 169.8181 172.9103 175.0184
## [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## |0.9 149.7455 151.9824 154.1939 156.8269 159.6685 161.1901 164.3707
## 0.9| 177.5080 180.0122 182.7952 185.2335 187.8423 189.5075 192.9891
## [,50]
## |0.9 166.9886
## 0.9| 195.2395
## R code 3.10
plot( height ~ weight , data=d3 ,
col=col.alpha("slateblue",0.5) , cex=0.5 )
lines( w.seq , mu )
lines( w.seq , mu.ci[1,] , lty=2 )
lines( w.seq , mu.ci[2,] , lty=2 )
lines( w.seq , pred.ci[1,] , lty=2 )
lines( w.seq , pred.ci[2,] , lty=2 )
## R code 3.11
library(rethinking)
data(Howell1)
d <- Howell1
##
library(corrplot)
M <- cor(d, method="spearman", use="pairwise.complete.obs")
corrplot.mixed(M, order = "hclust")
mlw <- map(
alist(
height ~ dnorm( mean=mu , sd=sigma ) ,
mu <- a + b*log(weight) ,#log
a ~ dnorm( 138 , 100 ) ,
b ~ dnorm( 0 , 100 ) ,
sigma ~ dunif( 0 , 50 )
) ,
data=d )
precis(mlw)
## Mean StdDev 5.5% 94.5%
## a -23.79 1.34 -25.93 -21.66
## b 47.08 0.38 46.47 47.69
## sigma 5.13 0.16 4.89 5.38
## R code 3.12
post <- extract.samples(mlw)
lw.seq <- seq(from=1.4,to=4.2,length.out=50)###!!!
mu <- sapply( lw.seq , function(z) mean( post$a+post$b*z ) )
mu.ci <- sapply( lw.seq , function(z) HPDI( post$a+post$b*z ) )
h.ci <- sapply( lw.seq , function(z)
HPDI( rnorm(10000,post$a+post$b*z,post$sigma) ) )
## R code 3.13
plot( height ~ weight , data=d , col=col.alpha("slateblue",0.4) )
lines( exp(lw.seq) , mu )
lines( exp(lw.seq) , mu.ci[1,] , lty=2 ,col=4)
lines( exp(lw.seq) , mu.ci[2,] , lty=2 ,col=4)
lines( exp(lw.seq) , h.ci[1,] , lty=2,col=2 )
lines( exp(lw.seq) , h.ci[2,] , lty=2,col=2 )
library(rethinking)
data(WaffleDivorce)
d <- WaffleDivorce
d$pct_LDS <- c(0.75, 4.53, 6.18, 1, 2.01, 2.82, 0.43, 0.55, 0.38,
0.75, 0.82, 5.18, 26.35, 0.44, 0.66, 0.87, 1.25, 0.77, 0.64, 0.81,
0.72, 0.39, 0.44, 0.58, 0.72, 1.14, 4.78, 1.29, 0.61, 0.37, 3.34,
0.41, 0.82, 1.48, 0.52, 1.2, 3.85, 0.4, 0.37, 0.83, 1.27, 0.75,
1.21, 67.97, 0.74, 1.13, 3.99, 0.92, 0.44, 11.5 )
head(d)
## Location Loc Population MedianAgeMarriage Marriage Marriage.SE Divorce
## 1 Alabama AL 4.78 25.3 20.2 1.27 12.7
## 2 Alaska AK 0.71 25.2 26.0 2.93 12.5
## 3 Arizona AZ 6.33 25.8 20.3 0.98 10.8
## 4 Arkansas AR 2.92 24.3 26.4 1.70 13.5
## 5 California CA 37.25 26.8 19.1 0.39 8.0
## 6 Colorado CO 5.03 25.7 23.5 1.24 11.6
## Divorce.SE WaffleHouses South Slaves1860 Population1860 PropSlaves1860
## 1 0.79 128 1 435080 964201 0.45
## 2 2.05 0 0 0 0 0.00
## 3 0.74 18 0 0 0 0.00
## 4 1.22 41 1 111115 435450 0.26
## 5 0.24 0 0 0 379994 0.00
## 6 0.94 11 0 0 34277 0.00
## pct_LDS
## 1 0.75
## 2 4.53
## 3 6.18
## 4 1.00
## 5 2.01
## 6 2.82
d1 <- d[,c("Divorce","Marriage","MedianAgeMarriage","pct_LDS")]
head(d1)
## Divorce Marriage MedianAgeMarriage pct_LDS
## 1 12.7 20.2 25.3 0.75
## 2 12.5 26.0 25.2 4.53
## 3 10.8 20.3 25.8 6.18
## 4 13.5 26.4 24.3 1.00
## 5 8.0 19.1 26.8 2.01
## 6 11.6 23.5 25.7 2.82
library(corrplot)
M <- cor(d1, method="spearman", use="pairwise.complete.obs")
corrplot.mixed(M, order = "hclust")
m_5M4 <- map(
alist(
Divorce ~ dnorm(mu,sigma),
mu <- a + bR*Marriage + bA*MedianAgeMarriage +
bM*pct_LDS,
a ~ dnorm(0,100),
c(bA,bR,bM) ~ dnorm(0,10),
sigma ~ dunif(0,10)
),
data=d )
precis( m_5M4 )
## Mean StdDev 5.5% 94.5%
## a 38.47 6.93 27.39 49.56
## bA -1.10 0.22 -1.46 -0.74
## bR 0.00 0.08 -0.12 0.12
## bM -0.06 0.02 -0.10 -0.03
## sigma 1.34 0.13 1.13 1.55
## R code 4.3
library(rethinking)
data(foxes)
d <- foxes
head(d)
## group avgfood groupsize area weight
## 1 1 0.37 2 1.09 5.02
## 2 1 0.37 2 1.09 2.84
## 3 2 0.53 2 2.05 5.33
## 4 2 0.53 2 2.05 6.07
## 5 3 0.49 2 2.12 5.85
## 6 3 0.49 2 2.12 3.25
summary(d)
## group avgfood groupsize area
## Min. : 1.00 Min. :0.3700 Min. :2.000 Min. :1.090
## 1st Qu.:11.75 1st Qu.:0.6600 1st Qu.:3.000 1st Qu.:2.590
## Median :18.00 Median :0.7350 Median :4.000 Median :3.130
## Mean :17.21 Mean :0.7517 Mean :4.345 Mean :3.169
## 3rd Qu.:24.00 3rd Qu.:0.8000 3rd Qu.:5.000 3rd Qu.:3.772
## Max. :30.00 Max. :1.2100 Max. :8.000 Max. :5.070
## weight
## Min. :1.920
## 1st Qu.:3.720
## Median :4.420
## Mean :4.530
## 3rd Qu.:5.375
## Max. :7.550
str(d)
## 'data.frame': 116 obs. of 5 variables:
## $ group : int 1 1 2 2 3 3 4 4 5 5 ...
## $ avgfood : num 0.37 0.37 0.53 0.53 0.49 0.49 0.45 0.45 0.74 0.74 ...
## $ groupsize: int 2 2 2 2 2 2 2 2 3 3 ...
## $ area : num 1.09 1.09 2.05 2.05 2.12 2.12 1.29 1.29 3.78 3.78 ...
## $ weight : num 5.02 2.84 5.33 6.07 5.85 3.25 4.53 4.09 6.13 5.59 ...
#require(GGally)
#ggpairs(data=d, # data.frame with variables
# columns=1:4, # columns to plot, default to all.
# title="foxes data", # title of the plot
# colour = "groupsize ") # aesthetics, ggplot2 style
library(corrplot)
M <- cor(d, method="spearman", use="pairwise.complete.obs")
corrplot.mixed(M, order = "hclust")
m1 <- map2stan(
alist(
weight ~ dnorm( mu , sigma ) ,
mu <- a + ba*area ,
a ~ dnorm(0,100),
ba ~ dnorm(0,10),
sigma ~ dunif(0,50)
) , data=d )
##
## SAMPLING FOR MODEL 'weight ~ dnorm(mu, sigma)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1, Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1, Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1, Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1, Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%] (Sampling)
## # Elapsed Time: 0.236 seconds (Warm-up)
## # 0.249 seconds (Sampling)
## # 0.485 seconds (Total)
##
##
## SAMPLING FOR MODEL 'weight ~ dnorm(mu, sigma)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
m2 <- map2stan(
alist(
weight ~ dnorm( mu , sigma ) ,
mu <- a + bg*groupsize ,
a ~ dnorm(0,100),
bg ~ dnorm(0,10),
sigma ~ dunif(0,50)
) , data=d )
##
## SAMPLING FOR MODEL 'weight ~ dnorm(mu, sigma)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1, Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1, Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1, Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1, Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1, Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1, Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1, Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1, Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1, Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1, Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1, Iteration: 2000 / 2000 [100%] (Sampling)
## # Elapsed Time: 0.218 seconds (Warm-up)
## # 0.22 seconds (Sampling)
## # 0.438 seconds (Total)
##
##
## SAMPLING FOR MODEL 'weight ~ dnorm(mu, sigma)' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1 [100%] (Sampling)
## # Elapsed Time: 0 seconds (Warm-up)
## # 0 seconds (Sampling)
## # 0 seconds (Total)
## Computing WAIC
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
precis(m1)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 4.43 0.40 3.78 5.09 276 1
## ba 0.03 0.12 -0.15 0.23 278 1
## sigma 1.20 0.08 1.07 1.31 413 1
precis(m2)
## Mean StdDev lower 0.89 upper 0.89 n_eff Rhat
## a 5.08 0.32 4.57 5.60 310 1
## bg -0.13 0.07 -0.25 -0.03 285 1
## sigma 1.19 0.08 1.07 1.31 410 1
## R code 4.4
x.seq <- seq(from=0,to=6,by=0.025)
mu <- link( m1 , data=list(area=x.seq) )#予測値
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply( mu , 2 , mean )
mu.ci <- apply( mu , 2 , PI )
plot( weight ~ area , data=d , col="slateblue" )
lines( x.seq , mu.mean )
lines( x.seq , mu.ci[1,] , lty=2 )
lines( x.seq , mu.ci[2,] , lty=2 )
## R code 4.5
x.seq <- seq(from=1,to=9,by=0.5)
mu <- link( m2 , data=list(groupsize=x.seq) )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply( mu , 2 , mean )
mu.ci <- apply( mu , 2 , PI )
plot( weight ~ groupsize , data=d , col="slateblue" )
lines( x.seq , mu.mean )
lines( x.seq , mu.ci[1,] , lty=2 )
lines( x.seq , mu.ci[2,] , lty=2 )
## R code 4.6
m3 <- map(
alist(
weight ~ dnorm( mu , sigma ) ,
mu <- a + ba*area + bg*groupsize ,
a ~ dnorm(0,100),
c(ba,bg) ~ dnorm(0,10),
sigma ~ dunif(0,50)
) , data=d )
precis(m3)
## Mean StdDev 5.5% 94.5%
## a 4.45 0.37 3.86 5.04
## ba 0.62 0.20 0.30 0.94
## bg -0.43 0.12 -0.63 -0.24
## sigma 1.12 0.07 1.00 1.24
## R code 4.7
area.seq <- seq(from=1,to=6,by=0.5)
pred.dat <- data.frame( area=area.seq ,
groupsize=mean(d$groupsize) )
mu <- link( m3 , data=pred.dat ) ###!!!
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply( mu , 2 , mean )
mu.ci <- apply( mu , 2 , PI )
plot( weight ~ area , data=d , type="p" )
lines( area.seq , mu.mean )
lines( area.seq , mu.ci[1,] , lty=2 )
lines( area.seq , mu.ci[2,] , lty=2 )
## R code 4.8
gs.seq <- seq(from=1,to=9,by=0.5)
pred.dat <- data.frame( area=mean(d$area) ,
groupsize=gs.seq )
mu <- link( m3 , data=pred.dat ) ###!!!
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply( mu , 2 , mean )
mu.ci <- apply( mu , 2 , PI )
plot( weight ~ groupsize , data=d , type="p" )
lines( gs.seq , mu.mean )
lines( gs.seq , mu.ci[1,] , lty=2 )
lines( gs.seq , mu.ci[2,] , lty=2 )
## R code 4.9
plot( groupsize ~ area , data=d , col="slateblue")
## R code 4.10
m4 <- map(
alist(
weight ~ dnorm( mu , sigma ) ,
mu <- a + bf*avgfood + bg*groupsize ,
a ~ dnorm(0,100),
c(bf,bg) ~ dnorm(0,10),
sigma ~ dunif(0,50)
) , data=d )
m5 <- map(
alist(
weight ~ dnorm( mu , sigma ) ,
mu <- a + bf*avgfood + bg*groupsize + ba*area ,
a ~ dnorm(0,100),
c(bf,bg,ba) ~ dnorm(0,10),
sigma ~ dunif(0,50)
) , data=d )
## R code 4.11
precis(m4)
## Mean StdDev 5.5% 94.5%
## a 4.14 0.43 3.45 4.83
## bf 3.77 1.20 1.85 5.70
## bg -0.56 0.16 -0.81 -0.31
## sigma 1.12 0.07 1.00 1.23
## R code 4.12
food.seq <- seq(from=0,to=1.5,by=0.1)
pred.dat <- data.frame( avgfood=food.seq ,
groupsize=mean(d$groupsize) )
mu <- link( m4 , data=pred.dat ) ###!!!
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
mu.mean <- apply( mu , 2 , mean )
mu.ci <- apply( mu , 2 , PI )
plot( weight ~ avgfood , data=d , type="p" )
lines( food.seq , mu.mean )
lines( food.seq , mu.ci[1,] , lty=2 )
lines( food.seq , mu.ci[2,] , lty=2 )
## R code 4.13
-logLik(m3)
## 'log Lik.' 177.5824 (df=4)
-logLik(m4)
## 'log Lik.' 177.3915 (df=4)
## R code 4.14
precis(m5)
## Mean StdDev 5.5% 94.5%
## a 4.07 0.43 3.39 4.76
## bf 2.46 1.44 0.16 4.76
## bg -0.60 0.16 -0.85 -0.35
## ba 0.39 0.24 0.01 0.77
## sigma 1.10 0.07 0.99 1.22
#WAIC(m4)
compare(m3,m4,m5)
## WAIC pWAIC dWAIC weight SE dSE
## m5 363.0 5.4 0.0 0.36 16.74 NA
## m4 363.3 4.1 0.2 0.32 16.67 3.99
## m3 363.3 4.0 0.2 0.32 16.07 3.95
p <- c( 0.7 , 0.3 )
-sum( p*log(p) )
p <- c( 0.2 , 0.25 , 0.25 , 0.3 )
-sum( p*log(p) )
p <- c( 1/3 , 1/3 , 1/3 )
-sum( p*log(p) )
y <- rnorm(10) # execute just once, to get data
m <- map( alist( y ~ dnorm(mu,1), mu ~ dnorm(0,sigma) ), data=list(y=y,sigma=10) ) WAIC(m)
f1 <- alist(
height ~ dnorm(mu,sigma),
mu <- a + b1*age,
c(a,b1) ~ dnorm(0,100),
sigma ~ dunif(0,50)
)
f2 <- alist(
height ~ dnorm(mu,sigma),
mu <- a + b1*age + b2*age^2,
c(a,b1,b2) ~ dnorm(0,100),
sigma ~ dunif(0,50)
)
f3 <- alist(
height ~ dnorm(mu,sigma),
mu <- a + b1*age + b2*age^2 + b3*age^3,
c(a,b1,b2,b3) ~ dnorm(0,100),
sigma ~ dunif(0,50)
)
f4 <- alist(
height ~ dnorm(mu,sigma),
mu <- a + b1*age + b2*age^2 + b3*age^3 + b4*age^4,
c(a,b1,b2,b3,b4) ~ dnorm(0,100),
sigma ~ dunif(0,50)
)
f5 <- alist(
height ~ dnorm(mu,sigma),
mu <- a + b1*age + b2*age^2 + b3*age^3 +
b4*age^4 + b5*age^5,
c(a,b1,b2,b3,b4,b5) ~ dnorm(0,100),
sigma ~ dunif(0,50)
)
f6 <- alist(
height ~ dnorm(mu,sigma),
mu <- a + b1*age + b2*age^2 + b3*age^3 +
b4*age^4 + b5*age^5 + b6*age^6,
c(a,b1,b2,b3,b4,b5,b6) ~ dnorm(0,100),
sigma ~ dunif(0,50)
)
## R code 5.6
# compute starting values for a and sigma
library(rethinking)
data(Howell1)
d <- Howell1
d2 <- d[d$age>=18,]
head(d2)
## height weight age male
## 1 151.765 47.82561 63 1
## 2 139.700 36.48581 63 0
## 3 136.525 31.86484 65 0
## 4 156.845 53.04191 41 1
## 5 145.415 41.27687 51 0
## 6 163.830 62.99259 35 1
a.start <- mean(d2$height)
sigma.start <- sd(d2$height)
## R code 5.7
# fit models
m1 <- map( f1 , data=d2 ,
start=list(a=a.start,
sigma=sigma.start,
b1=0) )
m2 <- map( f2 , data=d2 ,
start=list(a=a.start,
sigma=sigma.start,
b1=0,b2=0) )
m3 <- map( f3 , data=d2 ,
start=list(a=a.start,
sigma=sigma.start,
b1=0,b2=0,
b3=0) )
#m4 <- map( f4 , data=d2 ,
# start=list(a=a.start,
# sigma=sigma.start,
# b1=0,b2=0,
# b3=0,b4=0) )
#m5 <- map( f5 , data=d2 ,
# start=list(a=a.start,
# sigma=sigma.start,
# b1=0,b2=0,
# b3=0,b4=0,b5=0) )
#m6 <- map( f6 , data=d2 ,
# start=list(a=a.start,sigma=sigma.start,
# b1=0,b2=0,
# b3=0,b4=0,b5=0,b6=0) )
## R code 5.8
compare(m1,m2,m3)#,m4,m5,m6)
## WAIC pWAIC dWAIC weight SE dSE
## m3 2436.9 4.5 0.0 0.49 23.08 NA
## m2 2437.1 3.5 0.2 0.44 22.77 3.41
## m1 2440.6 2.5 3.7 0.08 23.00 5.93
## R code 5.9
# select out model to plot
#m_plot <- m4
m_plot <- m3 ###!!!
# define sequence of ages to compute values over
age.seq <- seq( from=-2 , to=3 , length.out=30 )
# compute posterior predictions for mu
mu <- link( m_plot , data=list(age=age.seq) ) ###!!!
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
# compute average prediction
mu.mean <- apply( mu , 2 , mean )
# compute 97% interval of average
mu.ci <- apply( mu , 2 , PI , prob=0.97 )
# compute interval of height
h <- sim( m_plot , data=list(age=age.seq) )
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
height.ci <- apply( h , 2 , PI )
# plot it all
plot( d2$height ~ d2$age , d2 ,type="p", col="slateblue" ,
xlim=c(-2,3) )
lines( age.seq , mu.mean )
shade( mu.ci , age.seq )
shade( height.ci , age.seq )
## R code 5.10
#h.ensemble <- ensemble( m4,m5,m6 , data=list(age=age.seq) )
h.ensemble <- ensemble( m1,m2,m3 , data=list(age=age.seq) )
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## Constructing posterior predictions
## [ 100 / 1000 ]
[ 200 / 1000 ]
[ 300 / 1000 ]
[ 400 / 1000 ]
[ 500 / 1000 ]
[ 600 / 1000 ]
[ 700 / 1000 ]
[ 800 / 1000 ]
[ 900 / 1000 ]
[ 1000 / 1000 ]
## R code 5.11
#h.ensemble <- ensemble( m4,m5,m6 , data=list(age=age.seq) )
mu.mean <- apply( h.ensemble$link , 2 , mean )
mu.ci <- apply( h.ensemble$link , 2 , PI )
height.ci <- apply( h.ensemble$sim , 2 , PI )
plot( height ~ age , d2 , col="slateblue" , xlim=c(-2,3) )
lines( age.seq , mu.mean )
shade( mu.ci , age.seq )
shade( height.ci , age.seq )
## R code 5.12
k <- coef(m1)
k
## a sigma b1
## 156.60827221 7.69113915 -0.04895203
mu <- k['a'] + k['b1']*d2$age
dev.m1 <- (-2)*sum( dnorm( d2$height , mu , k['sigma'] , log=TRUE ) )
k <- coef(m2)
k
## a sigma b1 b2
## 150.375862261 7.630583542 0.259881119 -0.003325571
mu <- k['a'] + k['b1']*d2$age + k['b2']*d2$age^2
dev.m2 <- (-2)*sum( dnorm( d2$height , mu , k['sigma'] , log=TRUE ) )
k <- coef(m3)
k
## a sigma b1 b2 b3
## 1.395655e+02 7.605122e+00 1.076594e+00 -2.174700e-02 1.260828e-04
mu <- k['a'] + k['b1']*d2$age + k['b2']*d2$age^2 + k['b3']*d2$age^3
dev.m3 <- (-2)*sum( dnorm( d2$height , mu , k['sigma'] , log=TRUE ) )
#k <- coef(m4)
#mu <- k['a'] + k['b1']*d2$age + k['b2']*d2$age^2 + k['b3']*d2$age^3 +
# k['b4']*d2$age^4
#dev.m4 <- (-2)*sum( dnorm( d2$height , mu , k['sigma'] , log=TRUE ) )
#k <- coef(m5)
#mu <- k['a'] + k['b1']*d2$age + k['b2']*d2$age^2 + k['b3']*d2$age^3 +
# k['b4']*d2$age^4 + k['b5']*d2$age^5
#dev.m5 <- (-2)*sum( dnorm( d2$height , mu , k['sigma'] , log=TRUE ) )
#k <- coef(m6)
#mu <- k['a'] + k['b1']*d2$age + k['b2']*d2$age^2 + k['b3']*d2$age^3 +
# k['b4']*d2$age^4 + k['b5']*d2$age^5 + k['b6']*d2$age^6
#dev.m6 <- (-2)*sum( dnorm( d2$height , mu , k['sigma'] , log=TRUE ) )
## R code 5.13
#compare.tab <- compare(m1,m2,m3,m4,m5,m6,sort=FALSE)
compare.tab <- compare(m1,m2,m3,sort=FALSE)
tab <- data.frame(
WAIC=compare.tab@output$WAIC,
#dev_out=c(dev.m1,dev.m2,dev.m3,dev.m4,dev.m5,dev.m6)
dev_out=c(dev.m1,dev.m2,dev.m3)
)
rownames(tab) <- rownames(compare.tab@output)
# display, sorted by dev out
tab[ order(tab$dev_out) , ]
## WAIC dev_out
## m3 2436.663 2427.229
## m2 2437.253 2429.619
## m1 2441.083 2435.141
## R code 5.14
dWAIC <- tab$WAIC - min(tab$WAIC)
ddev_out <- tab$dev_out - min(tab$dev_out)
plot( 1:3 , dWAIC , type="l" , xlab="model" , ylab="deviance" )
lines( 1:3 , ddev_out , col="red" )
#plot( 4:6 , dWAIC[4:6] , type="l" , xlab="model" ,
# ylab="deviance" , ylim=c(0,2.2) )
#lines( 4:6 , ddev_out[4:6] , col="red" )
f <- alist( height ~ dnorm( mu , sigma ), mu <- a + b1age + b2age^2 + b3age^3 + b4age^4 + b5age^5 + b6age^6, c(b1,b2,b3,b4,b5,b6) ~ dnorm( 0 , 5 ) )
m <- map( f , data=d2 , start=list( a=mean(d2\(height), b1=0,b2=0,b3=0,b4=0,b5=0,b6=0, sigma=sd(d2\)height) ) )
precis(m)
library(coda) ## R code 5.17 plot( coeftab(m3,m) , pars=c(“b1”,“b2”,“b3”,“b4”,“b5”,“b6”) )
age.seq <- seq( from=-2 , to=3 , length.out=30 ) mu <- link( m , data=list(age=age.seq) ) h <- sim( m , data=list(age=age.seq) )
mu.mean <- apply( mu , 2 , mean) mu.ci <- apply( mu , 2 , PI ) height.ci <- apply( h , 2 , PI )
plot( height ~ age , d2 , col=“slateblue” , xlim=c(-2,3) ) lines( age.seq , mu.mean ) shade( mu.ci , age.seq ) shade( height.ci , age.seq )
k <- coef(m) mu <- k[‘a’] + k[‘b1’]d2\(age + k['b2']*d2\)age^2 + k[‘b3’]d2\(age^3 + k['b4']*d2\)age^4 + k[‘b5’]d2\(age^5 + k['b6']*d2\)age^6 dev <- (-2)sum( dnorm( d2$height , mean=mu , sd=k[‘sigma’] , log=TRUE ) ) dev
new_tab <- rbind( c(WAIC(m),dev) , tab ) rownames(new_tab)[1] <- “m” new_tab[ order(new_tab$dev_out) , ]