library("car") # load the 'car' package
## Loading required package: carData
data("Anscombe") # load the data set
?Anscombe # read a description of the data
head(Anscombe) # look at the first few lines of the data
##    education income young urban
## ME       189   2824 350.7   508
## NH       169   3259 345.9   564
## VT       230   3072 348.5   322
## MA       168   3835 335.3   846
## RI       180   3549 327.1   871
## CT       193   4256 341.0   774
pairs(Anscombe) # scatter plots for each pair of variables
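# Added (not in the original code): a numeric complement to the scatter-plot
# matrix, showing the pairwise correlations among the four variables.
cor(Anscombe)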

education_model <- lm(education ~ income + young + urban, data = Anscombe)
summary(education_model)
##
## Call:
## lm(formula = education ~ income + young + urban, data = Anscombe)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -60.240 -15.738  -1.156  15.883  51.380
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.868e+02  6.492e+01  -4.418 5.82e-05 ***
## income       8.065e-02  9.299e-03   8.674 2.56e-11 ***
## young        8.173e-01  1.598e-01   5.115 5.69e-06 ***
## urban       -1.058e-01  3.428e-02  -3.086  0.00339 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.69 on 47 degrees of freedom
## Multiple R-squared: 0.6896, Adjusted R-squared: 0.6698
## F-statistic: 34.81 on 3 and 47 DF, p-value: 5.337e-12
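# Added reference point (not part of the original analysis): keep the
# least-squares estimates and 95% confidence intervals at hand for comparison
# with the posterior summaries from the JAGS models below.
coef(education_model)
confint(education_model)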
library("rjags")
## Loading required package: coda
## Linked to JAGS 4.3.2
## Loaded modules: basemod,bugs
mod_string = " model {
    for (i in 1:length(education)) {
        education[i] ~ dnorm(mu[i], prec)
        mu[i] = b0 + b[1]*income[i] + b[2]*young[i] + b[3]*urban[i]
    }
    b0 ~ dnorm(0.0, 1.0/1.0e6)
    for (i in 1:3) {
        b[i] ~ dnorm(0.0, 1.0/1.0e6)
    }
    prec ~ dgamma(1.0/2.0, 1.0*1500.0/2.0)
    ## Initial guess of variance based on overall
    ## variance of education variable. Uses low prior
    ## effective sample size. Technically, this is not
    ## a true 'prior', but it is not very informative.
    sig2 = 1.0 / prec
    sig = sqrt(sig2)
} "
data_jags = as.list(Anscombe)
params1 = c("b0","b", "sig")
inits1 = function() {
    list("b"=rnorm(3,0.0,100.0), "prec"=rgamma(1,1.0,1.0))
}
mod1 = jags.model(textConnection(mod_string), data=data_jags, inits=inits1, n.chains=3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 51
## Unobserved stochastic nodes: 5
## Total graph size: 422
##
## Initializing model
update(mod1, 1000) # burn-in
mod1_sim = coda.samples(model=mod1,
                        variable.names=params1,
                        n.iter=5000)
mod1_csim = do.call(rbind, mod1_sim) # combine multiple chains
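# Added (output not shown here): numerical posterior summaries for the
# monitored parameters, to complement the trace and density plots below.
summary(mod1_sim)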
plot(mod1_sim)


gelman.diag(mod1_sim)
## Potential scale reduction factors:
##
##      Point est. Upper C.I.
## b[1]       1.06       1.13
## b[2]       1.16       1.45
## b[3]       1.02       1.03
## b0         1.17       1.49
## sig        1.00       1.01
##
## Multivariate psrf
##
## 1.11
autocorr.diag(mod1_sim)
##             b[1]      b[2]      b[3]        b0        sig
## Lag 0  1.0000000 1.0000000 1.0000000 1.0000000 1.00000000
## Lag 1  0.9865652 0.9947547 0.9771881 0.9957758 0.09874593
## Lag 5  0.9360024 0.9754781 0.8960006 0.9796510 0.08412357
## Lag 10 0.8801171 0.9547797 0.8101269 0.9604307 0.07397704
## Lag 50 0.5449190 0.8200073 0.4148074 0.8247768 0.01873949
autocorr.plot(mod1_sim)



effectiveSize(mod1_sim)
##       b[1]       b[2]       b[3]         b0        sig
##   99.95165   34.96417  165.39745   30.93122 4578.33732
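# Added suggestion: the effective sample sizes for b0 and the slope
# coefficients are small relative to the 15,000 saved draws, so a longer,
# thinned run (hypothetical follow-up, output not shown) gives more reliable
# posterior estimates.
mod1_sim_long = coda.samples(model=mod1, variable.names=params1,
                             n.iter=5e4, thin=10)
effectiveSize(mod1_sim_long)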
plot(education_model)




dic.samples(mod1, n.iter=1e5)
## Mean deviance: 481
## penalty 5.223
## Penalized deviance: 486.2
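# Added check (assumes mod1_csim from above): residuals based on the posterior
# mean coefficients, as a Bayesian counterpart to the lm() diagnostic plots.
pm_params1 = colMeans(mod1_csim)
X1 = cbind(1, Anscombe$income, Anscombe$young, Anscombe$urban)
yhat1 = drop(X1 %*% pm_params1[c("b0", "b[1]", "b[2]", "b[3]")])
resid1 = Anscombe$education - yhat1
plot(yhat1, resid1)   # look for curvature or non-constant spread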
library("rjags")
mod_string_question_4a = " model {
    for (i in 1:length(education)) {
        education[i] ~ dnorm(mu[i], prec)
        mu[i] = b0 + b[1]*income[i] + b[2]*young[i]
    }
    b0 ~ dnorm(0.0, 1.0/1.0e6)
    for (i in 1:2) {
        b[i] ~ dnorm(0.0, 1.0/1.0e6)
    }
    prec ~ dgamma(1.0/2.0, 1.0*1500.0/2.0)
    ## Initial guess of variance based on overall
    ## variance of education variable. Uses low prior
    ## effective sample size. Technically, this is not
    ## a true 'prior', but it is not very informative.
    sig2 = 1.0 / prec
    sig = sqrt(sig2)
} "
data_jags = as.list(Anscombe)
params1 = c("b0","b", "sig")
inits1 = function() {
    list("b"=rnorm(2,0.0,100.0), "prec"=rgamma(1,1.0,1.0))
}
mod_question_4a = jags.model(textConnection(mod_string_question_4a), data=data_jags, inits=inits1, n.chains=3)
## Warning in jags.model(textConnection(mod_string_question_4a), data = data_jags,
## : Unused variable "urban" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 51
## Unobserved stochastic nodes: 4
## Total graph size: 320
##
## Initializing model
update(mod_question_4a, 1000) # burn-in
mod_question_4a_sim = coda.samples(model=mod_question_4a,
                                   variable.names=params1,
                                   n.iter=5000)
mod_question_4a_csim = do.call(rbind, mod_question_4a_sim) # combine multiple chains
plot(mod_question_4a_sim)

gelman.diag(mod_question_4a_sim)
## Potential scale reduction factors:
##
##      Point est. Upper C.I.
## b[1]       1.01       1.01
## b[2]       1.16       1.48
## b0         1.12       1.35
## sig        1.01       1.02
##
## Multivariate psrf
##
## 1.1
autocorr.diag(mod_question_4a_sim)
##             b[1]      b[2]        b0        sig
## Lag 0  1.0000000 1.0000000 1.0000000 1.00000000
## Lag 1  0.9731861 0.9938796 0.9950769 0.07777506
## Lag 5  0.8721065 0.9696513 0.9757617 0.06467455
## Lag 10 0.7570684 0.9412230 0.9510940 0.04810342
## Lag 50 0.2867119 0.7417381 0.7806945 0.04089848
autocorr.plot(mod_question_4a_sim)



effectiveSize(mod_question_4a_sim)
##       b[1]       b[2]         b0        sig
##  203.85946   46.06856   37.03094 4123.32051
dic.samples(mod_question_4a, n.iter=1e5)
## Mean deviance: 489.2
## penalty 4.149
## Penalized deviance: 493.3
library("rjags")
mod_string_question_4b = " model {
    for (i in 1:length(education)) {
        education[i] ~ dnorm(mu[i], prec)
        mu[i] = b0 + b[1]*income[i] + b[2]*young[i] + b[3]*young[i]*income[i]
    }
    b0 ~ dnorm(0.0, 1.0/1.0e6)
    for (i in 1:3) {
        b[i] ~ dnorm(0.0, 1.0/1.0e6)
    }
    prec ~ dgamma(1.0/2.0, 1.0*1500.0/2.0)
    ## Initial guess of variance based on overall
    ## variance of education variable. Uses low prior
    ## effective sample size. Technically, this is not
    ## a true 'prior', but it is not very informative.
    sig2 = 1.0 / prec
    sig = sqrt(sig2)
} "
data_jags = as.list(Anscombe)
params1 = c("b0","b", "sig")
inits1 = function() {
    list("b"=rnorm(3,0.0,100.0), "prec"=rgamma(1,1.0,1.0))
}
mod_question_4b = jags.model(textConnection(mod_string_question_4b), data=data_jags, inits=inits1, n.chains=3)
## Warning in jags.model(textConnection(mod_string_question_4b), data = data_jags,
## : Unused variable "urban" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 51
## Unobserved stochastic nodes: 5
## Total graph size: 372
##
## Initializing model
update(mod_question_4b, 1000) # burn-in
mod_question_4b_sim = coda.samples(model=mod_question_4b,
                                   variable.names=params1,
                                   n.iter=5000)
mod_question_4b_csim = do.call(rbind, mod_question_4b_sim) # combine multiple chains
plot(mod_question_4b_sim)


gelman.diag(mod_question_4b_sim)
## Potential scale reduction factors:
##
##      Point est. Upper C.I.
## b[1]      17.31      39.50
## b[2]      16.99      42.18
## b[3]      17.14      37.76
## b0        16.89      44.40
## sig        2.39       4.24
##
## Multivariate psrf
##
## 13.4
autocorr.diag(mod_question_4b_sim)
##             b[1]      b[2]      b[3]        b0       sig
## Lag 0  1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1  0.9967772 0.9969899 0.9964436 0.9966958 0.6937121
## Lag 5  0.9841114 0.9844832 0.9822237 0.9860516 0.6741533
## Lag 10 0.9683321 0.9705026 0.9644882 0.9743438 0.6530952
## Lag 50 0.8532070 0.8877688 0.8433282 0.8784100 0.5382250
autocorr.plot(mod_question_4b_sim)



effectiveSize(mod_question_4b_sim)
##      b[1]      b[2]      b[3]        b0       sig
##  24.21060  15.23749  26.40313  20.28826 260.42086
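# Added note: the very low effective sample sizes (and psrf values near 17)
# point to strong posterior correlation between the intercept and the slopes
# in the interaction model. A standard remedy, sketched here but not run in
# the original analysis, is to centre the predictors before fitting.
data_jags_centered = list(education = Anscombe$education,
                          income = Anscombe$income - mean(Anscombe$income),
                          young  = Anscombe$young  - mean(Anscombe$young))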
dic.samples(mod_question_4b, n.iter=1e5)
## Mean deviance: 487.3
## penalty 5.476
## Penalized deviance: 492.8
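# Added comparison (assumes the three fitted models above): storing the DIC
# objects allows a direct comparison with rjags' diffdic(); a negative
# difference favours the first model listed.
dic1  = dic.samples(mod1, n.iter=1e5)
dic4a = dic.samples(mod_question_4a, n.iter=1e5)
dic4b = dic.samples(mod_question_4b, n.iter=1e5)
diffdic(dic1, dic4a)
diffdic(dic1, dic4b)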