library("car") # load the 'car' package
## Loading required package: carData
data("Anscombe") # load the data set
?Anscombe # read a description of the data
head(Anscombe) # look at the first few lines of the data
##    education income young urban
## ME       189   2824 350.7   508
## NH       169   3259 345.9   564
## VT       230   3072 348.5   322
## MA       168   3835 335.3   846
## RI       180   3549 327.1   871
## CT       193   4256 341.0   774
pairs(Anscombe) # scatter plots for each pair of variables
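# Added (not in the original code): a numeric complement to the scatter-plot
# matrix, showing the pairwise correlations among the four variables.
cor(Anscombe)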

education_model <- lm(education ~ income + young + urban, data = Anscombe)
summary(education_model)
##
## Call:
## lm(formula = education ~ income + young + urban, data = Anscombe)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -60.240 -15.738  -1.156  15.883  51.380
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.868e+02  6.492e+01  -4.418 5.82e-05 ***
## income       8.065e-02  9.299e-03   8.674 2.56e-11 ***
## young        8.173e-01  1.598e-01   5.115 5.69e-06 ***
## urban       -1.058e-01  3.428e-02  -3.086  0.00339 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.69 on 47 degrees of freedom
## Multiple R-squared: 0.6896, Adjusted R-squared: 0.6698
## F-statistic: 34.81 on 3 and 47 DF, p-value: 5.337e-12
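# Added reference point (not part of the original analysis): keep the
# least-squares estimates and 95% confidence intervals at hand for comparison
# with the posterior summaries from the JAGS models below.
coef(education_model)
confint(education_model)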
library("rjags")
## Loading required package: coda
## Linked to JAGS 4.3.2
## Loaded modules: basemod,bugs
mod_string = " model {
    for (i in 1:length(education)) {
        education[i] ~ dnorm(mu[i], prec)
        mu[i] = b0 + b[1]*income[i] + b[2]*young[i] + b[3]*urban[i]
    }
    b0 ~ dnorm(0.0, 1.0/1.0e6)
    for (i in 1:3) {
        b[i] ~ dnorm(0.0, 1.0/1.0e6)
    }
    prec ~ dgamma(1.0/2.0, 1.0*1500.0/2.0)
    ## Initial guess of variance based on overall
    ## variance of education variable. Uses low prior
    ## effective sample size. Technically, this is not
    ## a true 'prior', but it is not very informative.
    sig2 = 1.0 / prec
    sig = sqrt(sig2)
} "
data_jags = as.list(Anscombe)
params1 = c("b0","b", "sig")
inits1 = function() {
    list("b"=rnorm(3,0.0,100.0), "prec"=rgamma(1,1.0,1.0))
}
mod1 = jags.model(textConnection(mod_string), data=data_jags, inits=inits1, n.chains=3)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 51
## Unobserved stochastic nodes: 5
## Total graph size: 422
##
## Initializing model
update(mod1, 1000) # burn-in
mod1_sim = coda.samples(model=mod1,
                        variable.names=params1,
                        n.iter=5000)
mod1_csim = do.call(rbind, mod1_sim) # combine multiple chains
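# Added (output not shown here): numerical posterior summaries for the
# monitored parameters, to complement the trace and density plots below.
summary(mod1_sim)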
plot(mod1_sim)


gelman.diag(mod1_sim)
## Potential scale reduction factors:
##
##      Point est. Upper C.I.
## b[1]       1.06       1.13
## b[2]       1.16       1.45
## b[3]       1.02       1.03
## b0         1.17       1.49
## sig        1.00       1.01
##
## Multivariate psrf
##
## 1.11
autocorr.diag(mod1_sim)
##             b[1]      b[2]      b[3]        b0        sig
## Lag 0  1.0000000 1.0000000 1.0000000 1.0000000 1.00000000
## Lag 1  0.9865652 0.9947547 0.9771881 0.9957758 0.09874593
## Lag 5  0.9360024 0.9754781 0.8960006 0.9796510 0.08412357
## Lag 10 0.8801171 0.9547797 0.8101269 0.9604307 0.07397704
## Lag 50 0.5449190 0.8200073 0.4148074 0.8247768 0.01873949
autocorr.plot(mod1_sim)



effectiveSize(mod1_sim)
##       b[1]       b[2]       b[3]         b0        sig
##   99.95165   34.96417  165.39745   30.93122 4578.33732
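# Added suggestion: the effective sample sizes for b0 and the slope
# coefficients are small relative to the 15,000 saved draws, so a longer,
# thinned run (hypothetical follow-up, output not shown) gives more reliable
# posterior estimates.
mod1_sim_long = coda.samples(model=mod1, variable.names=params1,
                             n.iter=5e4, thin=10)
effectiveSize(mod1_sim_long)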
plot(education_model)




dic.samples(mod1, n.iter=1e5)
## Mean deviance: 481
## penalty 5.223
## Penalized deviance: 486.2
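# Added check (assumes mod1_csim from above): residuals based on the posterior
# mean coefficients, as a Bayesian counterpart to the lm() diagnostic plots.
pm_params1 = colMeans(mod1_csim)
X1 = cbind(1, Anscombe$income, Anscombe$young, Anscombe$urban)
yhat1 = drop(X1 %*% pm_params1[c("b0", "b[1]", "b[2]", "b[3]")])
resid1 = Anscombe$education - yhat1
plot(yhat1, resid1)   # look for curvature or non-constant spread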
library("rjags")
mod_string_question_4a = " model {
    for (i in 1:length(education)) {
        education[i] ~ dnorm(mu[i], prec)
        mu[i] = b0 + b[1]*income[i] + b[2]*young[i]
    }
    b0 ~ dnorm(0.0, 1.0/1.0e6)
    for (i in 1:2) {
        b[i] ~ dnorm(0.0, 1.0/1.0e6)
    }
    prec ~ dgamma(1.0/2.0, 1.0*1500.0/2.0)
    ## Initial guess of variance based on overall
    ## variance of education variable. Uses low prior
    ## effective sample size. Technically, this is not
    ## a true 'prior', but it is not very informative.
    sig2 = 1.0 / prec
    sig = sqrt(sig2)
} "
data_jags = as.list(Anscombe)
params1 = c("b0","b", "sig")
inits1 = function() {
    list("b"=rnorm(2,0.0,100.0), "prec"=rgamma(1,1.0,1.0))
}
mod_question_4a = jags.model(textConnection(mod_string_question_4a), data=data_jags, inits=inits1, n.chains=3)
## Warning in jags.model(textConnection(mod_string_question_4a), data = data_jags,
## : Unused variable "urban" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 51
## Unobserved stochastic nodes: 4
## Total graph size: 320
##
## Initializing model
update(mod_question_4a, 1000) # burn-in
mod_question_4a_sim = coda.samples(model=mod_question_4a,
                                   variable.names=params1,
                                   n.iter=5000)
mod_question_4a_csim = do.call(rbind, mod_question_4a_sim) # combine multiple chains
plot(mod_question_4a_sim)

gelman.diag(mod_question_4a_sim)
## Potential scale reduction factors:
##
##      Point est. Upper C.I.
## b[1]       1.01       1.01
## b[2]       1.16       1.48
## b0         1.12       1.35
## sig        1.01       1.02
##
## Multivariate psrf
##
## 1.1
autocorr.diag(mod_question_4a_sim)
##             b[1]      b[2]        b0        sig
## Lag 0  1.0000000 1.0000000 1.0000000 1.00000000
## Lag 1  0.9731861 0.9938796 0.9950769 0.07777506
## Lag 5  0.8721065 0.9696513 0.9757617 0.06467455
## Lag 10 0.7570684 0.9412230 0.9510940 0.04810342
## Lag 50 0.2867119 0.7417381 0.7806945 0.04089848
autocorr.plot(mod_question_4a_sim)



effectiveSize(mod_question_4a_sim)
##       b[1]       b[2]         b0        sig
##  203.85946   46.06856   37.03094 4123.32051
dic.samples(mod_question_4a, n.iter=1e5)
## Mean deviance: 489.2
## penalty 4.149
## Penalized deviance: 493.3
library("rjags")
mod_string_question_4b = " model {
    for (i in 1:length(education)) {
        education[i] ~ dnorm(mu[i], prec)
        mu[i] = b0 + b[1]*income[i] + b[2]*young[i] + b[3]*young[i]*income[i]
    }
    b0 ~ dnorm(0.0, 1.0/1.0e6)
    for (i in 1:3) {
        b[i] ~ dnorm(0.0, 1.0/1.0e6)
    }
    prec ~ dgamma(1.0/2.0, 1.0*1500.0/2.0)
    ## Initial guess of variance based on overall
    ## variance of education variable. Uses low prior
    ## effective sample size. Technically, this is not
    ## a true 'prior', but it is not very informative.
    sig2 = 1.0 / prec
    sig = sqrt(sig2)
} "
data_jags = as.list(Anscombe)
params1 = c("b0","b", "sig")
inits1 = function() {
    list("b"=rnorm(3,0.0,100.0), "prec"=rgamma(1,1.0,1.0))
}
mod_question_4b = jags.model(textConnection(mod_string_question_4b), data=data_jags, inits=inits1, n.chains=3)
## Warning in jags.model(textConnection(mod_string_question_4b), data = data_jags,
## : Unused variable "urban" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 51
## Unobserved stochastic nodes: 5
## Total graph size: 372
##
## Initializing model
update(mod_question_4b, 1000) # burn-in
mod_question_4b_sim = coda.samples(model=mod_question_4b,
                                   variable.names=params1,
                                   n.iter=5000)
mod_question_4b_csim = do.call(rbind, mod_question_4b_sim) # combine multiple chains
plot(mod_question_4b_sim)


gelman.diag(mod_question_4b_sim)
## Potential scale reduction factors:
##
##      Point est. Upper C.I.
## b[1]      17.31      39.50
## b[2]      16.99      42.18
## b[3]      17.14      37.76
## b0        16.89      44.40
## sig        2.39       4.24
##
## Multivariate psrf
##
## 13.4
autocorr.diag(mod_question_4b_sim)
##             b[1]      b[2]      b[3]        b0       sig
## Lag 0  1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1  0.9967772 0.9969899 0.9964436 0.9966958 0.6937121
## Lag 5  0.9841114 0.9844832 0.9822237 0.9860516 0.6741533
## Lag 10 0.9683321 0.9705026 0.9644882 0.9743438 0.6530952
## Lag 50 0.8532070 0.8877688 0.8433282 0.8784100 0.5382250
autocorr.plot(mod_question_4b_sim)



effectiveSize(mod_question_4b_sim)
##      b[1]      b[2]      b[3]        b0       sig
##  24.21060  15.23749  26.40313  20.28826 260.42086
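# Added note: the very low effective sample sizes (and psrf values near 17)
# point to strong posterior correlation between the intercept and the slopes
# in the interaction model. A standard remedy, sketched here but not run in
# the original analysis, is to centre the predictors before fitting.
data_jags_centered = list(education = Anscombe$education,
                          income = Anscombe$income - mean(Anscombe$income),
                          young  = Anscombe$young  - mean(Anscombe$young))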
dic.samples(mod_question_4b, n.iter=1e5)
## Mean deviance: 487.3
## penalty 5.476
## Penalized deviance: 492.8
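# Added comparison (assumes the three fitted models above): storing the DIC
# objects allows a direct comparison with rjags' diffdic(); a negative
# difference favours the first model listed.
dic1  = dic.samples(mod1, n.iter=1e5)
dic4a = dic.samples(mod_question_4a, n.iter=1e5)
dic4b = dic.samples(mod_question_4b, n.iter=1e5)
diffdic(dic1, dic4a)
diffdic(dic1, dic4b)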