Bayesian Regression

Igor — May 26, 2015, 8:57 PM

##!/usr/bin/R
## -*- coding: utf-8 -*-
##
## Time-stamp: <2015-05-26 17:10:34 igor> 
##
## Comments: Test BRUGS package
## http://tinyurl.com/jvsdg3w
##
rm(list=ls()) # clear out memory

options(digits=7)
devAskNewPage(FALSE)
options(device.ask.default = FALSE)

# set working directory
setwd('~/Dropbox/workplace/lecture-notes/GaTech/regression/homework2')

## in.file <- 'input.csv' # specify input file
## in.data  <- read.csv(in.file, header = T, sep = ',')

par(mfrow = c(1,1))



library(BRugs)
Welcome to BRugs connected to OpenBUGS version 3.2.3
modelstring = "
# BUGS model specification begins here...
model {

    ## PRIOR.
    b[1] ~ dnorm(0.00, 0.001)
    b[2] ~ dnorm(0.00, 0.001)
    b[3] ~ dnorm(0.00, 0.001)
    b[4] ~ dnorm(0.00, 0.001)
    tau ~ dgamma(0.01, 0.001)
    sigma2 <- 1/tau

    ## LIKELIHOOD.
    for( i in 1:n )
     {
       mu[i] <- b[1] +
                b[2]*(X1[i]-mean(X1[])) +
                b[3]*(X2[i]-mean(X2[])) +
                b[4]*(X3[i]-mean(X3[]))

       Y[i] ~ dnorm(mu[i], tau)
     }

    M1 <- mean(X1[])
    M2 <- mean(X2[])
    M3 <- mean(X3[])

    ## PREDICTION.
    pmean <- b[1] +
             b[2]*(Acetic-M1) +
             b[3]*(H2S-M2) +
             b[4]*(Lactic-M3)

    c0 <- b[1] - b[2]*M1 - b[3]*M2 - b[4]*M3

    ypred ~ dnorm(pmean, tau)

    SSE <-(n-p)*sigma2

    for( i in 1:n)
    {
      err[i] <- Y[i] - mean(Y[])
     }

    SST <- inprod(err[], err[])

    ## BAYES R-SQUARED and R-SQUARED ADJUSTED 
    R2 <- 1 - SSE/SST
    R2adj <- 1-(n-1)*sigma2/SST


}
# ... end BUGS model specification
" # close quote for modelstring

## Write model to a file:
tempfile = file("hw2_part2.bug","w") ;
writeLines(modelstring, con=tempfile);
close(tempfile)

## Load model file into BRugs and check its syntax:
modelCheck( "hw2_part2.bug" )
model is syntactically correct
## Specify the data for BRugs model, as a list:

Taste_i = c(12.3, 39, 5.6, 37.3, 18.1, 34.9, 0.7, 54.9, 15.9, 18.0,
   14.0, 32.0, 16.8, 26.5, 13.4, 20.9, 47.9, 25.9, 21.9, 21.0, 57.2,
   25.9, 40.9, 6.4, 38.9, 15.2, 56.7, 11.6, 0.7, 5.5);

Acetic_i = c(4.54, 5.37, 4.66, 5.89, 4.90, 5.74, 4.48, 6.15, 4.79, 5.25,
   4.56, 5.46, 5.37, 6.46, 5.80, 5.16, 5.76, 5.70, 6.08, 5.24, 6.45,
   5.24, 6.37, 5.41, 5.44, 5.30, 5.86, 6.04, 5.33, 6.18);

H2S_i = c(3.13, 5.44, 3.81, 8.73, 3.85, 6.14, 3.00, 6.75, 3.91, 6.17,
   4.95, 9.24, 3.66, 6.92, 6.69, 5.04, 7.59, 7.60, 7.97, 4.17, 7.91,
   4.94, 9.59, 4.70, 9.06, 5.22, 10.20, 3.22, 3.91, 4.79);

Lactic_i = c(0.86, 1.57, 0.99, 1.29, 1.29, 1.68, 1.06, 1.52, 1.16, 1.63,
   1.15, 1.44, 1.31, 1.72, 1.08, 1.53, 1.81, 1.09, 1.78, 1.58, 1.90,
   1.30, 1.74, 1.49, 1.99, 1.33, 2.01, 1.46, 1.25, 1.25);

datalist =
  list(n=30, p=4, Y=Taste_i, X1=Acetic_i, X2=H2S_i, X3=Lactic_i,
       Acetic = 5, H2S = 8, Lactic = 2)

# Get the data into BRugs:
modelData(bugsData( datalist ))
data loaded
#### INITIALIZE THE CHAINS:

modelCompile()
model compiled
modelGenInits()
initial values generated, model initialized
#### RUN THE CHAINS:

## burn in
modelUpdate(1000)
1000 updates took 0 s
## Keep a record of sampled "theta" values
samplesSet(c("c0","b[1]","b[2]","b[3]","b[4]",
             "pmean","tau","R2","ypred"))
monitor set for variable 'c0'
monitor set for variable 'b[1]'
monitor set for variable 'b[2]'
monitor set for variable 'b[3]'
monitor set for variable 'b[4]'
monitor set for variable 'pmean'
monitor set for variable 'tau'
monitor set for variable 'R2'
monitor set for variable 'ypred'
## Arbitrary length of chain to generate.
chainlength = 20000

## Actually generate the chain.
modelUpdate(chainlength)
20000 updates took 0 s
# Put sampled values in a vector.
c0.Sample = samplesSample("c0")
b1.Sample = samplesSample("b[1]")
b2.Sample = samplesSample("b[2]")
b3.Sample = samplesSample("b[3]")
b4.Sample = samplesSample("b[4]")
pmean.Sample = samplesSample("pmean")
tau.Sample = samplesSample("tau")
R2.Sample = samplesSample("R2")
ypred.Sample = samplesSample("ypred")

# Plot the trajectory of the last 500 sampled values

par(mfrow = c(4,2))
hist(c0.Sample,nclas=80, col="lightgreen")
hist(b1.Sample,nclas=80, col="lightgreen")
hist(b2.Sample,nclas=80, col="lightgreen")
hist(b3.Sample,nclas=80, col="lightgreen")
hist(b4.Sample,nclas=80, col="lightgreen")
hist(pmean.Sample,nclas=80, col="lightgreen")
hist(tau.Sample,nclas=90, col="lightgreen")
hist(R2.Sample,nclas=80, col="lightgreen")

plot of chunk unnamed-chunk-1

par(mfrow = c(1,1))

## show statistics
sample.statistics=samplesStats("*")
print(sample.statistics)
            mean        sd  MC_error  val2.5pc     median val97.5pc start
R2      0.626800  0.111200 9.604e-04   0.35500   0.645700   0.78730  1001
b[1]   24.420000  1.905000 1.291e-02  20.67000  24.420000  28.19000  1001
b[2]    0.495400  4.536000 4.391e-02  -8.38100   0.500900   9.47800  1001
b[3]    4.026000  1.272000 1.417e-02   1.50900   4.023000   6.52500  1001
b[4]   18.170000  8.582000 1.052e-01   1.14200  18.220000  35.10000  1001
c0    -28.430000 20.150000 1.649e-01 -68.11000 -28.400000  11.20000  1001
pmean  42.600000  6.299000 6.286e-02  30.15000  42.610000  55.02000  1001
tau     0.009839  0.002724 2.335e-05   0.00526   0.009576   0.01595  1001
ypred  42.570000 12.250000 9.229e-02  18.47000  42.520000  66.50000  1001
      sample
R2     20000
b[1]   20000
b[2]   20000
b[3]   20000
b[4]   20000
c0     20000
pmean  20000
tau    20000
ypred  20000
# FIND QUANTILES

quantiles <- c(.025, .50, .975)

c0.q=quantile(c0.Sample, quantiles)
b1.q=quantile(b1.Sample, quantiles)
b2.q=quantile(b2.Sample, quantiles)
b3.q=quantile(b3.Sample, quantiles)
b4.q=quantile(b4.Sample, quantiles)
pmean.q=quantile(pmean.Sample, quantiles)
tau.q=quantile(tau.Sample, quantiles)
R2.q=quantile(R2.Sample, quantiles)

cat("=======================\n")
=======================
cat("quantiles=",quantiles,"\n")
quantiles= 0.025 0.5 0.975 
cat("c0.q=",c0.q,"\n")
c0.q= -68.11266 -28.39803 11.16235 
cat("b1.q=",b1.q,"\n")
b1.q= 20.66707 24.4232 28.18819 
cat("b2.q=",b2.q,"\n")
b2.q= -8.380714 0.5007305 9.476358 
cat("b3.q=",b3.q,"\n")
b3.q= 1.508766 4.023143 6.523844 
cat("b4.q=",b4.q,"\n")
b4.q= 1.141725 18.21702 35.08792 
cat("pmean.q=",pmean.q,"\n")
pmean.q= 30.1528 42.60651 55.02047 
cat("tau.q=",tau.q,"\n")
tau.q= 0.005259925 0.009576 0.01594902 
cat("R2.q=",R2.q,"\n")
R2.q= 0.3549801 0.645663 0.7872612 
cat("=======================\n")
=======================
## some more plots

## plot the MCMC chain
samplesHistory("*", beg=1000, end=1300,
               mfrow = c(3, 3), ask=NULL)

plot of chunk unnamed-chunk-1

## plot autocorrelations of 1st chain
samplesAutoC("*", 1,  mfrow = c(3,3), ask=NULL)

plot of chunk unnamed-chunk-1