# JAGSExamples / scripts / ols / ols_regression.R
# Basic inference.
library('rjags')
## Loading required package: coda
## Linked to JAGS 3.4.0
## Loaded modules: basemod,bugs
# Adjust this path to wherever the example CSV files live.
setwd("~/new/Bayes/Beyes/rjags/rjags1")
df <- read.csv("ols_regression.csv")
head(df)
## X Y
## 1 0.0 84.33865
## 2 0.1 105.59108
## 3 0.2 81.10928
## 4 0.3 142.88202
## 5 0.4 112.23769
## 6 0.5 84.48829
tail(df)
## X Y
## 96 9.5 208.9622
## 97 9.6 164.0852
## 98 9.7 182.6684
## 99 9.8 167.3847
## 100 9.9 187.1650
## 101 10.0 184.4908
with(df, plot(X, Y))

dev.off()
## null device
## 1
ols_regression_code <- '
model
{
  # Likelihood: each y is normal around the regression line, with precision tau.
  for (i in 1:N)
  {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] <- a * x[i] + b
  }
  # Vague priors on the slope, the intercept, and the residual scale.
  a ~ dnorm(0, 0.0001)
  b ~ dnorm(0, 0.0001)
  tau <- pow(sigma, -2)
  sigma ~ dunif(0, 10000)
}
'
writeLines(ols_regression_code, con = "ols_regression.txt")
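# Note that JAGS parameterizes dnorm() by precision (tau = 1 / sigma^2), not
# by standard deviation as R does, which is why the model converts sigma to
# tau with pow(sigma, -2). A quick sanity check of the conversion in plain R
# (illustrative value, not drawn from the model):
sigma.check <- 22.8
tau.check <- sigma.check^-2
all.equal(1 / sqrt(tau.check), sigma.check)
## [1] TRUE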
jags <- jags.model("ols_regression.txt",
                   data = list('x' = with(df, X),
                               'y' = with(df, Y),
                               'N' = nrow(df)),
                   n.chains = 4,
                   n.adapt = 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 414
##
## Initializing model
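# The n.adapt iterations above are used only for sampler tuning. If a
# separate burn-in period is wanted before recording samples, rjags provides
# update(); a minimal sketch (the original script samples immediately):
# update(jags, n.iter = 1000)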
mcmc.samples <- coda.samples(jags,
                             c('a', 'b', 'sigma'),
                             1000)
#png(file.path('graphs', 'ols', 'plot1.png'))
plot(mcmc.samples)
dev.off()
## null device
## 1
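# With four chains available, the Gelman-Rubin diagnostic from coda is a
# cheap convergence check to run alongside the trace plots (a sketch; scale
# reduction factors near 1 indicate the chains agree):
gelman.diag(mcmc.samples)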
summary(mcmc.samples)
##
## Iterations = 1001:2000
## Thinning interval = 1
## Number of chains = 4
## Sample size per chain = 1000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a 9.809 0.7875 0.01245 0.03111
## b 103.449 4.5364 0.07173 0.17962
## sigma 22.769 1.6337 0.02583 0.03065
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a 8.261 9.28 9.811 10.34 11.34
## b 94.629 100.46 103.392 106.58 112.36
## sigma 19.781 21.64 22.689 23.80 26.27
# Compare with the exact OLS solution from lm().
lm(Y ~ X, data = df)
##
## Call:
## lm(formula = Y ~ X, data = df)
##
## Coefficients:
## (Intercept) X
## 103.620 9.784
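# A minimal sketch for lining the two fits up side by side: pull the
# posterior means out of the coda summary and bind them to the lm()
# coefficients (the names follow the model above: b is the intercept, a the
# slope).
post.means <- summary(mcmc.samples)$statistics[, "Mean"]
rbind(bayes = post.means[c("b", "a")],
      ols   = coef(lm(Y ~ X, data = df)))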
# JAGSExamples / scripts / ols / ols_regression_outlier.R
# Basic inference.
library('rjags')
df <- read.csv("ols_regression_outlier.csv")
head(df)
## X Y
## 1 0.0 84.33865
## 2 0.5 109.59108
## 3 1.0 89.10928
## 4 1.5 154.88202
## 5 2.0 128.23769
## 6 2.5 104.48829
tail(df)
## X Y
## 16 7.5 173.8767
## 17 8.0 179.5952
## 18 8.5 208.5959
## 19 9.0 210.5305
## 20 9.5 209.8475
## 21 10.0 222.9744
#png(file.path('graphs', 'ols', 'outlier_data_plot.png'))
with(df, plot(X, Y))

dev.off()
## null device
## 1
# Same normal-errors model as before; only the data (which now contain an
# outlier) differ.
ols_regression_outlier_code <- '
model
{
  for (i in 1:N)
  {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] <- a * x[i] + b
  }
  a ~ dnorm(0, 0.0001)
  b ~ dnorm(0, 0.0001)
  tau <- pow(sigma, -2)
  sigma ~ dunif(0, 10000)
}
'
writeLines(ols_regression_outlier_code, con = "ols_regression_outlier.txt")
jags <- jags.model("ols_regression_outlier.txt",
                   data = list('x' = with(df, X),
                               'y' = with(df, Y),
                               'N' = nrow(df)),
                   n.chains = 4,
                   n.adapt = 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 94
##
## Initializing model
mcmc.samples <- coda.samples(jags,
                             c('a', 'b', 'sigma'),
                             5000)
#png(file.path('graphs', 'ols', 'plot2.png'))
plot(mcmc.samples)
dev.off()
## null device
## 1
summary(mcmc.samples)
##
## Iterations = 1001:6000
## Thinning interval = 1
## Number of chains = 4
## Sample size per chain = 5000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a 15.47 12.34 0.08729 0.1747
## b 98.72 66.38 0.46940 0.9379
## sigma 208.62 37.12 0.26248 0.4028
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a -8.474 7.106 15.29 23.52 40.63
## b -35.073 55.282 100.08 143.75 225.19
## sigma 150.427 182.218 203.94 229.41 295.31
head(mcmc.samples)
## [[1]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 27.935359 3.047253 190.6233
## [2,] 18.591269 60.427848 186.7129
## [3,] 19.671815 80.037902 206.2339
## [4,] -2.161539 139.800577 216.3313
## [5,] -1.602469 117.293558 218.7482
## [6,] 7.503737 217.012209 159.4601
## [7,] 4.688828 165.505042 156.2205
##
## [[2]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 4.911661 89.61051 190.4600
## [2,] 30.370172 57.26697 285.8211
## [3,] 29.906746 13.31266 215.6720
## [4,] 37.136053 -13.66003 203.3182
## [5,] 31.342852 37.85029 207.5692
## [6,] 33.116164 -12.66497 197.4253
## [7,] 23.746096 21.39532 211.8762
##
## [[3]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 25.960361 97.62624 184.0635
## [2,] 15.842559 44.72934 177.4893
## [3,] 11.620921 77.62076 250.6084
## [4,] -5.173039 110.99402 209.0308
## [5,] -5.712100 165.98835 209.6690
## [6,] 9.615401 215.75741 199.7938
## [7,] 7.766020 110.05633 191.3373
##
## [[4]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 7.738470 148.21596 197.4843
## [2,] 8.707947 179.56004 221.1838
## [3,] 22.256521 77.40683 260.0684
## [4,] 13.169091 59.42339 266.6239
## [5,] 22.447629 135.48669 295.7617
## [6,] 21.388918 95.49483 166.6832
## [7,] 4.139971 116.45145 273.3528
##
## attr(,"class")
## [1] "mcmc.list"
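# The Time-series SEs in the summary above are roughly double the naive SEs,
# which points to noticeable autocorrelation in these chains. coda's
# effectiveSize() puts a number on how many independent draws the
# 20,000 samples (4 chains x 5,000) are actually worth:
effectiveSize(mcmc.samples)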
# Compare with the exact OLS solution from lm().
lm(Y ~ X, data = df)
##
## Call:
## lm(formula = Y ~ X, data = df)
##
## Coefficients:
## (Intercept) X
## 171.812 4.816
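# A quick sketch for locating the influential point: under the lm() fit, the
# row with the largest absolute residual is the likely outlier.
outlier.fit <- lm(Y ~ X, data = df)
df[which.max(abs(resid(outlier.fit))), ]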