# JAGSExamples / scripts / ols / ols_regression.R
# Basic inference.
library('rjags')
## Loading required package: coda
## Linked to JAGS 3.4.0
## Loaded modules: basemod,bugs
# Adjust this path to wherever the example CSV files live.
setwd("~/new/Bayes/Beyes/rjags/rjags1")
df <- read.csv("ols_regression.csv")
head(df)
## X Y
## 1 0.0 84.33865
## 2 0.1 105.59108
## 3 0.2 81.10928
## 4 0.3 142.88202
## 5 0.4 112.23769
## 6 0.5 84.48829
tail(df)
## X Y
## 96 9.5 208.9622
## 97 9.6 164.0852
## 98 9.7 182.6684
## 99 9.8 167.3847
## 100 9.9 187.1650
## 101 10.0 184.4908
with(df, plot(X, Y))

dev.off()
## null device
## 1
ols_regression_code <- '
model
{
  # Likelihood: each y is normal around the regression line, with precision tau.
  for (i in 1:N)
  {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] <- a * x[i] + b
  }
  # Vague priors on the slope, the intercept, and the residual scale.
  a ~ dnorm(0, 0.0001)
  b ~ dnorm(0, 0.0001)
  tau <- pow(sigma, -2)
  sigma ~ dunif(0, 10000)
}
'
writeLines(ols_regression_code, con = "ols_regression.txt")
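# Note that JAGS parameterizes dnorm() by precision (tau = 1 / sigma^2), not
# by standard deviation as R does, which is why the model converts sigma to
# tau with pow(sigma, -2). A quick sanity check of the conversion in plain R
# (illustrative value, not drawn from the model):
sigma.check <- 22.8
tau.check <- sigma.check^-2
all.equal(1 / sqrt(tau.check), sigma.check)
## [1] TRUE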
jags <- jags.model("ols_regression.txt",
                   data = list('x' = with(df, X),
                               'y' = with(df, Y),
                               'N' = nrow(df)),
                   n.chains = 4,
                   n.adapt = 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 414
##
## Initializing model
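# The n.adapt iterations above are used only for sampler tuning. If a
# separate burn-in period is wanted before recording samples, rjags provides
# update(); a minimal sketch (the original script samples immediately):
# update(jags, n.iter = 1000)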
mcmc.samples <- coda.samples(jags,
                             c('a', 'b', 'sigma'),
                             1000)
#png(file.path('graphs', 'ols', 'plot1.png'))
plot(mcmc.samples)
dev.off()
## null device
## 1
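# With four chains available, the Gelman-Rubin diagnostic from coda is a
# cheap convergence check to run alongside the trace plots (a sketch; scale
# reduction factors near 1 indicate the chains agree):
gelman.diag(mcmc.samples)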
summary(mcmc.samples)
##
## Iterations = 1001:2000
## Thinning interval = 1
## Number of chains = 4
## Sample size per chain = 1000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a 9.809 0.7875 0.01245 0.03111
## b 103.449 4.5364 0.07173 0.17962
## sigma 22.769 1.6337 0.02583 0.03065
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a 8.261 9.28 9.811 10.34 11.34
## b 94.629 100.46 103.392 106.58 112.36
## sigma 19.781 21.64 22.689 23.80 26.27
# Compare with the exact OLS solution from lm().
lm(Y ~ X, data = df)
##
## Call:
## lm(formula = Y ~ X, data = df)
##
## Coefficients:
## (Intercept) X
## 103.620 9.784
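# A minimal sketch for lining the two fits up side by side: pull the
# posterior means out of the coda summary and bind them to the lm()
# coefficients (the names follow the model above: b is the intercept, a the
# slope).
post.means <- summary(mcmc.samples)$statistics[, "Mean"]
rbind(bayes = post.means[c("b", "a")],
      ols   = coef(lm(Y ~ X, data = df)))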
# JAGSExamples / scripts / ols / ols_regression_outlier.R
# Basic inference.
library('rjags')
df <- read.csv("ols_regression_outlier.csv")
head(df)
## X Y
## 1 0.0 84.33865
## 2 0.5 109.59108
## 3 1.0 89.10928
## 4 1.5 154.88202
## 5 2.0 128.23769
## 6 2.5 104.48829
tail(df)
## X Y
## 16 7.5 173.8767
## 17 8.0 179.5952
## 18 8.5 208.5959
## 19 9.0 210.5305
## 20 9.5 209.8475
## 21 10.0 222.9744
#png(file.path('graphs', 'ols', 'outlier_data_plot.png'))
with(df, plot(X, Y))

dev.off()
## null device
## 1
# Same normal-errors model as before; only the data (which now contain an
# outlier) differ.
ols_regression_outlier_code <- '
model
{
  for (i in 1:N)
  {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] <- a * x[i] + b
  }
  a ~ dnorm(0, 0.0001)
  b ~ dnorm(0, 0.0001)
  tau <- pow(sigma, -2)
  sigma ~ dunif(0, 10000)
}
'
writeLines(ols_regression_outlier_code, con = "ols_regression_outlier.txt")
jags <- jags.model("ols_regression_outlier.txt",
                   data = list('x' = with(df, X),
                               'y' = with(df, Y),
                               'N' = nrow(df)),
                   n.chains = 4,
                   n.adapt = 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 94
##
## Initializing model
mcmc.samples <- coda.samples(jags,
                             c('a', 'b', 'sigma'),
                             5000)
#png(file.path('graphs', 'ols', 'plot2.png'))
plot(mcmc.samples)
dev.off()
## null device
## 1
summary(mcmc.samples)
##
## Iterations = 1001:6000
## Thinning interval = 1
## Number of chains = 4
## Sample size per chain = 5000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## a 15.47 12.34 0.08729 0.1747
## b 98.72 66.38 0.46940 0.9379
## sigma 208.62 37.12 0.26248 0.4028
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## a -8.474 7.106 15.29 23.52 40.63
## b -35.073 55.282 100.08 143.75 225.19
## sigma 150.427 182.218 203.94 229.41 295.31
head(mcmc.samples)
## [[1]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 27.935359 3.047253 190.6233
## [2,] 18.591269 60.427848 186.7129
## [3,] 19.671815 80.037902 206.2339
## [4,] -2.161539 139.800577 216.3313
## [5,] -1.602469 117.293558 218.7482
## [6,] 7.503737 217.012209 159.4601
## [7,] 4.688828 165.505042 156.2205
##
## [[2]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 4.911661 89.61051 190.4600
## [2,] 30.370172 57.26697 285.8211
## [3,] 29.906746 13.31266 215.6720
## [4,] 37.136053 -13.66003 203.3182
## [5,] 31.342852 37.85029 207.5692
## [6,] 33.116164 -12.66497 197.4253
## [7,] 23.746096 21.39532 211.8762
##
## [[3]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 25.960361 97.62624 184.0635
## [2,] 15.842559 44.72934 177.4893
## [3,] 11.620921 77.62076 250.6084
## [4,] -5.173039 110.99402 209.0308
## [5,] -5.712100 165.98835 209.6690
## [6,] 9.615401 215.75741 199.7938
## [7,] 7.766020 110.05633 191.3373
##
## [[4]]
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1001
## End = 1007
## Thinning interval = 1
## a b sigma
## [1,] 7.738470 148.21596 197.4843
## [2,] 8.707947 179.56004 221.1838
## [3,] 22.256521 77.40683 260.0684
## [4,] 13.169091 59.42339 266.6239
## [5,] 22.447629 135.48669 295.7617
## [6,] 21.388918 95.49483 166.6832
## [7,] 4.139971 116.45145 273.3528
##
## attr(,"class")
## [1] "mcmc.list"
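# The Time-series SEs in the summary above are roughly double the naive SEs,
# which points to noticeable autocorrelation in these chains. coda's
# effectiveSize() puts a number on how many independent draws the
# 20,000 samples (4 chains x 5,000) are actually worth:
effectiveSize(mcmc.samples)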
# Compare with the exact OLS solution from lm().
lm(Y ~ X, data = df)
##
## Call:
## lm(formula = Y ~ X, data = df)
##
## Coefficients:
## (Intercept) X
## 171.812 4.816
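# A quick sketch for locating the influential point: under the lm() fit, the
# row with the largest absolute residual is the likely outlier.
outlier.fit <- lm(Y ~ X, data = df)
df[which.max(abs(resid(outlier.fit))), ]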