Linear regression MCMC

Simple linear regression

makes the posterior to be near prior = MCMC stability

# Creating data
trueA = 5
trueB = 0
trueSd = 10
sampleSize = 31
# create independent x-values
x =(-(sampleSize-1)/2):((sampleSize-1)/2)
# create dependent values according to ax + b + N(0,sd)
y = trueA * x + trueB + rnorm(n=sampleSize,mean=0,sd=trueSd)
plot(x,y, main="Test Data")

# For estimating parameters in a Bayesian analysis, we need to derive the likelihood function for the model 
likelihood = function(param){
  a = param[1]
  b = param[2]
  sd = param[3]
  
  pred = a*x + b
  singlelikelihoods = dnorm(y, mean = pred, sd = sd, log = T)
  sumll = sum(singlelikelihoods)
  return(sumll)
}

# Defining the prior
# Prior distribution
prior = function(param){
  a = param[1]
  b = param[2]
  sd = param[3]
  aprior = dunif(a, min=0, max=10, log = T)
  bprior = dnorm(b, sd = 5, log = T)
  sdprior = dunif(sd, min=0, max=30, log = T)
  return(aprior+bprior+sdprior)
}

# The posterior
posterior = function(param){
  return (likelihood(param) + prior(param))
}

Metropolis-Hastings algorithm

# proposal function
proposalfunction = function(param){
  return(rnorm(3,mean = param, sd= c(0.1,0.5,0.3)))
}

# MH algorithm 
run_metropolis_MCMC = function(startvalue, iterations){
  chain = array(dim = c(iterations+1,3))
  chain[1,] = startvalue
  for (i in 1:iterations){
    proposal = proposalfunction(chain[i,])
    
    probab = exp(posterior(proposal) - posterior(chain[i,]))     #p1/p2 = exp[log(p1)-log(p2)].
    if (runif(1) < probab){   #get the uniform distribution points which meet the requirement / accept rate 
      chain[i+1,] = proposal
    }else{
      chain[i+1,] = chain[i,]
    }
  }
  return(chain)
}

# run MH
startvalue = c(4,0,10)
chain = run_metropolis_MCMC(startvalue, 10000)

# acceptance rate
burnIn = 5000
acceptance = 1-mean(duplicated(chain[-(1:burnIn),]))  #The acceptance rate can be influenced by the proposal function

acceptance

## [1] 0.7418516

Plot parameters

### Summary: #######################

par(mfrow = c(2,3))
hist(chain[-(1:burnIn),1],nclass=30, , main="Posterior of a", xlab="True value = red line" )
abline(v = mean(chain[-(1:burnIn),1]))
abline(v = trueA, col="red" )
hist(chain[-(1:burnIn),2],nclass=30, main="Posterior of b", xlab="True value = red line")
abline(v = mean(chain[-(1:burnIn),2]))
abline(v = trueB, col="red" )
hist(chain[-(1:burnIn),3],nclass=30, main="Posterior of sd", xlab="True value = red line")
abline(v = mean(chain[-(1:burnIn),3]) )
abline(v = trueSd, col="red" )
plot(chain[-(1:burnIn),1], type = "l", xlab="True value = red line" , main = "Chain values of a", )
abline(h = trueA, col="red" )
plot(chain[-(1:burnIn),2], type = "l", xlab="True value = red line" , main = "Chain values of b", )
abline(h = trueB, col="red" )
plot(chain[-(1:burnIn),3], type = "l", xlab="True value = red line" , main = "Chain values of sd", )
abline(h = trueSd, col="red" )

# slope
mean(chain[-(1:burnIn),1])

## [1] 4.533057

# intercept
mean(chain[-(1:burnIn),2])

## [1] -1.553207

Linear regression MCMC in JAGS

# create data
snakes1 <- data.frame(b_length = x, b_mass = y)
head(snakes1)

##   b_length    b_mass
## 1      -15 -65.29318
## 2      -14 -61.34337
## 3      -13 -69.83677
## 4      -12 -56.90311
## 5      -11 -52.84676
## 6      -10 -44.16475

library(R2jags)

## Loading required package: rjags

## Loading required package: coda

## Linked to JAGS 4.3.1

## Loaded modules: basemod,bugs

## 
## Attaching package: 'R2jags'

## The following object is masked from 'package:coda':
## 
##     traceplot

jagsdata_s1 <- with(snakes1, list(b_mass = b_mass, b_length = b_length, N = length(b_mass)))

lm1_jags <- function(){
  # Likelihood:
  for (i in 1:N){
    b_mass[i] ~ dnorm(mu[i], tau) # tau is precision (1 / variance)
    mu[i] <- alpha + beta * b_length[i]
  }
  # Priors:
  alpha ~ dnorm(0, 0.01) # intercept
  beta ~ dnorm(0, 0.01) # slope
  sigma ~ dunif(0, 100) # standard deviation
  tau <- 1 / (sigma * sigma) # sigma^2 doesn't work in JAGS
}

init_values <- function(){
  list(alpha = rnorm(1), beta = rnorm(1), sigma = runif(1))
}

params <- c("alpha", "beta", "sigma")

fit_lm1 <- jags(data = jagsdata_s1, inits = init_values, parameters.to.save = params, model.file = lm1_jags,
                n.chains = 3, n.iter = 12000, n.burnin = 5000, n.thin = 10, DIC = F)

## module glm loaded

## module dic loaded

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 31
##    Unobserved stochastic nodes: 3
##    Total graph size: 134
## 
## Initializing model

fit_lm1

## Inference for Bugs model at "C:/Users/hed2/AppData/Local/Temp/RtmpmO3kbg/model519817325cec", fit using jags,
##  3 chains, each with 12000 iterations (first 5000 discarded), n.thin = 10
##  n.sims = 2100 iterations saved
##       mu.vect sd.vect   2.5%    25%    50%    75%  97.5%  Rhat n.eff
## alpha  -1.125   1.646 -4.299 -2.186 -1.162 -0.055  2.164 1.000  2100
## beta    4.562   0.183  4.199  4.441  4.561  4.683  4.918 1.001  2100
## sigma   9.167   1.297  7.104  8.227  9.007  9.939 12.046 1.001  2100
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).

Plot(fit_lm1)

# plot
# plot(fit_lm1)
traceplot(fit_lm1, mfrow = c(2, 2), ask = F)

lm1_mcmc <- as.mcmc(fit_lm1)
plot(lm1_mcmc)

If the Rhat values are substantially larger than one, the chains haven’t mixed properly and the posterior estimates cannot be trusted.

Prediction

# new dataset
nvalues <- 100
b_length_new <- seq(min(snakes1$b_length), max(snakes1$b_length), length.out = nvalues)

# we combine the three chains into one
lm1_mcmc_combi <- as.mcmc(rbind(lm1_mcmc[[1]], lm1_mcmc[[2]], lm1_mcmc[[3]]))

# calculate the expected value of y for each of the new x
pred_mean_mean <- mean(lm1_mcmc_combi[, "alpha"]) + b_length_new * mean(lm1_mcmc_combi[, "beta"])

head(pred_mean_mean)

## [1] -69.55662 -68.17417 -66.79171 -65.40926 -64.02680 -62.64435

A kind of CI

# the uncertainty around this mean. 
# ci
pred_mean_dist <- matrix(NA, nrow = nrow(lm1_mcmc_combi), ncol = nvalues)
for (i in 1:nrow(pred_mean_dist)){
  pred_mean_dist[i,] <- lm1_mcmc_combi[i,"alpha"] + b_length_new * lm1_mcmc_combi[i,"beta"]
}
credible_lower <- apply(pred_mean_dist, MARGIN = 2, quantile, prob = 0.025)
credible_upper <- apply(pred_mean_dist, MARGIN = 2, quantile, prob = 0.975)

Percentiles

# percentile
lm1_mcmc_combi_rep <- do.call(rbind, rep(list(lm1_mcmc_combi), 50)) # replication

# Draw random values for all parameter combinations (rows) and body length values (columns):
pred_data_dist <- matrix(NA, nrow = nrow(lm1_mcmc_combi_rep), ncol = nvalues)
for (i in 1:nrow(pred_data_dist)){
  pred_data_dist[i,] <- lm1_mcmc_combi_rep[i,"alpha"] + b_length_new * lm1_mcmc_combi_rep[i,"beta"] +
    rnorm(nvalues, mean = 0, sd = lm1_mcmc_combi_rep[i, "sigma"])
}

# Calculate quantiles:
uncertain_lower <- apply(pred_data_dist, MARGIN = 2, quantile, prob = 0.025)
uncertain_upper <- apply(pred_data_dist, MARGIN = 2, quantile, prob = 0.975)

Plot CI and percentiles

# plot
par(mfrow = c(1, 1))
plot(b_mass ~ b_length, data = snakes1)
lines(b_length_new, pred_mean_mean)
lines(b_length_new, credible_lower, lty = 2)
lines(b_length_new, credible_upper, lty = 2)
lines(b_length_new, uncertain_lower, lty = 2, col = "red")
lines(b_length_new, uncertain_upper, lty = 2, col = "red")

Comparison with SLR:

summary(lm(snakes1$b_mass~snakes1$b_length))

## 
## Call:
## lm(formula = snakes1$b_mass ~ snakes1$b_length)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.138  -6.248  -1.441   6.245  18.462 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.2255     1.5725  -0.779    0.442    
## snakes1$b_length   4.5618     0.1758  25.947   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.756 on 29 degrees of freedom
## Multiple R-squared:  0.9587, Adjusted R-squared:  0.9573 
## F-statistic: 673.2 on 1 and 29 DF,  p-value: < 2.2e-16

The above three methonds have similar results.

Multiple regression in JAGS

# multiple regression 
# create dataset
snakes2 <- data.frame(b_length = x, b_mass = y, sex = rep(c(0,1),times=16)[1:31] )
head(snakes2)

##   b_length    b_mass sex
## 1      -15 -65.29318   0
## 2      -14 -61.34337   1
## 3      -13 -69.83677   0
## 4      -12 -56.90311   1
## 5      -11 -52.84676   0
## 6      -10 -44.16475   1

summary(lm( snakes2$b_mass~snakes2$b_length +snakes2$sex,data=snakes2))

## 
## Call:
## lm(formula = snakes2$b_mass ~ snakes2$b_length + snakes2$sex, 
##     data = snakes2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.208  -6.318  -1.375   6.243  18.392 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.2913     2.2276  -0.580    0.567    
## snakes2$b_length   4.5618     0.1789  25.496   <2e-16 ***
## snakes2$sex        0.1359     3.2023   0.042    0.966    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.91 on 28 degrees of freedom
## Multiple R-squared:  0.9587, Adjusted R-squared:  0.9558 
## F-statistic:   325 on 2 and 28 DF,  p-value: < 2.2e-16

plot(b_mass ~ b_length, col = (sex + 1), data = snakes2)

# format the data
jagsdata_s2 <- with(snakes2, list(b_mass = b_mass, b_length = b_length, sex = sex, N = length(b_mass)))

# an R function containing the JAGS code:
  lm2_jags <- function(){
    # Likelihood:
    for (i in 1:N){
      b_mass[i] ~ dnorm(mu[i], tau) # tau is precision (1 / variance)
      mu[i] <- alpha[1]   + beta[1] * sex[i] + beta[2] * b_length[i]
    }
    # Priors:
    alpha[1] ~ dnorm(0, 0.01)
    for (i in 1:2){
      
      beta[i] ~ dnorm(0, 0.01)
    }
    sigma ~ dunif(0, 100)
    tau <- 1 / (sigma * sigma)
  }

  
  # Markov chains and initial values
  init_values <- function(){
    list(alpha = rnorm(1), beta = rnorm(2), sigma = runif(1))
  }
  
  params <- c("alpha", "beta", "sigma")
  
  fit_lm2 <- jags(data = jagsdata_s2, inits = init_values, parameters.to.save = params, model.file = lm2_jags,
                  n.chains = 3, n.iter = 12000, n.burnin = 2000, n.thin = 10, DIC = F)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 31
##    Unobserved stochastic nodes: 4
##    Total graph size: 168
## 
## Initializing model

  fit_lm2

## Inference for Bugs model at "C:/Users/hed2/AppData/Local/Temp/RtmpmO3kbg/model51983bea4c82", fit using jags,
##  3 chains, each with 12000 iterations (first 2000 discarded), n.thin = 10
##  n.sims = 3000 iterations saved
##         mu.vect sd.vect   2.5%    25%    50%   75%  97.5%  Rhat n.eff
## alpha    -1.131   2.199 -5.367 -2.603 -1.163 0.347  3.168 1.003   810
## beta[1]   0.003   3.170 -6.301 -2.135 -0.036 2.105  6.303 1.001  3000
## beta[2]   4.558   0.184  4.188  4.437  4.560 4.680  4.923 1.001  2100
## sigma     9.257   1.287  7.200  8.355  9.113 9.968 12.160 1.001  3000
## 
## For each parameter, n.eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor (at convergence, Rhat=1).

Results are similar to the multiple regression

References