#1.Bayesian Logistic Regression
#2.Graphical models and conditional independence
#3.Testing association in the presence of confounding
#4.Bayesian hypothesis testing and Bayes factor
X.E <- c(rep(1,39), rep(0,114), rep(1,24), rep(0,154))
Y.D <- c(rep(1,153), rep(0,178))
dput( list(N=331, X.E=X.E, Y.D=Y.D), "data.txt")
mod <- glm(Y.D ~ X.E, family = binomial)
summary(mod)
##
## Call:
## glm(formula = Y.D ~ X.E, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.389 -1.053 -1.053 1.308 1.308
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3008 0.1236 -2.434 0.01492 *
## X.E 0.7863 0.2874 2.736 0.00622 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 456.97 on 330 degrees of freedom
## Residual deviance: 449.26 on 329 degrees of freedom
## AIC: 453.26
##
## Number of Fisher Scoring iterations: 4
library(rjags)
## Warning: package 'rjags' was built under R version 3.4.3
## Loading required package: coda
## Warning: package 'coda' was built under R version 3.4.3
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
library(coda)
#Use loop{}to specify each observation
#model as a string
model.1 <-
"model {
###data model
for (i in 1:N){
Y.D[i] ~ dbin(p[i], 1)
logit(p[i]) <- beta_0 + beta_1*X.E[i]
}
OR <- exp(beta_1)
pos.prob <- step(beta_1)
### prior
beta_1 ~ dnorm(0,0.0001)
beta_0 ~dnorm(0,0.0001)
}"
data.1 <- list(N =331, X.E=X.E, Y.D=Y.D)
model_odds <-jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 331
## Unobserved stochastic nodes: 2
## Total graph size: 676
##
## Initializing model
test_odds <-coda.samples(model_odds, c("OR", "beta_0", "beta_1"), n.iter= 10000)
plot(test_odds)

test_odds <- coda.samples(model_odds, c("OR", "beta_1", "beta_0"), n.iter = 10000)
#coda.samples sets a trace monitor for all requested nodes, updates the model, and coerces the output to a single MCMC.list object.
summary(test_odds)
##
## Iterations = 11001:21000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## OR 2.2975 0.6721 0.006721 0.010024
## beta_0 -0.3016 0.1238 0.001238 0.001939
## beta_1 0.7910 0.2852 0.002852 0.004218
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## OR 1.2716 1.8154 2.1986 2.6757 3.86253
## beta_0 -0.5448 -0.3815 -0.3024 -0.2182 -0.05715
## beta_1 0.2402 0.5963 0.7878 0.9842 1.35132
plot(test_odds)

#For OR, based on posterior sampling, 95% of he distribution is between 1.2 and 4.01. Hence, there is a significant association between disease D and exposure E.
X.E <- c( rep(1, 21), rep(0, 26), rep(1, 18), rep(0,88), rep(1, 17), rep(0, 59), rep(1, 7), rep(0, 95))
X.age <- c( rep(0, 47), rep(1, 106),
rep(0, 76), rep(1, 102))
Y.D <- c( rep(1, 153), rep(0, 178))
table(Y.D, X.E, X.age)
## , , X.age = 0
##
## X.E
## Y.D 0 1
## 0 59 17
## 1 26 21
##
## , , X.age = 1
##
## X.E
## Y.D 0 1
## 0 95 7
## 1 88 18
#Traditional Analysis
mod <- glm(Y.D ~ X.E+X.age, family=binomial)
summary(mod)
##
## Call:
## glm(formula = Y.D ~ X.E + X.age, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.598 -1.145 -0.855 1.210 1.539
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8180 0.2176 -3.759 0.000171 ***
## X.E 1.0266 0.3056 3.360 0.000780 ***
## X.age 0.7409 0.2483 2.984 0.002847 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 456.97 on 330 degrees of freedom
## Residual deviance: 440.01 on 328 degrees of freedom
## AIC: 446.01
##
## Number of Fisher Scoring iterations: 4
model.1 <-
"model {
###data model
for (i in 1:N){
Y.D[i] ~ dbin(p[i], 1)
logit(p[i]) <- beta_0 + beta_1*X.E[i]+beta_2*X.age[i]
}
OR <- exp(beta_1)
pos.prob <- step(beta_1)
### prior
beta_0 ~dnorm(0,0.0001)
beta_1 ~ dnorm(0,0.0001)
beta_2 ~dnorm(0,0001)
}"
data.1 <- list(N =331, X.E=X.E, Y.D=Y.D, X.age=X.age)
model_odds <-jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 331
## Unobserved stochastic nodes: 3
## Total graph size: 1014
##
## Initializing model
#specify parameters for analysis
test_odds <-coda.samples(model_odds, c("OR", "beta_0", "beta_1", "beta_2"), n.iter= 10000)
#Note that our classical estimate for OR is exp(1.0266) = 2.79 (thus, we want to report posterior median)
#posterior is not symmetrical
summary(test_odds)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## OR 2.9294 0.9390 0.009390 0.017327
## beta_0 -0.7885 0.2091 0.002091 0.006016
## beta_1 1.0271 0.3072 0.003072 0.005442
## beta_2 0.6957 0.2385 0.002385 0.006899
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## OR 1.5420 2.2714 2.7806 3.4233 5.2043
## beta_0 -1.2141 -0.9297 -0.7815 -0.6411 -0.3961
## beta_1 0.4331 0.8204 1.0227 1.2306 1.6495
## beta_2 0.2430 0.5332 0.6898 0.8557 1.1760
plot(test_odds)

#plot of iterations vs. sampled values for each variable in the chain, with a separate plot for variables.
traceplot(test_odds)




densplot(test_odds)




autocorr.plot(test_odds)

#Issue with paratimization? - trace plots should be stationary.
#PART 2#
#Bayesian Approach and Statstical Significance#
#A Bayesian statistician aims to apply posterior distribution to assess evidence that OR is not 1
#p(H0)=p(M0) and p(H1)=p(M1)
#We have a prior probability for each hypothesis
#Posterior odds = Bayes Factor * Prior Odds
library(rjags)
model.1 <-
"model{
###data model
for(i in 1:N){
Y.D[i] ~ dbin(p[i],1)
logit(p[i]) <- beta_0+beta_1*X.E[i]+beta_2*X.age[i]
}
OR <- exp(beta_1)
pos.prob <- step(OR-2)
###prior
beta_0 ~dnorm(0,0.0001)
beta_1 ~dnorm(0,0.0001)
beta_2 ~dnorm(0,0.0001)
}"
data.1 <-list(N=331, X.E=X.E, Y.D=Y.D, X.age=X.age)
model_odds <- jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 331
## Unobserved stochastic nodes: 3
## Total graph size: 1016
##
## Initializing model
#Bayesian hypothesis testing is based on the posterior odds
#Bayes factor shows the evidence provided by the data
test_odds <- coda.samples(model_odds, c("OR", "pos.prob"), n.iter=10000)
summary(test_odds)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## OR 3.006 0.9912 0.009912 0.019890
## pos.prob 0.869 0.3374 0.003374 0.005187
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## OR 1.555 2.299 2.837 3.543 5.377
## pos.prob 0.000 1.000 1.000 1.000 1.000
plot(test_odds)

#If we were to apply sharp hypothesis of
#logit(pij) = B0+B1X1+B2X2+B3X1X2
#H0: B3=0 H1:B3 not zero
library(rjags)
model.1 <-
"model{
for(i in 1:N){
Y.D[i] ~ dbin(p[i], 1)
logit(p[i]) <- beta_0+beta_1*X.E[i]+beta_2*X.age[i]+beta_3*X.E[i]*X.age[i]
}
OR.young <- exp(beta_1)
OR.old <- exp(beta_1+beta_3)
ROR <- exp(beta_3)
### prior
beta_0 ~ dnorm(0,0.0001)
beta_1 ~ dnorm(0,0.0001)
beta_2 ~ dnorm(0,0.0001)
beta_3 ~ dnorm(0,0.0001)
}"
data.1 <- list(N=331, X.E=X.E, Y.D=Y.D, X.age=X.age)
model_odds <- jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 331
## Unobserved stochastic nodes: 4
## Total graph size: 1021
##
## Initializing model
#95% credible interval includes zero. Hence, B3 is not significantly differ from zero.
#We fail to reject the null that B3 is zero.
#No interaction
test_odds<-coda.samples(model_odds, c("OR.young","OR.old","ROR","beta_1","beta_2","beta_3"),n.iter=10000)
summary(test_odds)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## OR.old 3.28071 1.7403 0.017403 0.022548
## OR.young 3.10804 1.2739 0.012739 0.036326
## ROR 1.22806 0.8479 0.008479 0.018963
## beta_1 1.05599 0.3954 0.003954 0.011472
## beta_2 0.76869 0.2706 0.002706 0.009444
## beta_3 0.01547 0.6130 0.006130 0.015239
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## OR.old 1.1834 2.1169 2.88725 3.9826 7.714
## OR.young 1.3213 2.1909 2.87930 3.7490 6.298
## ROR 0.3060 0.6702 1.01528 1.5330 3.442
## beta_1 0.2786 0.7843 1.05755 1.3215 1.840
## beta_2 0.2311 0.5880 0.76638 0.9501 1.301
## beta_3 -1.1843 -0.4002 0.01517 0.4272 1.236
plot(test_odds)


traceplot(test_odds)






densplot(test_odds)






autocorr.plot(test_odds)
