#1.Bayesian Logistic Regression
#2.Graphical models and conditional independence
#3.Testing association in the presence of confounding
#4.Bayesian hypothesis testing and Bayes factor
X.E <- c(rep(1,39), rep(0,114), rep(1,24), rep(0,154))
Y.D <- c(rep(1,153), rep(0,178))
dput( list(N=331, X.E=X.E, Y.D=Y.D), "data.txt")

mod <- glm(Y.D ~ X.E, family = binomial)
summary(mod)
## 
## Call:
## glm(formula = Y.D ~ X.E, family = binomial)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.389  -1.053  -1.053   1.308   1.308  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -0.3008     0.1236  -2.434  0.01492 * 
## X.E           0.7863     0.2874   2.736  0.00622 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 456.97  on 330  degrees of freedom
## Residual deviance: 449.26  on 329  degrees of freedom
## AIC: 453.26
## 
## Number of Fisher Scoring iterations: 4
library(rjags)
## Warning: package 'rjags' was built under R version 3.4.3
## Loading required package: coda
## Warning: package 'coda' was built under R version 3.4.3
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
library(coda)
#Use loop{}to specify each observation
#model as a string

model.1 <-
"model {
###data model
for (i in 1:N){
Y.D[i] ~ dbin(p[i], 1)

logit(p[i]) <- beta_0 + beta_1*X.E[i]
}

OR <- exp(beta_1)
pos.prob <- step(beta_1)
### prior
beta_1 ~ dnorm(0,0.0001)
beta_0 ~dnorm(0,0.0001)
}"

data.1 <- list(N =331, X.E=X.E, Y.D=Y.D)

model_odds <-jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 331
##    Unobserved stochastic nodes: 2
##    Total graph size: 676
## 
## Initializing model
test_odds <-coda.samples(model_odds, c("OR", "beta_0", "beta_1"), n.iter= 10000)
plot(test_odds)

test_odds <- coda.samples(model_odds, c("OR", "beta_1", "beta_0"), n.iter = 10000)
#coda.samples sets a trace monitor for all requested nodes, updates the model, and coerces the output to a single MCMC.list object.
summary(test_odds)
## 
## Iterations = 11001:21000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##           Mean     SD Naive SE Time-series SE
## OR      2.2975 0.6721 0.006721       0.010024
## beta_0 -0.3016 0.1238 0.001238       0.001939
## beta_1  0.7910 0.2852 0.002852       0.004218
## 
## 2. Quantiles for each variable:
## 
##           2.5%     25%     50%     75%    97.5%
## OR      1.2716  1.8154  2.1986  2.6757  3.86253
## beta_0 -0.5448 -0.3815 -0.3024 -0.2182 -0.05715
## beta_1  0.2402  0.5963  0.7878  0.9842  1.35132
plot(test_odds)

#For OR, based on posterior sampling, 95% of he distribution is between 1.2 and 4.01. Hence, there is a significant association between disease D and exposure E. 
X.E <- c( rep(1, 21), rep(0, 26), rep(1, 18), rep(0,88), rep(1, 17), rep(0, 59), rep(1, 7), rep(0, 95))
   X.age <- c( rep(0, 47), rep(1, 106), 
               rep(0, 76), rep(1, 102))
     Y.D <- c( rep(1, 153), rep(0, 178))

   table(Y.D, X.E, X.age)
## , , X.age = 0
## 
##    X.E
## Y.D  0  1
##   0 59 17
##   1 26 21
## 
## , , X.age = 1
## 
##    X.E
## Y.D  0  1
##   0 95  7
##   1 88 18
#Traditional Analysis
mod <- glm(Y.D ~ X.E+X.age, family=binomial)
summary(mod)
## 
## Call:
## glm(formula = Y.D ~ X.E + X.age, family = binomial)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.598  -1.145  -0.855   1.210   1.539  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.8180     0.2176  -3.759 0.000171 ***
## X.E           1.0266     0.3056   3.360 0.000780 ***
## X.age         0.7409     0.2483   2.984 0.002847 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 456.97  on 330  degrees of freedom
## Residual deviance: 440.01  on 328  degrees of freedom
## AIC: 446.01
## 
## Number of Fisher Scoring iterations: 4
model.1 <-
"model {
###data model
for (i in 1:N){
Y.D[i] ~ dbin(p[i], 1)

logit(p[i]) <- beta_0 + beta_1*X.E[i]+beta_2*X.age[i]
}

OR <- exp(beta_1)
pos.prob <- step(beta_1)
### prior
beta_0 ~dnorm(0,0.0001)
beta_1 ~ dnorm(0,0.0001)
beta_2 ~dnorm(0,0001)
}"

data.1 <- list(N =331, X.E=X.E, Y.D=Y.D, X.age=X.age)
model_odds <-jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 331
##    Unobserved stochastic nodes: 3
##    Total graph size: 1014
## 
## Initializing model
#specify parameters for analysis
test_odds <-coda.samples(model_odds, c("OR", "beta_0", "beta_1", "beta_2"), n.iter= 10000)
#Note that our classical estimate for OR is exp(1.0266) = 2.79 (thus, we want to report posterior median)
#posterior is not symmetrical
summary(test_odds)
## 
## Iterations = 1001:11000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##           Mean     SD Naive SE Time-series SE
## OR      2.9294 0.9390 0.009390       0.017327
## beta_0 -0.7885 0.2091 0.002091       0.006016
## beta_1  1.0271 0.3072 0.003072       0.005442
## beta_2  0.6957 0.2385 0.002385       0.006899
## 
## 2. Quantiles for each variable:
## 
##           2.5%     25%     50%     75%   97.5%
## OR      1.5420  2.2714  2.7806  3.4233  5.2043
## beta_0 -1.2141 -0.9297 -0.7815 -0.6411 -0.3961
## beta_1  0.4331  0.8204  1.0227  1.2306  1.6495
## beta_2  0.2430  0.5332  0.6898  0.8557  1.1760
plot(test_odds)

#plot of iterations vs. sampled values for each variable in the chain, with a separate plot for variables.
traceplot(test_odds)

densplot(test_odds)

autocorr.plot(test_odds)

#Issue with paratimization? - trace plots should be stationary.
#PART 2#
#Bayesian Approach and Statstical Significance#
#A Bayesian statistician aims to apply posterior distribution to assess evidence that OR is not 1
#p(H0)=p(M0) and p(H1)=p(M1)
#We have a prior probability for each hypothesis
#Posterior odds = Bayes Factor * Prior Odds
library(rjags)
model.1 <- 
  "model{
###data model
for(i in 1:N){
Y.D[i] ~ dbin(p[i],1)

logit(p[i]) <- beta_0+beta_1*X.E[i]+beta_2*X.age[i]
}
OR <- exp(beta_1)
pos.prob <- step(OR-2)

###prior
beta_0 ~dnorm(0,0.0001)
beta_1 ~dnorm(0,0.0001)
beta_2 ~dnorm(0,0.0001)
}"

data.1 <-list(N=331, X.E=X.E, Y.D=Y.D, X.age=X.age)

model_odds <- jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 331
##    Unobserved stochastic nodes: 3
##    Total graph size: 1016
## 
## Initializing model
#Bayesian hypothesis testing is based on the posterior odds
#Bayes factor shows the evidence provided by the data
test_odds <- coda.samples(model_odds, c("OR", "pos.prob"), n.iter=10000)
summary(test_odds)
## 
## Iterations = 1001:11000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##           Mean     SD Naive SE Time-series SE
## OR       3.006 0.9912 0.009912       0.019890
## pos.prob 0.869 0.3374 0.003374       0.005187
## 
## 2. Quantiles for each variable:
## 
##           2.5%   25%   50%   75% 97.5%
## OR       1.555 2.299 2.837 3.543 5.377
## pos.prob 0.000 1.000 1.000 1.000 1.000
plot(test_odds)

#If we were to apply sharp hypothesis of 
#logit(pij) = B0+B1X1+B2X2+B3X1X2
#H0: B3=0 H1:B3 not zero 
library(rjags)

model.1 <- 
  "model{
     for(i in 1:N){
    Y.D[i] ~ dbin(p[i], 1)
  
     logit(p[i]) <- beta_0+beta_1*X.E[i]+beta_2*X.age[i]+beta_3*X.E[i]*X.age[i]
                        }
    OR.young <- exp(beta_1)
    OR.old   <- exp(beta_1+beta_3)
    ROR <- exp(beta_3)

 ### prior
      beta_0 ~ dnorm(0,0.0001)
      beta_1 ~ dnorm(0,0.0001)
      beta_2 ~ dnorm(0,0.0001)
      beta_3 ~ dnorm(0,0.0001)
                 }"

 data.1 <- list(N=331, X.E=X.E, Y.D=Y.D, X.age=X.age)

 model_odds <- jags.model(textConnection(model.1), data=data.1)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 331
##    Unobserved stochastic nodes: 4
##    Total graph size: 1021
## 
## Initializing model
#95% credible interval includes zero. Hence, B3 is not significantly differ from zero. 
#We fail to reject the null that B3 is zero. 
#No interaction
test_odds<-coda.samples(model_odds, c("OR.young","OR.old","ROR","beta_1","beta_2","beta_3"),n.iter=10000)
summary(test_odds)
## 
## Iterations = 1001:11000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##             Mean     SD Naive SE Time-series SE
## OR.old   3.28071 1.7403 0.017403       0.022548
## OR.young 3.10804 1.2739 0.012739       0.036326
## ROR      1.22806 0.8479 0.008479       0.018963
## beta_1   1.05599 0.3954 0.003954       0.011472
## beta_2   0.76869 0.2706 0.002706       0.009444
## beta_3   0.01547 0.6130 0.006130       0.015239
## 
## 2. Quantiles for each variable:
## 
##             2.5%     25%     50%    75% 97.5%
## OR.old    1.1834  2.1169 2.88725 3.9826 7.714
## OR.young  1.3213  2.1909 2.87930 3.7490 6.298
## ROR       0.3060  0.6702 1.01528 1.5330 3.442
## beta_1    0.2786  0.7843 1.05755 1.3215 1.840
## beta_2    0.2311  0.5880 0.76638 0.9501 1.301
## beta_3   -1.1843 -0.4002 0.01517 0.4272 1.236
plot(test_odds)

traceplot(test_odds)

densplot(test_odds)

autocorr.plot(test_odds)