rm(list=ls())
library("mlr")
## Loading required package: ParamHelpers
dat=read.table("HW4.txt",header=T)
dat.glm=glm(Pain~.,data=dat,family=binomial)
dat=cbind(rep(1,length(dat)),dat,createDummyFeatures(dat$Treatment,method="reference"),createDummyFeatures(dat$Gender,method="reference"),createDummyFeatures(dat$Pain,method="reference"))
head(dat)
## rep(1, length(dat)) Treatment Gender Age Duration Pain B P M Yes
## 1 1 P F 68 1 No 0 1 0 0
## 2 1 P M 66 26 Yes 0 1 1 1
## 3 1 A F 71 12 No 0 0 0 0
## 4 1 A M 71 17 Yes 0 0 1 1
## 5 1 B F 66 12 No 1 0 0 0
## 6 1 A F 64 17 No 0 0 0 0
dat1=as.matrix(dat[,c(1,4:5,7:10)])
head(dat1)
## rep(1, length(dat)) Age Duration B P M Yes
## 1 1 68 1 0 1 0 0
## 2 1 66 26 0 1 1 1
## 3 1 71 12 0 0 0 0
## 4 1 71 17 0 0 1 1
## 5 1 66 12 1 0 0 0
## 6 1 64 17 0 0 0 0
dat2=dat1
dat2[,2]=scale(dat1[,2])
dat2[,3]=scale(dat1[,3])
head(dat2)
## rep(1, length(dat)) Age Duration B P M Yes
## 1 1 -0.3950376 -1.35472785 0 1 0 0
## 2 1 -0.7804401 0.79791174 0 1 1 1
## 3 1 0.1830662 -0.40756643 0 0 0 0
## 4 1 0.1830662 0.02296149 0 0 1 1
## 5 1 -0.7804401 -0.40756643 1 0 0 0
## 6 1 -1.1658426 0.02296149 0 0 0 0
summary(dat.glm)
##
## Call:
## glm(formula = Pain ~ ., family = binomial, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7638 -0.5904 -0.1952 0.6151 2.3153
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -20.588282 7.102883 -2.899 0.00375 **
## TreatmentB -0.526853 0.937025 -0.562 0.57394
## TreatmentP 3.181690 1.016021 3.132 0.00174 **
## GenderM 1.832202 0.796206 2.301 0.02138 *
## Age 0.262093 0.097012 2.702 0.00690 **
## Duration -0.005859 0.032992 -0.178 0.85905
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 81.503 on 59 degrees of freedom
## Residual deviance: 48.736 on 54 degrees of freedom
## AIC: 60.736
##
## Number of Fisher Scoring iterations: 5
dat.glm1=glm(Yes~.,data=as.data.frame(dat2[,-1]),family=binomial)
summary(dat.glm1)
##
## Call:
## glm(formula = Yes ~ ., family = binomial, data = as.data.frame(dat2[,
## -1]))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7638 -0.5904 -0.1952 0.6151 2.3153
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.32668 0.87448 -2.661 0.00780 **
## Age 1.36010 0.50343 2.702 0.00690 **
## Duration -0.06804 0.38316 -0.178 0.85905
## B -0.52685 0.93702 -0.562 0.57394
## P 3.18169 1.01602 3.132 0.00174 **
## M 1.83220 0.79621 2.301 0.02138 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 81.503 on 59 degrees of freedom
## Residual deviance: 48.736 on 54 degrees of freedom
## AIC: 60.736
##
## Number of Fisher Scoring iterations: 5
## c(-20.588,-0.527,3.182,1.832,0.262,-0.00586)
########### set up the functions for Bayesian inferences ###########
logit = function(x) {log(x/(1-x))}
expit = function(x) {exp(x)/(1+exp(x))}
#joint log posterior of beta|y for the logistic regression model:
logpost = function(y,X,beta)
{
xbeta = X%*%beta
## xbeta = ifelse(xbeta>10,10,xbeta)
like = sum(dbinom(y,1,expit(xbeta),log = TRUE))
## prior = 1
return(like)
}
Bayes.logistic = function(y,X,m = 10000, burn = 1000){
p = ncol(X)
keep.beta = matrix(0,m,p)
keep.beta[1,]=beta
acc = 0
att = 0
can.sd=c(1.1,0.7,0.3,0.65,1.5,1)
## can.sd=c(1,1,1,1,1,1)
for(i in 2:m){
# logpost value at current samples
cur_logpost = logpost(y,X,keep.beta[i-1,])
# Update beta using MH sampling:
canbeta = rep(0,p)
# Draw candidate:
for ( j in 1:p)
{
canbeta[j] = rnorm(1,keep.beta[i-1,j],can.sd[j])
}
att=att+1
# Compute acceptance ratio:
can_logpost = logpost(y,X,canbeta)
R = exp(can_logpost-cur_logpost)
U = runif(1)
if(U<R){
beta = canbeta
cur_logpost = can_logpost
acc = acc+1
}
keep.beta[i,] = beta
}
list(beta = keep.beta,acc.rate = acc/att,sd=can.sd)}
####################### fit the Bayesian model #################
library("coda")
m = 100000
burn = 5000
#Initial values:
beta = as.matrix(c(0,0,0,0,0,0))
chain1 = Bayes.logistic(dat2[,7],dat2[,-7],m = m,burn = burn)
beta = as.matrix(c(5,5,5,5,5,5))
chain2 = Bayes.logistic(dat2[,7],dat2[,-7],m = m,burn = burn)
par(mfrow=c(2,3))
chain1$beta=as.mcmc(chain1$beta)
traceplot(chain1$beta) ## ,main=matrix(chain1$sd,1,6)

chain2$beta=as.mcmc(chain2$beta)
traceplot(chain2$beta)

pick=seq(5000,100000,100)
print(round(apply((chain1$beta[pick,]+chain2$beta[pick,])/2,2,summary),3))
## [,1] [,2] [,3] [,4] [,5] [,6]
## Min. -5.408 0.541 -0.980 -3.055 1.367 0.579
## 1st Qu. -3.199 1.333 -0.297 -1.149 3.187 1.748
## Median -2.686 1.575 -0.088 -0.623 3.670 2.169
## Mean -2.754 1.604 -0.095 -0.662 3.726 2.190
## 3rd Qu. -2.266 1.838 0.110 -0.147 4.187 2.598
## Max. -0.796 3.116 1.016 1.742 6.435 4.814
par(mar=c(2,2,2,2))
acfplot(as.mcmc(chain1$beta[pick,]))

acfplot(as.mcmc(chain2$beta[pick,]))

mh.list = mcmc.list(list(mcmc(chain1$beta[pick,]),mcmc(chain2$beta[pick,])))
gelman.diag(mh.list)
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## [1,] 1.003 1.017
## [2,] 1.000 1.001
## [3,] 1.001 1.006
## [4,] 0.999 0.999
## [5,] 1.000 1.002
## [6,] 1.003 1.019
##
## Multivariate psrf
##
## 1.01
gelman.plot(mh.list)

quantile((chain1$beta[pick,4]+chain2$beta[pick,4])/2,c(0.025, 0.975))
## 2.5% 97.5%
## -2.1573591 0.6876185
quantile((chain1$beta[pick,5]+chain2$beta[pick,5])/2,c(0.025, 0.975))
## 2.5% 97.5%
## 2.333335 5.342009
quantile((chain1$beta[pick,4]+chain2$beta[pick,4]-chain1$beta[pick,5]-chain2$beta[pick,5])/2,c(0.025, 0.975))
## 2.5% 97.5%
## -6.194514 -2.776843
exp(-3.799)
## [1] 0.02239315
exp(-3.784)
## [1] 0.02273158
exp(-2.312)
## [1] 0.09906293
exp(-5.409)
## [1] 0.004476114
##acf(chain1$beta[pick,])
################ Compare with the likelihood-based inferences from glm #################
summary(dat.glm)
##
## Call:
## glm(formula = Pain ~ ., family = binomial, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7638 -0.5904 -0.1952 0.6151 2.3153
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -20.588282 7.102883 -2.899 0.00375 **
## TreatmentB -0.526853 0.937025 -0.562 0.57394
## TreatmentP 3.181690 1.016021 3.132 0.00174 **
## GenderM 1.832202 0.796206 2.301 0.02138 *
## Age 0.262093 0.097012 2.702 0.00690 **
## Duration -0.005859 0.032992 -0.178 0.85905
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 81.503 on 59 degrees of freedom
## Residual deviance: 48.736 on 54 degrees of freedom
## AIC: 60.736
##
## Number of Fisher Scoring iterations: 5