hdl.data <- read.table("hdl-data(2).txt",sep="\t",header=T)
hdl.data$BMI= hdl.data$BMI5
### summary
summary(hdl.data)
## HDL5 BMI5 SEX AGE5
## Min. : 16.00 Min. :14.36 Min. :1.000 Min. :26.00
## 1st Qu.: 39.00 1st Qu.:23.82 1st Qu.:1.000 1st Qu.:47.00
## Median : 48.00 Median :26.60 Median :2.000 Median :54.00
## Mean : 50.37 Mean :27.34 Mean :1.539 Mean :54.54
## 3rd Qu.: 59.00 3rd Qu.:29.82 3rd Qu.:2.000 3rd Qu.:62.00
## Max. :129.00 Max. :53.70 Max. :2.000 Max. :84.00
## NA's :35 NA's :25
## BMI
## Min. :14.36
## 1st Qu.:23.82
## Median :26.60
## Mean :27.34
## 3rd Qu.:29.82
## Max. :53.70
## NA's :25
new.hdl.data <- na.omit(hdl.data)
plot(new.hdl.data)
par(mfrow=c(1,2))
hist(new.hdl.data[,1],main="")
hist(log(new.hdl.data[,1]),main="")
#Generate data in format for jags
data.fhs <- list(N = nrow(new.hdl.data),
hdl=as.numeric(new.hdl.data[,1]),
age=as.numeric(new.hdl.data$AGE5),
gender=as.numeric(new.hdl.data$SEX),bmi=new.hdl.data$BMI)
Bayesian analysis
model.fhs <- "
model {
#likelihood
for (i in 1:N) {
hdl[i] ~ dnorm(mu[i],tau)
mu[i] <- beta.0+beta.age*(age[i]-mean.age)+beta.gender*(gender[i]-1)+
beta.bmi*(bmi[i]-mean.bmi) }
#prior
tau ~ dgamma(1,1);
beta.0 ~ dnorm(0, 0.0001);
beta.age ~ dnorm(0, 0.0001);
beta.gender ~ dnorm(0, 0.0001);
beta.bmi ~ dnorm(0, 0.0001);
#predicted HDL in females with age 50 and bmi 25
mean.hdl.F <- beta.0+beta.age*(50-mean.age)+ beta.bmi*(25-mean.bmi)
mean.hdl.M <- beta.0+beta.age*(50-mean.age)+ beta.bmi*(25-mean.bmi) +beta.gender
mean.age <- mean(age[])
mean.bmi <- mean(bmi[])
}"
jags.fhs <- jags.model(textConnection(model.fhs),data=data.fhs, n.adapt=1500)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 3467
## Unobserved stochastic nodes: 5
## Total graph size: 22494
##
## Initializing model
test.fhs <- coda.samples(jags.fhs, c('beta.0','beta.age','beta.gender','beta.bmi'), n.adapt=1500, n.iter=10000)
summary(test.fhs)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta.0 43.95995 0.33618 0.0033618 0.0069054
## beta.age 0.01315 0.02221 0.0002221 0.0002221
## beta.bmi -0.84907 0.04478 0.0004478 0.0004556
## beta.gender 11.97958 0.45676 0.0045676 0.0084784
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta.0 43.30816 43.736824 43.96051 44.17663 44.60588
## beta.age -0.03005 -0.001753 0.01332 0.02808 0.05731
## beta.bmi -0.93631 -0.879628 -0.84883 -0.81872 -0.76155
## beta.gender 11.09366 11.671730 11.97722 12.29057 12.87017
traceplot(test.fhs)
densplot(test.fhs)
autocorr.plot(test.fhs)
out <- as.matrix(test.fhs)
out[1:10,]
## beta.0 beta.age beta.bmi beta.gender
## [1,] 49.85206 -0.0546058843 -0.6912623 6.886772
## [2,] 46.24220 0.0120106481 -0.8558557 9.740644
## [3,] 45.40160 0.0183040548 -0.8301494 10.527539
## [4,] 45.06289 0.0173104936 -0.8354231 11.190463
## [5,] 44.63175 0.0299360728 -0.8597942 11.719038
## [6,] 43.91356 0.0011307815 -0.8020900 12.132566
## [7,] 43.68388 0.0019151412 -0.8505473 12.268721
## [8,] 44.05028 0.0037180365 -0.8335221 12.232170
## [9,] 43.76632 0.0004079347 -0.8565991 11.952126
## [10,] 44.11824 -0.0137430637 -0.8574698 12.099294
plot(out[,1],out[,2])
model.fhs <- "
model {
#likelihood
for (i in 1:N) {
hdl[i] ~ dnorm(mu[i],tau)
mu[i] <- beta.0+beta.gender*(gender[i]-1)+ beta.bmi*(bmi[i]-mean.bmi)
#residuals
r[i] <- hdl[i]-mu[i]
sr[i] <- r[i]*sqrt(tau)}
#prior
tau ~ dgamma(1,1);
beta.0 ~ dnorm(0, 0.0001);
beta.gender ~ dnorm(0, 0.0001);
beta.bmi ~ dnorm(0, 0.0001);
#predicted HDL in females with bmi 25
mean.hdl.F <- beta.0+ beta.bmi*(25-mean.bmi)
mean.hdl.M <- beta.0+ beta.bmi*(25-mean.bmi) +beta.gender
mean.bmi <- mean(bmi[])
}"
jags.fhs <- jags.model(textConnection(model.fhs),data=data.fhs, n.adapt=1500)
## Warning in jags.model(textConnection(model.fhs), data = data.fhs, n.adapt =
## 1500): Unused variable "age" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 3467
## Unobserved stochastic nodes: 4
## Total graph size: 25054
##
## Initializing model
test.fhs <- coda.samples(jags.fhs, c('beta.0','beta.gender','beta.bmi'), n.adapt=1500, n.iter=10000)
summary(test.fhs)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta.0 43.9572 0.33547 0.0033547 0.0060491
## beta.bmi -0.8482 0.04484 0.0004484 0.0004484
## beta.gender 11.9775 0.44763 0.0044763 0.0081419
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta.0 43.3077 43.7383 43.9554 44.1787 44.6083
## beta.bmi -0.9361 -0.8779 -0.8481 -0.8184 -0.7581
## beta.gender 11.1156 11.6789 11.9801 12.2735 12.8568
traceplot(test.fhs)
densplot(test.fhs)
autocorr.plot(test.fhs)
test.fhs <- coda.samples(jags.fhs, c('r','sr'), n.adapt=1500, n.iter=10000)
out <- summary(test.fhs)
plot(new.hdl.data[,1], out[[1]][1:3467,1])