setwd("/Users/miche/Desktop")
Example of Hbf dataset: Note, the posterior mean is between prior and sample mean. Weights depend on sample size. The posterior mean will always between sample mean and prior mean. Check 5% error: Monte Carlo error for each parameter of interest should be less than about 5% of the sample standard deviation.
library(rjags)
## Warning: package 'rjags' was built under R version 3.4.3
## Loading required package: coda
## Warning: package 'coda' was built under R version 3.4.3
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
model.1 <-
"model {
for (i in 1:N) {
hbf[i] ~ dnorm(b.0,tau.t)
}
## prior on precision parameters
tau.t ~ dgamma(1,1);
### prior on mean given precision
mu.0 <- 15
tau.0 <- 1/36
b.0 ~ dnorm(mu.0, tau.0);
}
"
data.1 <- source("saudi.data.2(1).txt")[[1]]
model_mean <- jags.model(textConnection(model.1), data=data.1, n.adapt=1000)
## Warning in jags.model(textConnection(model.1), data = data.1, n.adapt =
## 1000): Unused variable "HU" in data
## Warning in jags.model(textConnection(model.1), data = data.1, n.adapt =
## 1000): Unused variable "sex" in data
## Warning in jags.model(textConnection(model.1), data = data.1, n.adapt =
## 1000): Unused variable "age" in data
## Warning in jags.model(textConnection(model.1), data = data.1, n.adapt =
## 1000): Unused variable "rs766432" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 283
## Unobserved stochastic nodes: 2
## Total graph size: 290
##
## Initializing model
test_mean <- coda.samples(model_mean, c('b.0'), n.iter = 10000)
summary(test_mean)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## 16.498121 0.460438 0.004604 0.004604
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## 15.59 16.19 16.50 16.81 17.40
plot(test_mean)
Example of making new predcition without log-normal transformation To make new prediction, we need to take into account of the variability from: the variability in the parameters and the variability of the new observation. Distribution of the new obs - distribution of the parameters with past data.
library(rjags)
model.1 <-
"model {
for (i in 1:N) {
hbf[i] ~ dnorm(b.0,tau.t)
}
## prior on precision parameters
tau.t ~ dgamma(1,1);
### prior on mean given precision
mu.0 <- 5
tau.0 <- 0.44
b.0 ~ dnorm(mu.0, tau.0);
### prediction
hbf.new ~ dnorm(b.0,tau.t)
pred <- step(hbf.new-20)
}
"
data.1 <- source("saudi.data.2(1).txt")[[1]]
model_mean <- jags.model(textConnection(model.1), data=data.1)
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "HU" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "sex" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "age" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "rs766432" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 283
## Unobserved stochastic nodes: 3
## Total graph size: 293
##
## Initializing model
test_mean <- coda.samples(model_mean, c('b.0','hbf.new','pred'), n.iter = 10000)
summary(test_mean)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## b.0 15.5119 0.4511 0.004511 0.004644
## hbf.new 15.4953 7.7697 0.077697 0.075900
## pred 0.2833 0.4506 0.004506 0.004506
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## b.0 14.6205 15.21 15.51 15.81 16.39
## hbf.new 0.3118 10.20 15.53 20.77 30.61
## pred 0.0000 0.00 0.00 1.00 1.00
#With Gibbs sampling, we generate sample values from the posterior distribution of beta0, and tau|y
plot(test_mean)
Log normal model
library(rjags)
model.1 <-
"model {
for (i in 1:N) {
hbf[i] ~ dlnorm(lb.0,tau.t)
}
## prior on precision parameters
tau.t ~ dgamma(1,1);
### prior on mean
lb.0 ~ dnorm(1.6,11);
b.0 <- exp(lb.0)
### prediction
hbf.new ~ dlnorm(lb.0,tau.t)
pred <- step(hbf.new-20)
}
"
data.1 <- source("saudi.data.2(1).txt")[[1]]
model_mean <- jags.model(textConnection(model.1), data=data.1)
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "HU" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "sex" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "age" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "rs766432" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 283
## Unobserved stochastic nodes: 3
## Total graph size: 294
##
## Initializing model
test_mean <- coda.samples(model_mean, c('b.0','hbf.new','pred'), n.iter = 10000)
summary(test_mean)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## b.0 14.251 0.4884 0.004884 0.006137
## hbf.new 16.834 10.7012 0.107012 0.107636
## pred 0.278 0.4480 0.004480 0.004519
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## b.0 13.305 13.916 14.25 14.57 15.25
## hbf.new 4.588 9.598 14.30 21.02 44.31
## pred 0.000 0.000 0.00 1.00 1.00
plot(test_mean)
Log normal model using uninformative prior
library(rjags)
model.1 <-
"model {
for (i in 1:N) {
hbf[i] ~ dlnorm(lb.0,tau.t)
}
## prior on precision parameters
tau.t ~ dgamma(1,1);
### prior on mean
lb.0 ~ dnorm(1,0.0001);
b.0 <- exp(lb.0)
### prediction
hbf.new ~ dlnorm(lb.0,tau.t)
pred <- step(hbf.new-20)
}
"
data.1 <- source("saudi.data.2(1).txt")[[1]]
model_mean <- jags.model(textConnection(model.1), data=data.1)
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "HU" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "sex" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "age" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "rs766432" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 283
## Unobserved stochastic nodes: 3
## Total graph size: 293
##
## Initializing model
test_mean <- coda.samples(model_mean, c('b.0','hbf.new','pred'), n.iter = 10000)
summary(test_mean)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## b.0 14.453 0.4997 0.004997 0.006875
## hbf.new 17.231 11.1410 0.111410 0.113722
## pred 0.285 0.4514 0.004514 0.004514
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## b.0 13.50 14.114 14.45 14.78 15.44
## hbf.new 4.76 9.827 14.44 21.30 46.33
## pred 0.00 0.000 0.00 1.00 1.00
plot(test_mean)
Linear Regression
library(rjags)
model.1 <-
"model {
for (i in 1:N) {
hbf[i] ~ dlnorm(mu[i],tau.t)
mu[i] <- b.0+b.1 *equals(rs766432[i],2)+b.2 *equals(rs766432[i],3)
}
## prior on precision parameters
tau.t ~ dgamma(1,1);
### prior on mean given precision
b.0 ~ dnorm(0, 0.001);
b.1 ~ dnorm(0, 0.001);
b.2 ~ dnorm(0, 0.001);
### prediction
lmu.1 <- (b.0); hbf.1 ~ dlnorm( lmu.1,tau.t); pred.1 <- step(hbf.1-20)
lmu.2 <- (b.0+b.1); hbf.2 ~ dlnorm( lmu.2,tau.t); pred.2 <- step(hbf.2-20)
lmu.3 <- (b.0+b.2); hbf.3 ~ dlnorm( lmu.3,tau.t); pred.3 <- step(hbf.3-20)
### fitted means by genotypes
mu.1 <- exp(lmu.1)
mu.2 <- exp(lmu.2)
mu.3 <- exp(lmu.3)
parameter.b[1] <- b.0; parameter.b[2] <- b.1; parameter.b[3] <- b.2
parameter.h[1] <- hbf.1; parameter.h[2] <- hbf.2; parameter.h[3] <- hbf.3;
parameter.m[1] <- mu.1; parameter.m[2] <- mu.2; parameter.m[3] <- mu.3
parameter.p[1] <- pred.1; parameter.p[2] <- pred.2; parameter.p[3] <- pred.3
}
"
data.1 <- source("saudi.data.2(1).txt")[[1]]
model_hbf <- jags.model(textConnection(model.1), data=data.1)
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "HU" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "sex" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "age" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 283
## Unobserved stochastic nodes: 7
## Total graph size: 604
##
## Initializing model
test_hbf <- coda.samples(model_hbf, c('parameter.b','parameter.h','parameter.m','parameter.p'),
n.iter = 10000)
summary(test_hbf)
##
## Iterations = 1001:11000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## parameter.b[1] 2.9358 0.1413 0.001413 0.010271
## parameter.b[2] -0.1638 0.1513 0.001513 0.010558
## parameter.b[3] -0.3649 0.1488 0.001488 0.010547
## parameter.h[1] 22.4765 14.6091 0.146091 0.255727
## parameter.h[2] 18.8375 11.7802 0.117802 0.117802
## parameter.h[3] 15.3240 9.6661 0.096661 0.096661
## parameter.m[1] 19.0257 2.6925 0.026925 0.194186
## parameter.m[2] 16.0146 0.8656 0.008656 0.010650
## parameter.m[3] 13.0921 0.6054 0.006054 0.007256
## parameter.p[1] 0.4590 0.4983 0.004983 0.007532
## parameter.p[2] 0.3475 0.4762 0.004762 0.004952
## parameter.p[3] 0.2224 0.4159 0.004159 0.004159
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## parameter.b[1] 2.6556 2.8406 2.9369 3.03246 3.20607
## parameter.b[2] -0.4535 -0.2670 -0.1668 -0.06231 0.13960
## parameter.b[3] -0.6544 -0.4674 -0.3669 -0.26419 -0.06972
## parameter.h[1] 5.8260 12.6127 18.8156 28.29715 60.28516
## parameter.h[2] 5.1846 10.8750 16.0875 23.38962 48.97672
## parameter.h[3] 4.3565 8.8129 12.9830 18.96804 40.26932
## parameter.m[1] 14.2330 17.1262 18.8575 20.74813 24.68193
## parameter.m[2] 14.3683 15.4292 15.9898 16.57793 17.78602
## parameter.m[3] 11.9414 12.6824 13.0742 13.49221 14.32264
## parameter.p[1] 0.0000 0.0000 0.0000 1.00000 1.00000
## parameter.p[2] 0.0000 0.0000 0.0000 1.00000 1.00000
## parameter.p[3] 0.0000 0.0000 0.0000 0.00000 1.00000
plot(test_hbf)
autocorr.plot(test_hbf)
Example of HbF expression – regression with 2 parameters, removed b.1
library(rjags)
model.1 <-
"model {
for (i in 1:N) {
hbf[i] ~ dlnorm(mu[i],tau.t)
mu[i] <- b.0+b.2 *equals(rs766432[i],3)
}
## prior on precision parameters
tau.t ~ dgamma(1,1);
### prior on mean given precision
b.0 ~ dnorm(0, 0.001);
b.2 ~ dnorm(0, 0.001);
### prediction
lmu.1 <- (b.0); hbf.1 ~ dlnorm( lmu.1,tau.t); pred.1 <- step(hbf.1-20)
lmu.3 <- (b.0+b.2); hbf.3 ~ dlnorm( lmu.3,tau.t); pred.3 <- step(hbf.3-20)
### fitted means by genotypes
mu.1 <- exp(lmu.1)
mu.3 <- exp(lmu.3)
parameter.b[1] <- b.0; parameter.b[2] <- b.2
parameter.h[1] <- hbf.1; parameter.h[2] <- hbf.3;
parameter.m[1] <- mu.1; parameter.m[2] <- mu.3
parameter.p[1] <- pred.1; parameter.p[2] <- pred.3
}
"
data.1 <- source("saudi.data.2(1).txt")[[1]]
model_hbf <- jags.model(textConnection(model.1), data=data.1)
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "HU" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "sex" in data
## Warning in jags.model(textConnection(model.1), data = data.1): Unused
## variable "age" in data
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 283
## Unobserved stochastic nodes: 5
## Total graph size: 591
##
## Initializing model
test_hbf <- coda.samples(model_hbf, c('parameter.b','parameter.h','parameter.m','parameter.p'),
n.iter = 10000)
test_hbf <- coda.samples(model_hbf, c('parameter.b','parameter.h','parameter.m','parameter.p'),
thin=5, n.iter = 20000)
summary(test_hbf)
##
## Iterations = 11005:31000
## Thinning interval = 5
## Number of chains = 1
## Sample size per chain = 4000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## parameter.b[1] 2.7915 0.04922 0.0007782 0.000921
## parameter.b[2] -0.2218 0.06782 0.0010723 0.001260
## parameter.h[1] 19.0659 11.71377 0.1852110 0.184685
## parameter.h[2] 15.3225 9.31705 0.1473155 0.147315
## parameter.m[1] 16.3254 0.80484 0.0127257 0.015059
## parameter.m[2] 13.0753 0.60196 0.0095179 0.009518
## parameter.p[1] 0.3503 0.47711 0.0075437 0.007544
## parameter.p[2] 0.2290 0.42024 0.0066446 0.006645
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## parameter.b[1] 2.6959 2.7573 2.7908 2.8243 2.88845
## parameter.b[2] -0.3552 -0.2657 -0.2214 -0.1761 -0.09004
## parameter.h[1] 5.3747 11.1099 15.9917 23.9624 49.31998
## parameter.h[2] 4.1935 8.9013 13.0446 19.2367 39.92112
## parameter.m[1] 14.8194 15.7574 16.2936 16.8500 17.96546
## parameter.m[2] 11.9218 12.6639 13.0653 13.4743 14.27124
## parameter.p[1] 0.0000 0.0000 0.0000 1.0000 1.00000
## parameter.p[2] 0.0000 0.0000 0.0000 0.0000 1.00000
plot(test_hbf)
autocorr.plot(test_hbf)