One thing to note is that the code changes you have to make between questions often are minimal. Yet we go from running a simple binomial model to running a pretty advanced hierarchical linear model.
All answers below use wide “sloppy” uniform priors, and these could certainly be shaped up, and be made more informative.
Not much to do here.
library(rjags)
data_list <- list(
y1 =c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0),
y2 = c(0, 0, 1, 1, 1, 0, 1, 1, 1, 0))
# The model specification
model_string <- "model{
# Priors
theta1 ~ dbeta(1, 1)
theta2 ~ dbeta(1, 1)
# The data model
for(i in 1:length(y1) ) {
y1[i] ~ dbern(theta1)
}
for(i in 1:length(y2) ) {
y2[i] ~ dbern(theta2)
}
}"
params_to_monitor <- c("theta1", "theta2")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 20
## Unobserved stochastic nodes: 2
## Total graph size: 26
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
plot(mcmc_samples)
summary(mcmc_samples)
##
## Iterations = 1001:6000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 5000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## theta1 0.249 0.120 0.00098 0.000981
## theta2 0.584 0.135 0.00111 0.001120
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## theta1 0.0605 0.159 0.235 0.326 0.516
## theta2 0.3096 0.491 0.588 0.681 0.832
s <- as.matrix(mcmc_samples)
mean(abs(s[,"theta2"] - s[,"theta1"]) < 0.2)
## [1] 0.2233
cowA <- c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0)
cowB <- c(0, 0, 1, 1, 1, 0, 1, 1, 1, 0)
# Using the same model as in Question 1, just using the new data.
data_list <- list(y1 = cowA, y2 = cowB)
params_to_monitor <- c("theta1", "theta2")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 20
## Unobserved stochastic nodes: 2
## Total graph size: 26
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
plot(mcmc_samples)
Calculate the probability that medicine A is better than medicine B.
s <- as.matrix(mcmc_samples)
mean(s[,"theta1"] > s[,"theta2"])
## [1] 0.04073
So should probably go with medicine B then…
diet_milk <- c(651, 679, 374, 601, 401, 609, 767, 709, 704, 679)
normal_milk <- c(798, 1139, 529, 609, 553, 743, 151, 544, 488, 555, 257, 692, 678, 675, 538)
data_list <- list(y1 = diet_milk, y2 = normal_milk)
# The new model using the Normal distribution
model_string <- "model{
# Priors (here just using 'sloppy' uniform distributions)
mu1 ~ dunif(0, 10000)
mu2 ~ dunif(0, 10000)
sigma1 ~ dunif(0, 10000)
sigma2 ~ dunif(0, 10000)
# The data model
for(i in 1:length(y1) ) {
y1[i] ~ dnorm(mu1, 1 / pow( sigma1, 2))
}
for(i in 1:length(y2) ) {
y2[i] ~ dnorm(mu2, 1 / pow( sigma2, 2))
}
}"
params_to_monitor <- c("mu1", "mu2", "sigma1", "sigma2")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 25
## Unobserved stochastic nodes: 4
## Total graph size: 141
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
Is it likely that diet is going to make the cows produce more milk on average?
s <- as.matrix(mcmc_samples)
mu_diff <- s[,"mu2"] - s[,"mu1"]
hist(mu_diff)
mean(mu_diff > 0)
## [1] 0.4037
mean(mu_diff < 0)
## [1] 0.5963
It is almost as likely that the diet is better as that the diet is worse. So this experiment does not support that the diet will result in the cows producing more milk .
diet_milk <- c(651, 679, 374, 601, 4000, 401, 609, 767, 3890, 704, 679)
normal_milk <- c(798, 1139, 529, 609, 553, 743, 3,151, 544, 488, 15, 257, 692, 678, 675, 538)
data_list <- list(y1 = diet_milk, y2 = normal_milk)
# The new model using the t-distribution with 3 degrees of freedom instead of the Normal
model_string <- "model{
# Priors (here just using 'sloppy' uniform distributions)
mu1 ~ dunif(0, 10000)
mu2 ~ dunif(0, 10000)
sigma1 ~ dunif(0, 10000)
sigma2 ~ dunif(0, 10000)
# The data model
for(i in 1:length(y1) ) {
y1[i] ~ dt(mu1, 1 / pow( sigma1, 2), 3)
}
for(i in 1:length(y2) ) {
y2[i] ~ dt(mu2, 1 / pow( sigma2, 2), 3)
}
}"
params_to_monitor <- c("mu1", "mu2", "sigma1", "sigma2")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 27
## Unobserved stochastic nodes: 4
## Total graph size: 205
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
Is it likely that diet is going to make the cows produce more milk on average?
s <- as.matrix(mcmc_samples)
mu_diff <- s[,"mu2"] - s[,"mu1"]
hist(mu_diff)
mean(mu_diff > 0)
## [1] 0.2509
mean(mu_diff < 0)
## [1] 0.7491
Again there is no strong evidence that the diet is any good (but compare with the result, would you have used the original Normal model).
diet_eggs <- c(6, 4, 2, NA, 4, 3, 0, 4, 0, 6, 3)
normal_eggs <- c(4, 2, 1, 1, 2, 1, 2, 1, NA, 2, 1)
data_list <- list(y1 = diet_eggs, y2 = normal_eggs)
# The new model using the t-distribution with 3 degrees of freedom instead of the Normal
model_string <- "model{
# Priors (here just using 'sloppy' uniform distributions)
lambda1 ~ dunif(0, 10000)
lambda2 ~ dunif(0, 10000)
# The data model
for(i in 1:length(y1) ) {
y1[i] ~ dpois(lambda1)
}
for(i in 1:length(y2) ) {
y2[i] ~ dpois(lambda2)
}
}"
params_to_monitor <- c("lambda1", "lambda2")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 20
## Unobserved stochastic nodes: 4
## Total graph size: 28
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
Is it likely that diet going to make the chickens produce more eggs on average?
s <- as.matrix(mcmc_samples)
lambda_diff <- s[,"lambda1"] - s[,"lambda2"]
hist(lambda_diff)
mean(lambda_diff > 0)
## [1] 0.9843
mean(lambda_diff < 0)
## [1] 0.01573
There is pretty good evidence that the diet is effective and that chickens on the diet produce more eggs on average (that is, lambda1 seems higher than lambda2). Looking at lambda_diff a “best guess” is that the diet results in around 1-2 more eggs on average. Notice that we didn’t have to do anything to handle the NA values, JAGS does a lot of magic behind the scenes and took care of that for us.
This implements the same model as in question 4, but using smarter indexing so that the code is not as redundant and so that it works with the format of the data in the data.frame d .
d <-
structure(list(milk = c(651, 679, 374, 601, 401, 609, 767, 709,
704, 679, 798, 1139, 529, 609, 553, 743, 151, 544, 488, 555,
257, 692, 678, 675, 538), group = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), .Names = c("milk",
"group"), row.names = c(NA, -25L), class = "data.frame")
data_list <- list(y = d$milk, group = d$group, n_group = length(unique(d$group)))
model_string <- "model{
# Priors (here just using 'sloppy' uniform distributions)
for(group_i in 1:n_group) {
mu[group_i] ~ dunif(0, 10000)
sigma[group_i] ~ dunif(0, 10000)
}
for(i in 1:length(y)) {
y[i] ~ dnorm(mu[group[i]], 1 / pow( sigma[group[i]], 2))
}
}"
params_to_monitor <- c("mu", "sigma")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 25
## Unobserved stochastic nodes: 4
## Total graph size: 167
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
This should give you the same result as in question 4.
Amazingly we don’t have to change the model at all from question 7, we can just rerun it with the new data.
d <-
structure(list(milk = c(651, 679, 374, 601, 401, 609, 767, 709,
704, 679, 798, 1139, 529, 609, 553, 743, 151, 544, 488, 555,
257, 692, 678, 675, 538, 1061, 721, 595, 784, 877, 562, 800,
684, 741, 516), group = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3)), .Names = c("milk", "group"), row.names = c(NA, 35L), class = "data.frame")
data_list <- list(y = d$milk, group = d$group, n_group = length(unique(d$group)))
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 35
## Unobserved stochastic nodes: 6
## Total graph size: 235
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
s <- as.matrix(mcmc_samples)
hist(s[,"mu[3]"] - s[,"mu[1]"] )
hist(s[,"mu[3]"] - s[,"mu[2]"] )
mean(s[,"mu[3]"] - s[,"mu[1]"] > 0)
## [1] 0.9295
mean(s[,"mu[3]"] - s[,"mu[2]"] > 0)
## [1] 0.9385
So it is pretty likely that diet 2 (mu[3]) is better than both diet 1 (mu[2]) and using no special diet (mu[1]).
So, let’s change the model from question 7 into a regression model!
d <-
structure(list(milk = c(685, 691, 476, 1151, 879, 725,
1190, 1107, 809, 539, 298, 805, 820, 498, 1026, 1217, 1177, 684,
1061, 834), hours = c(3, 7, 6, 10, 6, 5, 10, 11, 9, 3, 6, 6,
3, 5, 8, 11, 12, 9, 5, 5)), .Names = c("milk", "hours"
), row.names = c(NA, -20L), class = "data.frame")
data_list <- list(y = d$milk, x = d$hours)
model_string <- "model{
# Priors (here just using 'sloppy' uniform distributions)
beta0 ~ dunif(-10000, 10000)
beta1 ~ dunif(-10000, 10000)
sigma ~ dunif(0, 10000)
for(i in 1:length(y)) {
mu[i] <- beta0 + x[i] * beta1
y[i] ~ dnorm(mu[i], 1 / pow( sigma, 2))
}
}"
params_to_monitor <- c("beta0", "beta1", "sigma")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 20
## Unobserved stochastic nodes: 3
## Total graph size: 190
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=5000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
post_sum <- summary(mcmc_samples)
post_sum
##
## Iterations = 2001:7000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 5000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta0 404.6 139.4 1.138 5.291
## beta1 61.4 18.6 0.152 0.713
## sigma 223.6 41.1 0.335 0.603
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta0 134.5 316.5 401.5 490.5 685.0
## beta1 24.1 49.8 61.7 73.2 97.9
## sigma 160.6 194.9 217.7 246.0 320.1
plot(d$hours, d$milk)
# Plotting a "best guess" for the regression line
median_post_beta0 <- post_sum$quantiles["beta0", "50%"]
median_post_beta1 <- post_sum$quantiles["beta1", "50%"]
abline(median_post_beta0, median_post_beta1, col = "blue", lwd = 3)
# Adding a sample of the posterior draws to the plot in order to visualize the
# uncertainty of the regression line.
s <- as.matrix(mcmc_samples)
for(i in sample(nrow(s), size = 20)) {
abline(s[i,"beta0"], s[i,"beta1"], col = "gray")
}
It seems like there is good evidence that an increase in sunshine (or something that co-varies with sunshine perhaps…) results in an increase in milk production.
d <-
structure(
list(
cow = c(
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4
), hours = c(
2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6,
8, 9, 11, 12, 2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6, 8, 9, 11, 12
),
milk = c(
672, 1013, 808, 486, 648, 720, 1100, 950, 746, 721, 654, 1156, 964, 505,
1104, 903, 560, 817, 829, 975, 1169, 1044, 1722, 1672, 457, 977,
896, 794, 1116, 1155, 1228, 1243
)
), .Names = c("cow", "hours",
"milk"), class = "data.frame", row.names = c(NA,-32L)
)
data_list <- list(y = d$milk, x = d$hours, cow = d$cow, n_cow = length(unique(d$cow)))
model_string <- "model{
# Priors (here just using 'sloppy' uniform distributions)
for(cow_i in 1:n_cow) {
beta0[cow_i] ~ dunif(-10000, 10000)
beta1[cow_i] ~ dunif(-10000, 10000)
sigma[cow_i] ~ dunif(0,10000)
}
for(i in 1:length(y)) {
mu[i] <- beta0[cow[i]] + x[i] * beta1[cow[i]]
y[i] ~ dnorm(mu[i], 1 / pow( sigma[cow[i]], 2))
}
}"
params_to_monitor <- c("beta1")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 32
## Unobserved stochastic nodes: 12
## Total graph size: 398
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=3000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
summary(mcmc_samples)
##
## Iterations = 2001:5000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 3000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta1[1] 20.2 34.4 0.362 1.671
## beta1[2] 19.4 35.5 0.374 1.640
## beta1[3] 106.9 22.5 0.237 0.980
## beta1[4] 64.2 21.8 0.229 0.919
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta1[1] -40.5 1.159 18.9 37.5 86.2
## beta1[2] -52.7 0.454 18.8 39.2 92.6
## beta1[3] 60.1 94.479 106.8 118.9 153.8
## beta1[4] 21.1 51.781 63.7 76.1 110.2
Let’s look at the pairwise difference between the estimated slopes of from each cow:
s <- as.matrix(mcmc_samples)
par(mfcol = c(4,4), mar = c(4.2, 0, 2, 2))
slope_range <- range(s[, paste0("beta1[", 1:4, "]")] )
for(i in 1:4) {
for(j in 1:4) {
slope_i_name <- paste0("beta1[", i, "]")
slope_j_name <- paste0("beta1[", j, "]")
slope_i <- s[,slope_i_name]
slope_j <- s[,slope_j_name]
slope_diff <- slope_i - slope_j
hist(slope_diff, 30, main = "", xlim = slope_range,
xlab = paste("Cow ", i, " - ", "Cow ", j), col = "lightblue")
}
}
It seems that 3rd and 4th cow are more responsive to the sunshine than two other cows (1st and 2nd one) and as a result they produce more milk in response to the sunshine exposition.
d <- structure(list(cow = c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10),
hours = c(2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6, 8, 9, 11, 12,
2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6,
8, 9, 11, 12, 2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6, 8, 9, 11, 12,
2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6, 8, 9, 11, 12, 2, 3, 5, 6, 8,
9, 11, 12), milk = c(891, 742, 796, 761, 674, 1166, 955, 485, 806, 605, 552,
755, 660, 752, 839, 660, 941, 891, 806, 1371, 1379, 1322, 1733, 1817, 849, 864,
921, 840, 876, 903, 924, 1064, 435, 1165, 1061, 639, 870, 902,
1239, 1110, 834, 869, 1049, 1422, 1286, 1726, 1296, 1814, 732,
805, 945, 823, 964, 881, 978, 1307, 694, 617, 795, 1022, 518,
157, 824, 483, 501, 863, 640, 472, 791, 747, 814, 910, 579, 809,
689, 826, 1032, 927, 828, 1149)), .Names = c("cow", "hours", "milk"),
class = "data.frame", row.names = c(NA, -80L))
data_list <- list(y = d$milk, x = d$hours, cow = d$cow, n_cow = length(unique(d$cow)))
model_string <- "model{
# Priors (here just using 'sloppy' uniform distributions)
for(cow_i in 1:n_cow) {
beta0[cow_i] ~ dnorm(beta0mu, 1 / pow( beta0sigma, 2))
beta1[cow_i] ~ dnorm(beta1mu, 1 / pow( beta1sigma, 2))
sigma[cow_i] ~ dunif(0,10000)
}
beta0mu ~ dunif(-10000,10000)
beta1mu ~ dunif(-10000,10000)
beta0sigma ~ dunif(0,10000)
beta1sigma ~ dunif(0,10000)
for(i in 1:length(y)) {
mu[i] <- beta0[cow[i]] + x[i] * beta1[cow[i]]
y[i] ~ dnorm(mu[i], 1 / pow( sigma[cow[i]], 2))
}
}"
params_to_monitor <- c("beta0mu","beta1mu", "beta0sigma", "beta1sigma" )
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 80
## Unobserved stochastic nodes: 34
## Total graph size: 1008
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=3000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
summary(mcmc_samples)
##
## Iterations = 2001:5000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 3000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta0mu 688.1 53.0 0.559 2.603
## beta0sigma 77.0 54.4 0.574 3.128
## beta1mu 31.2 13.3 0.141 0.301
## beta1sigma 34.4 12.3 0.130 0.326
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta0mu 578.92 654.9 688.8 722.5 787.5
## beta0sigma 3.49 37.2 68.7 105.2 209.5
## beta1mu 4.83 22.7 31.0 39.4 57.6
## beta1sigma 17.19 26.6 32.5 39.6 63.0
We have so many parameters now, and there is many things we could look at. We could start at looking at beta1mu the estimated mean increase of liters of milk per extra hour of sunshie in the Cow population:
s <- as.matrix(mcmc_samples)
quantile(s[,"beta1mu"], c(0.025, 0.5, 0.975))
## 2.5% 50% 97.5%
## 4.827 31.008 57.579
hist(s[,"beta1mu"])
A guess is that, on average, we would expect one hour of sun to result in 30 more liters of milk. To look at the uncertainty in the estimate we can plot the original data and on that plot a scatter of lines drawn from the estimated Cow population:
plot(d$hours, d$milk, type = "n", ylim = c(0, 2000))
for(i in unique(d$cow)) {
lines(d$hours[ d$cow == i], d$milk[ d$cow == i], type = "l", col = "blue", lwd = 2)
}
for(i in sample(nrow(s), 20)) {
beta0 <- rnorm(1, s[i, "beta0mu"], s[i, "beta0sigma"])
beta1 <- rnorm(1, s[i, "beta1mu"], s[i, "beta1sigma"])
abline(beta0, beta1, col = "grey")
}
d <- structure(list(test_intensity = c(6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14), choice = c(0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0)), .Names = c("test_intensity", "choice"), row.names = c(NA,-90L),
class = "data.frame")
data_list <- list(response = d$choice, test_intensity = d$test_intensity,
n_stim = length(d$test_intensity), test_intensity_mean = mean(d$test_intensity))
model_string <- "model{
for(i in 1:n_stim) {
response[i] ~ dbern (p[i])
logit(p[i]) <- alpha + beta * (test_intensity[i] - test_intensity_mean)
}
# Priors
alpha ~ dunif(-1000,1000)
beta ~ dunif(-1000,1000)
}"
params_to_monitor <- c("alpha", "beta")
# Running the model
model <- jags.model( textConnection(model_string), data_list, n.chains = 3, n.adapt= 1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 90
## Unobserved stochastic nodes: 2
## Total graph size: 225
##
## Initializing model
update(model, 1000); # Burning 1000 samples to the MCMC gods...
mcmc_samples <- coda.samples(model, params_to_monitor, n.iter=3000)
# Inspect the results
par(mar = c(4.2, 3, 2, 1))
plot(mcmc_samples)
summary(mcmc_samples)
##
## Iterations = 2001:5000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 3000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## alpha -0.000566 0.304 0.00320 0.00403
## beta 0.841283 0.162 0.00171 0.00228
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## alpha -0.598 -0.204 -0.00393 0.205 0.599
## beta 0.553 0.729 0.83134 0.941 1.192
Now plotting the psychometric function.
s <- as.matrix(mcmc_samples)
par(mar = c(5, 4.5, 2, 1))
post_sum <- summary(mcmc_samples)
post_sum
##
## Iterations = 2001:5000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 3000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## alpha -0.000566 0.304 0.00320 0.00403
## beta 0.841283 0.162 0.00171 0.00228
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## alpha -0.598 -0.204 -0.00393 0.205 0.599
## beta 0.553 0.729 0.83134 0.941 1.192
Test_intensity <- seq(from = 6, to = 14, by = 1)
p1 <- c(mean(d$choice[1:10]),mean(d$choice[11:20]),mean(d$choice[21:30]),mean(d$choice[31:40]),
mean(d$choice[41:50]), mean(d$choice[51:60]), mean(d$choice[61:70]), mean(d$choice[71:80]),
mean(d$choice[81:90]))
plot(Test_intensity, p1, col="black", type="p", xlab = "Test Stimulus Intensity", ylab = "Probability", lwd = 2)
x2 <- seq(from = 6, to = 14, by = 0.1)
p2 <- 1/(1+exp(-1*(post_sum$quantiles["alpha", "50%"] + post_sum$quantiles["beta", "50%"] * (x2 - mean(d$test_intensity)))))
lines(x2, p2, col="red", type="l", lwd = 3)
# Adding a sample of the posterior draws to the plot in order to visualize the
# uncertainty of the psychometric function
for(i in sample(nrow(s), size = 50)) {
p3 <- 1/(1+exp(-1*(s[i,"alpha"] + s[i,"beta"] * (x2 - mean(d$test_intensity)))))
lines(x2, p3, col="grey", type="l")
}
lines(x2, p2, col="red", type="l", lwd = 3)
It seems that the “point of subjective equality” (PSE - difference in intensity for which the cow chooses the correct response 50% of the time) is around a light intensity of 10. The just noticeable difference (JND - the intensity threshold at which the cow just notices a difference in intensity between two stimuli; here we can define the JND to be the difference in stimulus intensity that makes classification performance rise from 50% to 84%) is around an increase in the light intensity of 2.