prediction functions
library(ggplot2); library(MCMCglmm)
normal.evd<-function(x, mu, sd){
exp(-exp(x))*dnorm(x, mu, sd)
}
normal.zt<-function(x, mu, sd){
exp(x)/(1-exp(-exp(x)))*dnorm(x, mu, sd)
}
pred<-function(a_1, a_2, sd_1, sd_2){
prob<-1-integrate(normal.evd, qnorm(0.0001, a_2,sd_2),
qnorm(0.9999, a_2,sd_2), a_2,sd_2)[[1]]
meanc<-integrate(normal.zt, qnorm(0.0001, a_1,sd_1),
qnorm(0.9999, a_1,sd_1), a_1,sd_1)[[1]]
prob*meanc
}
HPDpredict_za = function(object, predictors) {
library(MCMCglmm); library(coda); library(formr); library(reshape2);
if(class(object) != "MCMCglmm") {
if(length( object[[1]]$Residual$nrt )>1) {
object = lapply(object,FUN=function(x) { x$Residual$nrt<-2;x })
}
Sol = mcmc.list(lapply(object,FUN=function(x) { x$Sol}))
VCV = mcmc.list(lapply(object,FUN=function(x) { x$VCV}))
vars = colnames(Sol[[1]])
} else {
Sol = as.data.frame(object$Sol)
VCV = as.data.frame(object$VCV)
vars = names(Sol)
}
za_intercept_name = vars[ ! vars %contains% ":" & vars %begins_with% "traitza_"]
outcome_name = stringr::str_sub(za_intercept_name,stringr::str_length("traitza__"))
vcv_1_name = paste0(outcome_name,".units")
vcv_2_name = paste0("za_",outcome_name,".units")
intercept_name = "(Intercept)"
intercept = Sol[,intercept_name]
za_intercept = Sol[, za_intercept_name]
sd_1 = VCV[, vcv_1_name]
sd_2 = VCV[, vcv_2_name]
if(class(object) != "MCMCglmm") {
intercept = unlist(intercept)
za_intercept = unlist(za_intercept)
sd_1 = unlist(sd_1)
sd_2 = unlist(sd_2)
}
sd_1 = sqrt(sd_1)
sd_2 = sqrt(sd_2)
samples = length(unlist(intercept))
df = data.frame(matrix(NA_real_, nrow = samples, ncol = length(predictors)))
pred_vars = paste0(names(predictors), sapply(predictors, FUN = function(value) {
if(value == 1) { "" } else { paste0("_",as.numeric(value)) }
}))
names(df) = pred_vars
for(i in seq_along(predictors)) {
predictor = names(predictors)[i]
value = predictors[i]
za_predictor = vars[ vars == paste0(za_intercept_name,":", predictor) ]
if(value != 0) {
l1 = Sol[, predictor ]
l2 = Sol[, za_predictor ]
if(is.list(object)) {
l1 = unlist(l1)
l2 = unlist(l2)
}
l1 = value * l1
l2 = value * l2
} else {
l1 = l2 = rep(0,times = samples)
}
at_predictor = numeric(samples)
for(j in 1:samples) {
at_predictor[j] = pred(a_1 = intercept[j] + l1[j],
a_2 = za_intercept[i] + l2[j],
sd_1 = sd_1[j], sd_2 = sd_2[j])
}
if(value == 1) { colname = predictor } else { colname = paste0(predictor,"_",value) }
df[, colname] = at_predictor
}
mstrapped = suppressMessages(melt(df))
invisible(mstrapped)
}
HPDpredict_za_old = function(object, predictors) {
library(MCMCglmm); library(coda); library(formr); library(reshape2);
if(class(object) != "MCMCglmm") {
if(length( object[[1]]$Residual$nrt )>1) {
object = lapply(object,FUN=function(x) { x$Residual$nrt<-2;x })
}
Sol = mcmc.list(lapply(object,FUN=function(x) { x$Sol}))
vars = colnames(Sol[[1]])
} else {
Sol = as.data.frame(object$Sol)
vars = names(Sol)
}
za_intercept_name = vars[ ! vars %contains% ":" & vars %begins_with% "traitza_"]
intercept = Sol[,"(Intercept)"]
za_intercept = Sol[, za_intercept_name]
if(is.list(object)) {
intercept = unlist(intercept)
za_intercept = unlist(za_intercept)
}
samples = length(unlist(intercept))
df = data.frame(matrix(NA_real_, nrow = samples, ncol = length(predictors)))
pred_vars = paste0(names(predictors), sapply(predictors, FUN = function(value) {
if(value == 1) { "" } else { paste0("_",as.numeric(value)) }
}))
names(df) = pred_vars
for(i in seq_along(predictors)) {
predictor = names(predictors)[i]
value = predictors[i]
za_predictor = vars[ vars == paste0(za_intercept_name,":", predictor) ]
if(value != 0) {
l1 = Sol[, predictor ]
l2 = Sol[, za_predictor ]
if(is.list(object)) {
l1 = unlist(l1)
l2 = unlist(l2)
}
l1 = value * l1
l2 = value * l2
} else {
l1 = l2 = 0
}
py_0 = dpois(0, exp(intercept + za_intercept + l2))
y_ygt0 = exp(intercept + l1)
at_predictor_1 = (1-py_0) * y_ygt0
if(value == 1) { colname = predictor } else { colname = paste0(predictor,"_",value) }
df[, colname] = at_predictor_1
}
mstrapped = suppressMessages(melt(df))
invisible(mstrapped)
}
cases = 1000
fitness<-rnorm(cases)
y <- ifelse( fitness < 0, 0,
rpois(cases, exp(fitness)+0.5))
fa <- factor(ifelse(fitness > 1.5, 1, 0))
table(y)
## y
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 16 19
## 538 94 110 84 72 32 19 15 9 11 2 2 4 3 2 2 1
data=data.frame(y=y, fitness=fitness, fa = fa)
qplot(data$y)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(data, aes(fa, y)) +
geom_pointrange(stat='summary', fun.data="mean_cl_boot")
prior=list(R=list(V=diag(2), nu=0.002, fix=2))
# use 0 as the reference
fa.pred<- c(0,1)
names(fa.pred) = rep("fa1", length(fa.pred))
m1<-MCMCglmm(y~trait * (1 + fa), rcov=~idh(trait):units, data=data,
family="zapoisson", prior=prior)
##
## MCMC iteration = 0
##
## Acceptance ratio for latent scores = 0.000374
##
## MCMC iteration = 1000
##
## Acceptance ratio for latent scores = 0.311409
##
## MCMC iteration = 2000
##
## Acceptance ratio for latent scores = 0.303929
##
## MCMC iteration = 3000
##
## Acceptance ratio for latent scores = 0.305933
##
## MCMC iteration = 4000
##
## Acceptance ratio for latent scores = 0.350988
##
## MCMC iteration = 5000
##
## Acceptance ratio for latent scores = 0.360871
##
## MCMC iteration = 6000
##
## Acceptance ratio for latent scores = 0.343853
##
## MCMC iteration = 7000
##
## Acceptance ratio for latent scores = 0.333109
##
## MCMC iteration = 8000
##
## Acceptance ratio for latent scores = 0.345636
##
## MCMC iteration = 9000
##
## Acceptance ratio for latent scores = 0.347459
##
## MCMC iteration = 10000
##
## Acceptance ratio for latent scores = 0.355807
##
## MCMC iteration = 11000
##
## Acceptance ratio for latent scores = 0.352589
##
## MCMC iteration = 12000
##
## Acceptance ratio for latent scores = 0.328316
##
## MCMC iteration = 13000
##
## Acceptance ratio for latent scores = 0.355210
predicted = HPDpredict_za(m1, fa.pred)
# use 1 as the reference
data2 = data
data2$fa = relevel(data2$fa,ref = "1")
fa.pred2 <- c(0,1)
names(fa.pred2) = rep("fa0", length(fa.pred2))
m2<-MCMCglmm(y~trait * (1 + fa), rcov=~idh(trait):units, data=data2,
family="zapoisson", prior=prior)
##
## MCMC iteration = 0
##
## Acceptance ratio for latent scores = 0.000391
##
## MCMC iteration = 1000
##
## Acceptance ratio for latent scores = 0.316639
##
## MCMC iteration = 2000
##
## Acceptance ratio for latent scores = 0.307234
##
## MCMC iteration = 3000
##
## Acceptance ratio for latent scores = 0.309898
##
## MCMC iteration = 4000
##
## Acceptance ratio for latent scores = 0.242822
##
## MCMC iteration = 5000
##
## Acceptance ratio for latent scores = 0.273793
##
## MCMC iteration = 6000
##
## Acceptance ratio for latent scores = 0.267969
##
## MCMC iteration = 7000
##
## Acceptance ratio for latent scores = 0.260145
##
## MCMC iteration = 8000
##
## Acceptance ratio for latent scores = 0.257007
##
## MCMC iteration = 9000
##
## Acceptance ratio for latent scores = 0.275591
##
## MCMC iteration = 10000
##
## Acceptance ratio for latent scores = 0.242387
##
## MCMC iteration = 11000
##
## Acceptance ratio for latent scores = 0.238454
##
## MCMC iteration = 12000
##
## Acceptance ratio for latent scores = 0.270462
##
## MCMC iteration = 13000
##
## Acceptance ratio for latent scores = 0.259527
predicted2 = HPDpredict_za(m2, fa.pred2)
predicted2$reference = "1"
predicted$reference = "0"
predicted$variable = as.character(predicted$variable)
predicted2$variable = as.character(predicted2$variable)
predicted$variable[predicted$variable=="fa1_0"] = "fa0"
predicted2$variable[predicted2$variable=="fa0_0"] = "fa1"
ggplot(rbind(predicted,predicted2), aes(variable, value, fill = reference)) +
geom_violin() + geom_pointrange(aes(colour = reference), position = position_dodge(width=0.1), stat='summary', fun.data="mean_sdl")
uncertainty in both estimates is higher if the reference with few data points is chosen. predictions are too low, fa1 is predicted to be lower than fa0, even though it’s higher.
newdata = expand.grid(fa = unique(data$fa))
newdata[,c("fit","se.fit")] = predict(glm(y ~ fa, data = data, family = poisson),se.fit = T,newdata = newdata, type = "response")[1:2]
newdata2 = expand.grid(fa = unique(data2$fa))
newdata2[,c("fit","se.fit")] = predict(glm(y ~ fa, data = data2, family = poisson),se.fit = T,newdata = newdata2, type = "response")[1:2]
newdata$reference = "0"
newdata2$reference = "1"
ggplot(rbind(newdata,newdata2), aes(fa, fit, colour = reference)) +
geom_pointrange(aes(colour = reference, ymin = fit - se.fit, ymax = fit + se.fit),position = position_dodge(width=0.1), stat='identity')
error bars are identical, no matter which reference
prior=list(R=list(V=diag(2), nu=0.002, fix=2))
# use 0 as the reference
fa.pred<- c(0,1)
names(fa.pred) = rep("fa1", length(fa.pred))
m1<-MCMCglmm(y~trait * (1 + fa), rcov=~idh(trait):units, data=data,
family="zapoisson", prior=prior)
##
## MCMC iteration = 0
##
## Acceptance ratio for latent scores = 0.000380
##
## MCMC iteration = 1000
##
## Acceptance ratio for latent scores = 0.312849
##
## MCMC iteration = 2000
##
## Acceptance ratio for latent scores = 0.305347
##
## MCMC iteration = 3000
##
## Acceptance ratio for latent scores = 0.304347
##
## MCMC iteration = 4000
##
## Acceptance ratio for latent scores = 0.332242
##
## MCMC iteration = 5000
##
## Acceptance ratio for latent scores = 0.317895
##
## MCMC iteration = 6000
##
## Acceptance ratio for latent scores = 0.336136
##
## MCMC iteration = 7000
##
## Acceptance ratio for latent scores = 0.334753
##
## MCMC iteration = 8000
##
## Acceptance ratio for latent scores = 0.327264
##
## MCMC iteration = 9000
##
## Acceptance ratio for latent scores = 0.322766
##
## MCMC iteration = 10000
##
## Acceptance ratio for latent scores = 0.335903
##
## MCMC iteration = 11000
##
## Acceptance ratio for latent scores = 0.328400
##
## MCMC iteration = 12000
##
## Acceptance ratio for latent scores = 0.316310
##
## MCMC iteration = 13000
##
## Acceptance ratio for latent scores = 0.319423
predicted = HPDpredict_za_old(m1, fa.pred)
# use 1 as the reference
data2 = data
data2$fa = relevel(data2$fa,ref = "1")
fa.pred2 <- c(0,1)
names(fa.pred2) = rep("fa0", length(fa.pred2))
m2<-MCMCglmm(y~trait * (1 + fa), rcov=~idh(trait):units, data=data2,
family="zapoisson", prior=prior)
##
## MCMC iteration = 0
##
## Acceptance ratio for latent scores = 0.000357
##
## MCMC iteration = 1000
##
## Acceptance ratio for latent scores = 0.317110
##
## MCMC iteration = 2000
##
## Acceptance ratio for latent scores = 0.305191
##
## MCMC iteration = 3000
##
## Acceptance ratio for latent scores = 0.303861
##
## MCMC iteration = 4000
##
## Acceptance ratio for latent scores = 0.309769
##
## MCMC iteration = 5000
##
## Acceptance ratio for latent scores = 0.309628
##
## MCMC iteration = 6000
##
## Acceptance ratio for latent scores = 0.259362
##
## MCMC iteration = 7000
##
## Acceptance ratio for latent scores = 0.301539
##
## MCMC iteration = 8000
##
## Acceptance ratio for latent scores = 0.293685
##
## MCMC iteration = 9000
##
## Acceptance ratio for latent scores = 0.283679
##
## MCMC iteration = 10000
##
## Acceptance ratio for latent scores = 0.294668
##
## MCMC iteration = 11000
##
## Acceptance ratio for latent scores = 0.279069
##
## MCMC iteration = 12000
##
## Acceptance ratio for latent scores = 0.286238
##
## MCMC iteration = 13000
##
## Acceptance ratio for latent scores = 0.307295
predicted2 = HPDpredict_za_old(m2, fa.pred2)
predicted2$reference = "1"
predicted$reference = "0"
predicted$variable = as.character(predicted$variable)
predicted2$variable = as.character(predicted2$variable)
predicted$variable[predicted$variable=="fa1_0"] = "fa0"
predicted2$variable[predicted2$variable=="fa0_0"] = "fa1"
ggplot(rbind(predicted,predicted2), aes(variable, value, fill = reference)) +
geom_violin() + geom_pointrange(aes(colour = reference), position = position_dodge(width=0.1), stat='summary', fun.data="mean_sdl")