Analysing the study design with Retrodesign.
Gelman, A., & Carlin, J. (2014). Beyond Power Calculations: Assessing Type S (Sign) and Type M (Magnitude) Errors. Perspectives on Psychological Science, 9(6), 641-651. http://doi.org/10.1177/1745691614551642
Note: the standard error formula for d below is taken from slide 9 (on p. 5) of this document.
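The plotting code in this and later sections uses ggplot2 (qplot, ggplot), which is assumed to be loaded already; if it is not, a minimal sketch in the same style as the later package checks:
if (!require("ggplot2")) {
install.packages("ggplot2")
}
library("ggplot2")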
## Create a vector of possible effect sizes for the x-axis:
xax <- seq(from = 0.05, to = 2, by = 0.05)
## Calculate the SE of d in this particular case:
n1 <- 133
n2 <- 129
sed <- sqrt((n1+n2)/(n1*n2)+(xax^2)/(2*(n1+n2)))
Whip up Retrodesign (all this from Gelman & Carlin):
retrodesign <- function(A, s, alpha=.05, df=Inf, n.sims=10000){
z <- qt(1-alpha/2, df)
p.hi <- 1 - pt(z-A/s, df)
p.lo <- pt(-z-A/s, df)
power <- p.hi + p.lo
typeS <- p.lo/power
estimate <- A + s*rt(n.sims,df)
significant <- abs(estimate) > s*z
exaggeration <- mean(abs(estimate)[significant])/A
return(list(power=power, typeS=typeS, exaggeration=exaggeration))
}
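As a quick sanity check (a sketch; output not shown), the function can be evaluated at a single plausible effect size, say d = 0.3, using the same SE formula as above:
## SE of d at a true effect of 0.3, for these group sizes:
sed_03 <- sqrt((n1 + n2)/(n1 * n2) + (0.3^2)/(2 * (n1 + n2)))
## Power, probability of a sign error, and expected exaggeration ratio:
retrodesign(A = 0.3, s = sed_03)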
Draw plots:
## Power:
retroPow <- (retrodesign(xax, sed)$power)
## Note: theme_bw() comes before theme(), otherwise it would reset the custom text sizes.
qplot(xax, retroPow) + geom_point() + geom_line() + xlab("True effect size d") +
ylab("Power") + scale_y_continuous(breaks = seq(0, 1, 0.05), minor_breaks = seq(0, 1, 0.05)) +
scale_x_continuous(breaks = seq(0, 1, 0.1), minor_breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
theme_bw() + theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 14, face = "bold"))

## Exaggeration ratio:
retroExg <- (retrodesign(xax, sed)$exaggeration)
## The y limits are folded into scale_y_continuous() (a later scale would silently
## override ylim()), and theme_bw() again comes before theme().
qplot(xax, retroExg) + xlim(0, 1) + geom_point() + geom_line() +
xlab("True effect size") + ylab("Expected type M error (exaggeration ratio)") +
scale_y_continuous(breaks = seq(0, 30, 1), minor_breaks = seq(0, 30, 1), limits = c(0, 30)) +
theme_bw() + theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 14, face = "bold"))

## Probability of wrong sign:
retroS <- (retrodesign(xax, sed)$typeS)
## Type S is a probability, so the y limits live in scale_y_continuous() (the earlier
## ylim(0, 40) was a leftover that the scale would override anyway).
qplot(xax, retroS) + xlim(0, 1) + geom_point() + geom_line() +
xlab("True effect size") + ylab("Expected type S error: p(wrong sign)") +
scale_y_continuous(breaks = seq(0, 0.2, 0.025), minor_breaks = seq(0, 0.2, 0.025),
limits = c(0, 0.2)) +
theme_bw() + theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 14, face = "bold"))

Evaluating the v-statistic
Code below is adapted from Daniel Lakens’ blog.
if (!require("hypergeo")) {
install.packages("hypergeo")
}
library("hypergeo")
# Lakens: "Below, I'm vectorizing the function so that I can plot curves.
# The rest is unchanged from the vstat function by Davis-Stober & Dana."
# Note: in this version the unbiased-Rsq adjustment (the first line of the
# function body) is active; comment it out to use the observed R-squared instead.
vstat <- Vectorize(function(n, p, Rsq) {
Rsq = Re(1 - ((n - 2)/(n - p)) * (1 - Rsq) * hypergeo(1, 1, (n - p + 2) *
0.5, 1 - Rsq))
if (Rsq <= 0) {
Rsq = 1e-04
}
r = ((p - 1) * (1 - Rsq))/((n - p) * Rsq)
g = min(r, 1)
if (g < 0.5001 && g > 0.4999) {
g = 0.5001
}
z = (g - sqrt(g - g^2))/(2 * g - 1)
alpha = acos((1 - z)/sqrt(1 - 2 * z * (1 - z)))
v = Re((((2 * cos(alpha) * gamma((p + 2)/2))/(sqrt(pi) * gamma((p + 1)/2))) *
(hypergeo(0.5, (1 - p)/2, 3/2, cos(alpha)^2) - sin(alpha)^(p - 1))))
return(v)
})
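Besides drawing curves, the vectorized function can be evaluated at a single point; for example, for the two message groups and a hypothetical observed R-squared of 0.04 (a sketch; output not shown):
# v-statistic for n = 262, two estimated parameters, hypothetical R-squared of 0.04:
vstat(n = 133 + 129, p = 2, Rsq = 0.04)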
## Plot it:
curve(vstat(Rsq = x, n = 133 + 129 + 83 + 7, p = 2), 0.01, 0.25, type = "l",
col = "purple", ylim = c(0, 1), xlab = "R-squared when Estimating 2 Parameters",
lty = 1, ylab = "v-statistic")
par(new = TRUE)
curve(vstat(Rsq = x, n = 133 + 129, p = 2), 0.01, 0.25, type = "l", col = "green",
ylim = c(0, 1), xaxt = "n", yaxt = "n", xlab = "", ylab = "", lty = 2)
par(new = TRUE)
# Horizontal line at 0.5 cut-off
abline(h = 0.5, col = "azure4", lty = 5)
# Legend
legend(0.05, 0.4, c("Reminder (n=262) v. no reminder (n=90)", "Reason (n=133) v. Succinct (n=129)"),
lty = c(1, 2), lwd = c(2.5, 2.5), col = c("purple", "green"))

Variables
lmi <- read.csv('C:/Users/matti/Desktop/Gradufilet/160419_EL DATA_Nimitiedoton_OE etc. T3 added.csv', sep = ",", dec = ".")
# Recruitment wave
# The ID column name picks up a byte-order-mark prefix ("ï..") when the CSV is read;
# copy it to a clean name:
lmi$Vastaajatunnus <- lmi$ï..Vastaajatunnus
lmi$batch[lmi$Vastaajatunnus<1000] <- "1"
lmi$batch[lmi$Vastaajatunnus>1000 & lmi$Vastaajatunnus<2000] <- "2"
lmi$batch[lmi$Vastaajatunnus>=9000 & lmi$Vastaajatunnus<=9026] <- "1"
lmi$batch[lmi$Vastaajatunnus>=9027] <- "2"
lmi$batch <- as.numeric(lmi$batch)
lmi$sex <- factor(lmi$q0005, levels=c(1,2), labels=c("Boy","Girl"))
# "IkÃ." is the Finnish column name "Ikä" (age) with mangled encoding:
lmi$age <- lmi$IkÃ.
# SMS-group as a FACTOR:
lmi$SMSg <- factor(lmi$SMS_group, levels=c(1,2,3,4), labels=c("Reason","Succinct","No SMS","Failed to send"))
lmi$SMSg2 <- factor(lmi$SMS_group, levels=c(1,2), labels=c("Reason","Succinct"))
lmi$SMSg3 <- factor(lmi$SMS_group, levels=c(1,2,3), labels=c("Reason (n=133)","Succinct (n=135)","Opt out (n=83)"))
lmi$SMSg4 <- factor(lmi$SMS_group_optinvsoptout, levels=c(1,2), labels=c("Opt in","Opt out"))
lmi$SMSgall <- factor(lmi$SMS_group, levels=c(1,2,3,4), labels=c("Reason", "Succinct", "Opt out", "Send failed"))
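As a quick sanity check on the recoding (a sketch; output not shown), the message groups can be cross-tabulated against the recruitment waves:
# Message group by recruitment wave, with NAs shown explicitly:
table(lmi$SMSgall, lmi$batch, useNA = "ifany")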
Analyses
Implementation measures
if(!require('devtools')){install.packages('devtools')}
if(!require('rjags')){install.packages('rjags')}
if(!require('yarrr')){devtools::install_github('ndphillips/yarrr')}
library('devtools')
library('yarrr')
library('rjags')
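The Bayes factor, one-way ANOVA, and MANOVA code further down also relies on the BayesFactor package (contingencyTableBF, anovaBF, extractBF, posterior), the userfriendlyscience package (oneway), and the car package (Manova). These are presumably loaded elsewhere; a minimal sketch in the same style:
if(!require('BayesFactor')){install.packages('BayesFactor')}
if(!require('userfriendlyscience')){install.packages('userfriendlyscience')}
if(!require('car')){install.packages('car')}
library('BayesFactor')
library('userfriendlyscience')
library('car')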
Reading the messages:
lmi$read <- "I opened the SMS and read it on the morning it was sent..."
pirateplot(formula = lmi$SMS_read ~ read,
data = lmi,
xlab = "",
ylab = "",
ylim = c(0, 5),
pal = "southpark",
point.o = .8,
line.o = 1,
theme.o = 0,
bean.o = .2,
line.lwd = 1,
hdi.o = .8,
point.pch = 1,
point.cex = 1,
jitter.val = .07
)

chisq.test(lmi$SMS_read, lmi$SMSg2)
##
## Pearson's Chi-squared test
##
## data: lmi$SMS_read and lmi$SMSg2
## X-squared = 1.3564, df = 4, p-value = 0.8517
# Create data matrix for Bayesian contingency tables:
table.readbf <- table(data.frame(lmi$SMS_read, lmi$SMSg2))
# Bayes factor for the contingency tables with default prior concentration:
readbf <- contingencyTableBF(table.readbf, sampleType = "poisson")
extractBF(readbf)$bf
## [1] 0.02846462
# Create a vector of different prior concentrations:
priorWidths <- c(seq(1, 1.25, by = 0.001), seq(1.25, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(contingencyTableBF(table.readbf, sampleType = "poisson", priorConcentration = ownprior))$bf
})
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot BFs with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors, group = 1)) +
ylim(0, max(bayesFactors)) +
xlim(1, 2) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF10") +
theme_bw()
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

Discussing the messages
lmi$contam <- "I discussed the content of the messages with my peers at school..."
pirateplot(formula = lmi$SMS_contam ~ contam,
data = lmi,
xlab = "",
ylab = "",
ylim = c(0, 5),
pal = "forestgreen",
point.o = .8,
line.o = 1,
theme.o = 0,
bean.o = .2,
line.lwd = 1,
hdi.o = .8,
point.pch = 1,
point.cex = 1,
jitter.val = .07
)

chisq.test(lmi$SMS_contam, lmi$SMSg2)
##
## Pearson's Chi-squared test
##
## data: lmi$SMS_contam and lmi$SMSg2
## X-squared = 2.5666, df = 4, p-value = 0.6327
# Create data matrix for Bayesian contingency tables:
table.contambf <- table(data.frame(lmi$SMS_contam, lmi$SMSg2))
# Bayes factor for the contingency tables with default prior concentration:
contambf <- contingencyTableBF(table.contambf, sampleType = "poisson")
extractBF(contambf)$bf
## [1] 0.01093142
# Create a vector of different prior concentrations:
priorWidths <- c(seq(1, 1.25, by = 0.001), seq(1.25, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(contingencyTableBF(table.contambf, sampleType = "poisson", priorConcentration = ownprior))$bf
})
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot BFs with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors, group = 1)) +
ylim(0, max(bayesFactors)) +
xlim(1, 2) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF10") +
theme_bw()
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

Satisfaction with the messages
lmi$satisf <- "I was satisfied with the content of the messages"
pirateplot(formula = lmi$SMS_satisf ~ satisf,
data = lmi,
xlab = "",
ylab = "",
ylim = c(0, 5),
pal = "tan4",
point.o = 1,
line.o = 1,
theme.o = 0,
bean.o = .2,
line.lwd = 1,
hdi.o = .0,
point.pch = 1,
point.cex = 1,
jitter.val = .07
)

chisq.test(lmi$SMS_satisf, lmi$SMSg2)
##
## Pearson's Chi-squared test
##
## data: lmi$SMS_satisf and lmi$SMSg2
## X-squared = 3.9027, df = 4, p-value = 0.4193
# Create data matrix for Bayesian contingency tables:
table.satisfbf <- table(data.frame(lmi$SMS_satisf, lmi$SMSg2))
# Bayes factor for the contingency tables with default prior concentration:
satisfbf <- contingencyTableBF(table.satisfbf, sampleType = "poisson")
extractBF(satisfbf)$bf
## [1] 0.03160562
# Create a vector of different prior concentrations:
priorWidths <- c(seq(1, 1.25, by = 0.001), seq(1.25, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(contingencyTableBF(table.satisfbf, sampleType = "poisson", priorConcentration = ownprior))$bf
})
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot BFs with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors, group = 1)) +
ylim(0, max(bayesFactors)) +
xlim(1, 2) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF10") +
theme_bw()
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

A kernel density plot of total wear times within the two message groups
if(!require('sm')){install.packages('sm')}
source("C:/Users/matti/Desktop/Gradufilet/DBDA2Eprograms/DBDA2E-utilities.R")
##
## *********************************************************************
## Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:
## A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier.
## *********************************************************************
library('sm')
# Changing the sm density compare function to allow different color of the band of equality. Copied from https://stat.ethz.ch/pipermail/r-help//2009-March/416920.html.
sm.density.compare2 <- function (x, group, h, model = "none", bandcol =
'cyan', lwd = par("lwd"), usePolyg = NULL, asp=NA,
xlab=opt$xlab, ylab=opt$ylab, ...)
{
if (!is.vector(x))
stop("sm.density.compare can handle only 1-d data")
opt <- sm.options(list(...))
sm:::replace.na(opt, ngrid, 50)
## These all changed from replace.na() --> sm:::
sm:::replace.na(opt, display, "line")
sm:::replace.na(opt, xlab, deparse(substitute(x)))
sm:::replace.na(opt, ylab, "Density")
sm:::replace.na(opt, xlim, c(min(x) - diff(range(x))/4, max(x) +
diff(range(x))/4))
sm:::replace.na(opt, eval.points, seq(opt$xlim[1], opt$xlim[2],
length = opt$ngrid))
if (is.na(opt$band)) {
if (model == "none")
opt$band <- FALSE
else opt$band <- TRUE
}
if ((model == "none") && opt$band)
opt$band <- FALSE
band <- opt$band
ngrid <- opt$ngrid
xlim <- opt$xlim
nboot <- opt$nboot
y <- x
if (is.na(opt$test)) {
if (model == "none")
opt$test <- FALSE
else opt$test <- TRUE
}
if ((model == "none") && opt$test)
opt$test <- FALSE
test <- opt$test
if (opt$display %in% "none")
band <- FALSE
fact <- factor(group)
fact.levels <- levels(fact)
nlev <- length(fact.levels)
ni <- table(fact)
if (band & (nlev > 2)) {
cat("Reference band available to compare two groups only.",
"\n")
band <- FALSE
}
if (length(opt$lty) < nlev)
opt$lty <- 1:nlev
if (length(opt$col) < nlev)
opt$col <- 2:(nlev + 1)
if (missing(h))
h <- h.select(x, y = NA, group = group, ...)
opt$band <- band
opt$test <- test
estimate <- matrix(0, ncol = opt$ngrid, nrow = nlev)
se <- matrix(0, ncol = opt$ngrid, nrow = nlev)
for (i in 1:nlev) {
sm <- sm.density(y[fact == fact.levels[i]], h = h, display = "none",
eval.points = opt$eval.points)
estimate[i, ] <- sm$estimate
se[i, ] <- sm$se
}
eval.points <- sm$eval.points
if (!(opt$display %in% "none" | band)) {
plot(xlim, c(0, 1.1 * max(as.vector(estimate))), xlab = opt$xlab,
ylab = opt$ylab, type = "n")
#for (i in 1:nlev) lines(eval.points, estimate[i, ], lty = opt$lty[i],
# col = opt$col[i])
for (i in 1:nlev) lines(eval.points, estimate[i, ], lty =
opt$lty[i], ## lwd hacked in
col = opt$col[i], lwd = lwd[i])
}
est <- NULL
p <- NULL
if (model == "equal" & test) {
if (nlev == 2) {
ts <- sum((estimate[1, ] - estimate[2, ])^2)
}
else {
sm.mean <- sm.density(y, h = h, xlim = opt$xlim,
ngrid = opt$ngrid, display = "none")$estimate
ts <- 0
for (i in 1:nlev) ts <- ts + ni[i] * sum((estimate[i,
] - sm.mean)^2)
}
p <- 0
est.star <- matrix(0, ncol = opt$ngrid, nrow = nlev)
for (iboot in 1:nboot) {
ind <- (1:length(y))
for (i in 1:nlev) {
indi <- sample((1:length(ind)), ni[i])
est.star[i, ] <- sm.density(y[ind[indi]], h = h,
ngrid = opt$ngrid, xlim = opt$xlim, display =
"none")$estimate
ind <- ind[-indi]
}
if (nlev == 2) {
ts.star <- sum((est.star[1, ] - est.star[2, ])^2)
}
else {
sm.mean <- sm.density(y, h = h, xlim = opt$xlim,
ngrid = opt$ngrid, display = "none")$estimate
ts.star <- 0
for (i in 1:nlev) {
ts.star <- ts.star + ni[i] * sum((est.star[i,
] - sm.mean)^2)
}
}
if (ts.star > ts)
p <- p + 1
if (opt$verbose > 1) {
cat(iboot)
cat(" ")
}
}
p <- p/nboot
cat("\nTest of equal densities: p-value = ", round(p,
3), "\n")
est <- list(p = p, h = h)
}
if (model == "equal" & band) {
av <- (sqrt(estimate[1, ]) + sqrt(estimate[2, ]))/2
se <- sqrt(se[1, ]^2 + se[2, ]^2)
upper <- (av + se)^2
lower <- pmax(av - se, 0)^2
plot(xlim, c(0, 1.1 * max(as.vector(estimate), upper)),
xlab = xlab, ylab = ylab, type = "n", asp=asp, ...)
## ... and asp added; was opt$xlab and opt$ylab
polygon(c(eval.points, rev(eval.points)), c(upper, rev(lower)),
col = bandcol, border = 0)
## was col = "cyan"
if (is.null(usePolyg)) {
lines(eval.points, estimate[1, ], lty = opt$lty[1], col =
opt$col[1], lwd = lwd[1])
lines(eval.points, estimate[2, ], lty = opt$lty[2], col =
opt$col[2], lwd = lwd[2])
}
else {
polygon(eval.points, estimate[1, ], lty = opt$lty[1], col =
opt$col[1], lwd = lwd[1])
polygon(eval.points, estimate[2, ], lty = opt$lty[2], col =
opt$col[2], lwd = lwd[2])
}
est <- list(p = p, upper = upper, lower = lower, h = h)
}
invisible(est)
}
# WEARTIME KERNEL: H_Weartime.SUM
openGraph(width=6,height=6)
# Note: the lmi$ columns are passed directly inside with(), so the na.omit() has no
# effect on what is plotted; sm.density drops the NAs itself, which is what produces
# the long run of "missing data are removed" messages below.
with(na.omit(cbind.data.frame(lmi$H_Weartime.SUM, lmi$SMSg2)),
     sm.density.compare2(lmi$H_Weartime.SUM, lmi$SMSg2, xlab="Minutes",
                         col=c(1,2), lty=c(2,1), bandcol='LightGray',
                         model="equal", lwd=(c(2,2)), xlim=c(0,8000)))
## missing data are removed
## (the message above is repeated about 200 times, once per call to sm.density
## during the bootstrap test of equal densities; repetitions omitted)
##
## Test of equal densities: p-value = 0.3
title(main="")
colfill<-c(1,2)
legend("topleft", inset=.05, levels(lmi$SMSg2), fill=colfill)
MeanR <- mean(lmi$H_Weartime.SUM[which(lmi$SMS_group=="1")], na.rm=T)
MeanS <- mean(lmi$H_Weartime.SUM[which(lmi$SMS_group=="2")], na.rm=T)
MediR <- median(lmi$H_Weartime.SUM[which(lmi$SMS_group=="1")], na.rm=T)
MediS <- median(lmi$H_Weartime.SUM[which(lmi$SMS_group=="2")], na.rm=T)
SdR <- sd(lmi$H_Weartime.SUM[which(lmi$SMS_group=="1")], na.rm=T)
SdS <- sd(lmi$H_Weartime.SUM[which(lmi$SMS_group=="2")], na.rm=T)
legend("bottom", legend = c(paste("Mean (sd) Reason:", sep=""),
paste(round(MeanR, 2), " (", round(SdR, 2),")", "; n=", sum(!is.na(lmi$H_Weartime.SUM[which(lmi$SMS_group=="1")])), sep=""),
paste(" "),
paste("Mean (sd) Succinct:", sep=""),
paste(round(MeanS, 2), " (", round(SdS, 2),")", "; n=", sum(!is.na(lmi$H_Weartime.SUM[which(lmi$SMS_group=="2")])), sep="")),
bty = "n", cex=0.5)

Weartime minutes (Mann-Whitney, ANOVA, MANOVA):
# Mann-Whitney U-test
reasonsucc <- wilcox.test(lmi$H_Weartime.SUM ~ lmi$SMSg2, exact = TRUE, conf.int = TRUE, conf.level = 0.95, alternative = "two.sided")
reasonsucc$statistic
## W
## 8860
reasonsucc$conf.int
## [1] -280.8999 447.2001
## attr(,"conf.level")
## [1] 0.95
reasonsucc$p.value
## [1] 0.6467758
schools <- wilcox.test(lmi$H_Weartime.SUM ~ lmi$iv, exact = TRUE, conf.int = TRUE, conf.level = 0.95, alternative = "two.sided")
schools$statistic
## W
## 17398.5
schools$conf.int
## [1] -1.600039 619.600069
## attr(,"conf.level")
## [1] 0.95
schools$p.value
## [1] 0.05113113
waves <- wilcox.test(lmi$H_Weartime.SUM ~ lmi$batch, exact = TRUE, conf.int = TRUE, conf.level = 0.95, alternative = "two.sided")
waves$statistic
## W
## 17310.5
waves$conf.int
## [1] -19.0 586.3
## attr(,"conf.level")
## [1] 0.95
waves$p.value
## [1] 0.06695725
optin <- wilcox.test(lmi$H_Weartime.SUM ~ lmi$SMSg4, exact = TRUE, conf.int = TRUE, conf.level = 0.95, alternative = "two.sided")
optin$statistic
## W
## 10642.5
optin$conf.int
## [1] -424.1999 305.2999
## attr(,"conf.level")
## [1] 0.95
optin$p.value
## [1] 0.7714621
# ANOVA (oneway() from the userfriendlyscience package)
oneway(y=lmi$H_Weartime.SUM,
x=lmi$SMSg3,
means=TRUE, posthoc="holm", plot=TRUE, levene=TRUE)
## ### Means for y (H_Weartime.SUM) separate for each level of x (SMSg3):
##
## x: Reason (n=133)
## n mean sd median se
## 133 4549.57 1642.14 4909.3 142.39
## --------------------------------------------------------
## x: Succinct (n=135)
## n mean sd median se
## 129 4479.65 1616.04 4807.7 142.28
## --------------------------------------------------------
## x: Opt out (n=83)
## n mean sd median se
## 83 4513.16 1687.87 5067.3 185.27

## ### Oneway Anova for y=H_Weartime.SUM and x=SMSg3 (groups: Reason (n=133), Succinct (n=135), Opt out (n=83))
##
## Eta Squared: 95% CI = [0; 0], point estimate = 0
##
## SS Df MS F p
## Between groups (error + effect) 320392.32 2 160196.16 0.06 .942
## Within groups (error only) 923850181.99 342 2701316.32
##
## ### Levene's test:
##
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 2 0.9296 0.3957
## 342
##
## ### Post hoc test: holm
##
## Reason (n=133) Succinct (n=135)
## Succinct (n=135) 1
## Opt out (n=83) 1 1
# Check correlations between outcome vars
lapply(c("pearson", "kendall", "spearman"), function(x) {cor(lmi$H_Weartime.SUM, lmi$H_DaysWornN_over10h, use = "pairwise.complete.obs", method = x)})
## [[1]]
## [1] 0.9485561
##
## [[2]]
## [1] 0.8104757
##
## [[3]]
## [1] 0.9267793
# MANOVA, reason vs. succinct
Y <- cbind(lmi$H_Weartime.SUM, lmi$H_DaysWornN_over10h)
fit <- manova(Y ~ lmi$SMSg2)
summary(fit, test="Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## lmi$SMSg2 1 0.014226 1.8688 2 259 0.1564
## Residuals 260
# MANOVA, reason vs. succinct vs. opt out
Y <- cbind(lmi$H_Weartime.SUM, lmi$H_DaysWornN_over10h)
fit <- manova(Y ~ lmi$SMSg3)
lapply(c("Pillai", "Wilks", "Hotelling-Lawley", "Roy"), function(x) {summary(fit, test=x)})
## [[1]]
## Df Pillai approx F num Df den Df Pr(>F)
## lmi$SMSg3 2 0.02684 2.3261 4 684 0.05503 .
## Residuals 342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [[2]]
## Df Wilks approx F num Df den Df Pr(>F)
## lmi$SMSg3 2 0.97316 2.3348 4 682 0.05426 .
## Residuals 342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [[3]]
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## lmi$SMSg3 2 0.02757 2.3434 4 680 0.0535 .
## Residuals 342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [[4]]
## Df Roy approx F num Df den Df Pr(>F)
## lmi$SMSg3 2 0.027376 4.6813 2 342 0.009869 **
## Residuals 342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Type 3 sums of squares (Manova() from the car package):
fitIII <- lm(cbind(H_Weartime.SUM, H_DaysWornN_over10h) ~ SMSg3, data=lmi)
ManRes <- Manova(fitIII, type="III")
summary(ManRes, multivariate=TRUE)
##
## Type III MANOVA Tests:
##
## Sum of squares and products for error:
## H_Weartime.SUM H_DaysWornN_over10h
## H_Weartime.SUM 923850182 1064286.630
## H_DaysWornN_over10h 1064287 1357.752
##
## ------------------------------------------
##
## Term: (Intercept)
##
## Sum of squares and products for the hypothesis:
## H_Weartime.SUM H_DaysWornN_over10h
## H_Weartime.SUM 2752913824 2866230.000
## H_DaysWornN_over10h 2866230 2984.211
##
## Multivariate Tests: (Intercept)
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.763968 551.8597 2 341 < 2.22e-16 ***
## Wilks 1 0.236032 551.8597 2 341 < 2.22e-16 ***
## Hotelling-Lawley 1 3.236714 551.8597 2 341 < 2.22e-16 ***
## Roy 1 3.236714 551.8597 2 341 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: SMSg3
##
## Sum of squares and products for the hypothesis:
## H_Weartime.SUM H_DaysWornN_over10h
## H_Weartime.SUM 320392.3213 -340.170252
## H_DaysWornN_over10h -340.1703 2.375795
##
## Multivariate Tests: SMSg3
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 2 0.0268404 2.326071 4 684 0.0550310 .
## Wilks 2 0.9731648 2.334803 4 682 0.0542588 .
## Hotelling-Lawley 2 0.0275699 2.343444 4 680 0.0535048 .
## Roy 2 0.0273761 4.681316 2 342 0.0098687 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# On Roy's largest root: "Because it is a maximum, it can behave differently from the other three test statistics. In instances where the other three are not significant and Roy's is significant, the effect should be considered insignificant." http://www.ats.ucla.edu/stat/stata/output/Stata_MANOVA.htm
Bayesian ANOVA (NOTE: prior graphs from now on with BF01 instead of BF10):
summary(aov(lmi$H_Weartime.SUM ~ lmi$SMSg3))
## Df Sum Sq Mean Sq F value Pr(>F)
## lmi$SMSg3 2 320392 160196 0.059 0.942
## Residuals 342 923850182 2701316
## 155 observations deleted due to missingness
weartime.fullobs <- data.frame(lmi$H_Weartime.SUM, lmi$SMSg3)
weartime.fullobs <- weartime.fullobs[complete.cases(weartime.fullobs), ]
# These are equivalent (the default rscaleFixed in anovaBF is 1/2):
weartimeBf <- anovaBF(lmi.H_Weartime.SUM ~ lmi.SMSg3, data = weartime.fullobs, whichRandom = weartime.fullobs$lmi.H_Weartime.SUM)
weartimeBf <- anovaBF(lmi.H_Weartime.SUM ~ lmi.SMSg3, data = weartime.fullobs, rscaleFixed = 1/2)
#BF10:
extractBF(weartimeBf)$bf
## [1] 0.03444662
#BF01:
extractBF(1/weartimeBf)$bf
## [1] 29.03042
# Create a vector of different prior concentrations:
priorWidths <- c(seq(0.01, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(weartimeBf <- anovaBF(lmi.H_Weartime.SUM ~ lmi.SMSg3, data = weartime.fullobs, rscaleFixed = ownprior))$bf
})
bayesFactors2 <- 1/bayesFactors
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot results with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors2, group = 1)) +
ylim(0, max(bayesFactors2)) +
xlim(min(priorWidths), max(priorWidths)) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF01") +
theme_bw()
# Dashed line for BF01 = 1/10, indicating strong evidence for alternative.
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
# Dashed line for BF01 = 10, indicating strong evidence for null.
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

Note that, unlike in the thesis, this is not actually the end of the analysis: the null model can be false in ways that are not consistent with H1 (see here). The code that follows is also adapted from that source.
With three groups there are 3! = 6 possible orderings of the group means, so the prior probability of the predicted ordering is 1/6.
Sampling from the posterior:
weartimeBf <- anovaBF(lmi.H_Weartime.SUM ~ lmi.SMSg3, data = weartime.fullobs)
samples <- posterior(weartimeBf, iterations = 10000)
head(samples)
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1
## End = 7
## Thinning interval = 1
## mu lmi.SMSg3-Reason (n=133) lmi.SMSg3-Succinct (n=135)
## [1,] 4514.109 35.27023 -34.18427
## [2,] 4523.548 117.55878 -49.97479
## [3,] 4489.306 151.83371 -178.71618
## [4,] 4477.915 80.20194 42.49761
## [5,] 4425.685 134.84713 62.85131
## [6,] 4562.101 -48.45661 67.67999
## [7,] 4474.811 30.76186 -91.76186
## lmi.SMSg3-Opt out (n=83) sig2 g_lmi.SMSg3
## [1,] -1.085956 2320724 0.02947340
## [2,] -67.583984 2828309 0.39380493
## [3,] 26.882465 2992412 0.03893048
## [4,] -122.699548 2935118 0.05638075
## [5,] -197.698436 2959429 0.08120279
## [6,] -19.223387 2476897 0.18590240
## [7,] 60.999998 2490614 0.04191076
Check the proportion of samples where Reason > Succinct > Opt out.
consistent <- (samples[, "lmi.SMSg3-Reason (n=133)"] > samples[, "lmi.SMSg3-Succinct (n=135)"]) &
(samples[, "lmi.SMSg3-Succinct (n=135)"] > samples[, "lmi.SMSg3-Opt out (n=83)"])
N_consistent <- sum(consistent)
# posterior probability of the order reason > succinct > opt out:
N_consistent / 10000
## [1] 0.2189
The Bayes factor for the order restriction against the full model is the posterior probability of the predicted ordering divided by its prior probability (1/6):
bf_restriction_against_full = (N_consistent / 10000) / (1 / 6)
bf_restriction_against_full
## [1] 1.3134
The data are not sensitive enough to say much about the specified ordering relative to the full model. The evidence gives the order prediction little boost, because the posterior probability of the predicted order is low and/or the prediction was not very risky (its prior probability was not small).
## Convert the Bayes factor object to a number so that we can multiply it
bf_full_against_null <- as.vector(weartimeBf)
## Use transitivity to compute desired Bayes factor
bf_restriction_against_null <- bf_restriction_against_full * bf_full_against_null
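To print the numbers quoted in the next sentence:
# BF10 of the order restriction against the null, and its reciprocal (BF01):
bf_restriction_against_null
1/bf_restriction_against_null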
So, against the null, the BF10 of the order restriction is 0.0452422 and the corresponding BF01 is 22.1032591.
Weardays
pirateplot(formula = H_DaysWornN_over10h ~ SMSg3,
data = lmi,
xlab = "",
ylab = ">10h Measurement days",
pal = "black",
point.o = .8,
line.o = 1,
theme.o = 0,
bean.o = .2,
line.lwd = 1,
hdi.o = .3,
point.pch = 1,
point.cex = 1,
jitter.val = .07,
cut.min = .1,
cut.max = 7.2
)

Was sent messages v. was not sent messages
lmi$SMS_group_receiveornot[lmi$SMS_group==1] <- "1"
lmi$SMS_group_receiveornot[lmi$SMS_group==2] <- "1"
lmi$SMS_group_receiveornot[lmi$SMS_group==3] <- "2"
lmi$SMS_group_receiveornot[lmi$SMS_group==4] <- "2"
table(lmi$SMS_group_receiveornot)
##
## 1 2
## 273 102
chisq.test(lmi$H_DaysWornN_over10h, lmi$SMS_group_receiveornot)
##
## Pearson's Chi-squared test
##
## data: lmi$H_DaysWornN_over10h and lmi$SMS_group_receiveornot
## X-squared = 8.344, df = 7, p-value = 0.3032
# Create data matrix for Bayesian contingency tables:
table.dayswornbf <- table(data.frame(lmi$H_DaysWornN_over10h, lmi$SMS_group_receiveornot))
# Bayes factor for the contingency tables with default prior concentration:
dayswornbf <- contingencyTableBF(table.dayswornbf, sampleType = "poisson")
extractBF(dayswornbf)$bf
## [1] 0.02874026
1/dayswornbf
## Bayes factor analysis
## --------------
## [1] Indep. (a=1) : 34.79439 ±0%
##
## Against denominator:
## Alternative, non-independence, a = 1
## ---
## Bayes factor type: BFcontingencyTable, poisson
# Create a vector of different prior concentrations:
priorWidths <- c(seq(1, 1.25, by = 0.001), seq(1.25, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(contingencyTableBF(table.dayswornbf, sampleType = "poisson", priorConcentration = ownprior))$bf
})
bayesFactors2 <- 1/bayesFactors
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot BFs with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors2, group = 1)) +
ylim(0, max(bayesFactors2)) +
xlim(1, 2) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF01") +
theme_bw()
# Dashed line for BF01 = 1/10, indicating strong evidence for alternative.
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
# Dashed line for BF01 = 10, indicating strong evidence for null.
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

Note: the Master's thesis has a problem here:
“Differences were not detected in valid wear day distributions between participants receiving and not receiving reminders, χ2(7) = 8.344, p = 0.303. A BF01 = 25.87 (Poisson sampling, prior concentration = 1.0; as concentration approaches 2, BF01 approaches 66.42) indicates moderate to strong support for equality.”
Should read: “[…] A BF01 = 34.79 (Poisson sampling, prior concentration = 1.0; as concentration approaches 2, BF01 approaches 93.50) […]”
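The corrected figures can be reproduced directly from the same contingency table (a sketch; output not shown):
# BF01 at the default prior concentration (a = 1):
1/extractBF(contingencyTableBF(table.dayswornbf, sampleType = "poisson"))$bf
# BF01 at a prior concentration of 2:
1/extractBF(contingencyTableBF(table.dayswornbf, sampleType = "poisson", priorConcentration = 2))$bf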
Reason v. Succinct:
chisq.test(lmi$H_DaysWornN_over10h, lmi$SMSg2)
##
## Pearson's Chi-squared test
##
## data: lmi$H_DaysWornN_over10h and lmi$SMSg2
## X-squared = 7.8931, df = 7, p-value = 0.3421
# Create data matrix for Bayesian contingency tables:
table.dayswornbf <- table(data.frame(lmi$H_DaysWornN_over10h, lmi$SMSg2))
# Bayes factor for the contingency tables with default prior concentration:
dayswornbf <- contingencyTableBF(table.dayswornbf, sampleType = "poisson")
extractBF(dayswornbf)$bf
## [1] 0.143689
1/dayswornbf
## Bayes factor analysis
## --------------
## [1] Indep. (a=1) : 6.959475 ±0%
##
## Against denominator:
## Alternative, non-independence, a = 1
## ---
## Bayes factor type: BFcontingencyTable, poisson
# Create a vector of different prior concentrations:
priorWidths <- c(seq(1, 1.25, by = 0.001), seq(1.25, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(contingencyTableBF(table.dayswornbf, sampleType = "poisson", priorConcentration = ownprior))$bf
})
bayesFactors2 <- 1/bayesFactors
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot BFs with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors2, group = 1)) +
ylim(0, max(bayesFactors2)) +
xlim(1, 2) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF01") +
theme_bw()
# Dashed line for BF01 = 1/10, indicating strong evidence for alternative.
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
# Dashed line for BF01 = 10, indicating strong evidence for null.
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

Opt in v. opt out:
chisq.test(lmi$H_DaysWornN_over10h, lmi$SMSg4)
##
## Pearson's Chi-squared test
##
## data: lmi$H_DaysWornN_over10h and lmi$SMSg4
## X-squared = 8.9596, df = 7, p-value = 0.2556
# Create data matrix for Bayesian contingency tables:
table.dayswornbf <- table(data.frame(lmi$H_DaysWornN_over10h, lmi$SMSg4))
# Bayes factor for the contingency tables with default prior concentration:
dayswornbf <- contingencyTableBF(table.dayswornbf, sampleType = "poisson")
extractBF(dayswornbf)$bf
## [1] 0.03864754
1/dayswornbf
## Bayes factor analysis
## --------------
## [1] Indep. (a=1) : 25.87487 ±0%
##
## Against denominator:
## Alternative, non-independence, a = 1
## ---
## Bayes factor type: BFcontingencyTable, poisson
# Create a vector of different prior concentrations:
priorWidths <- c(seq(1, 1.25, by = 0.001), seq(1.25, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(contingencyTableBF(table.dayswornbf, sampleType = "poisson", priorConcentration = ownprior))$bf
})
bayesFactors2 <- 1/bayesFactors
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot BFs with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors2, group = 1)) +
ylim(0, max(bayesFactors2)) +
xlim(1, 2) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF01") +
theme_bw()
# Dashed line for BF01 = 1/10, indicating strong evidence for alternative.
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
# Dashed line for BF01 = 10, indicating strong evidence for null.
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

Main trial retention: reason vs. succinct
prop.test(x = c(95, 106), n=c(138, 135))
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: c(95, 106) out of c(138, 135)
## X-squared = 2.8121, df = 1, p-value = 0.09356
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.20788770 0.01432893
## sample estimates:
## prop 1 prop 2
## 0.6884058 0.7851852
sum(!is.na(lmi$hookieON_t3[which(lmi$SMS_group==2)]))
## [1] 106
sum(!is.na(lmi$hookieON_t3[which(lmi$SMS_group==1)]))
## [1] 95
T3_continuer <- lmi$hookieON_t3
T3_continuer[is.na(T3_continuer)] <- 0
# Create data matrix for Bayesian contingency tables:
table.retentionbf <- table(data.frame(T3_continuer, lmi$SMS_group_reasonVsuccinct))
# Bayes factor for the contingency tables with default prior concentration:
retentionbf <- contingencyTableBF(table.retentionbf, sampleType = "poisson")
extractBF(retentionbf)$bf
## [1] 1.350643
1/retentionbf
## Bayes factor analysis
## --------------
## [1] Indep. (a=1) : 0.7403882 ±0%
##
## Against denominator:
## Alternative, non-independence, a = 1
## ---
## Bayes factor type: BFcontingencyTable, poisson
As with the Bayesian ANOVA, non-equality is not what we really wanted to study. Instead, we want to know whether the reason group shows higher retention than the succinct group.
First, sample from posterior:
samples <- posterior(retentionbf, iterations = 10000)
head(samples)
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1
## End = 7
## Thinning interval = 1
## lambda[1,1] lambda[2,1] lambda[1,2] lambda[2,2]
## [1,] 54.41548 102.42638 21.81201 110.89881
## [2,] 45.23569 97.05836 32.24041 120.96059
## [3,] 54.38856 95.71040 27.29905 105.93991
## [4,] 52.09945 100.42334 39.73176 104.25790
## [5,] 37.43938 98.20967 39.95458 97.54872
## [6,] 34.73976 88.60897 29.11443 102.14833
## [7,] 50.49197 95.66813 36.15954 102.75750
mean(samples[, 1])
## [1] 43.39694
mean(samples[, 2])
## [1] 94.83223
mean(samples[, 3])
## [1] 29.44762
mean(samples[, 4])
## [1] 105.5513
# This is total reason group size:
43+95
## [1] 138
# This is proportion of reason continuing the study:
95/(43+95)
## [1] 0.6884058
# This is total succinct group size:
106+29
## [1] 135
# This is proportion of succinct continuing the study:
106/(29+106)
## [1] 0.7851852
# This is mean proportion from the reason group, from simulations:
mean(samples[, 2] / (samples[, 1] + samples[, 2]))
## [1] 0.68607
# This is mean proportion from the succinct group, from simulations:
mean(samples[, 4] / (samples[, 3] + samples[, 4]))
## [1] 0.7818905
Check the proportion of samples where proportion(Reason) > proportion(Succinct).
consistent <- ((samples[, 2] / (samples[, 1] + samples[, 2])) > (samples[, 4] / (samples[, 3] + samples[, 4])))
N_consistent <- sum(consistent)
# posterior probability of the order reason > succinct:
N_consistent / 10000
## [1] 0.0352
The Bayes factor for the order restriction against the full model is the posterior probability of the predicted ordering divided by its prior probability (1/2):
bf_restriction_against_full = (N_consistent / 10000) / (1 / 2)
bf_restriction_against_full
## [1] 0.0704
The data point against the order restriction, relative to the full model.
## Convert the Bayes factor object to a number so that we can multiply it
bf_full_against_null <- as.vector(retentionbf)
## Use transitivity to compute desired Bayes factor
bf_restriction_against_null <- bf_restriction_against_full * bf_full_against_null
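Again, printing the numbers quoted below:
bf_restriction_against_null
1/bf_restriction_against_null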
So, against the null, the BF10 of the order restriction is 0.0950853 and the corresponding BF01 is 10.5168779.
# Create a vector of different prior concentrations:
priorWidths <- c(seq(1, 1.25, by = 0.001), seq(1.25, 2, by = 0.01))
# Save the BFs calculated w/ each concentration:
bayesFactors <- sapply(priorWidths, function(ownprior) {
extractBF(contingencyTableBF(table.retentionbf, sampleType = "poisson", priorConcentration = ownprior))$bf
})
contingencyTableBF(table.retentionbf, sampleType = "poisson")
## Bayes factor analysis
## --------------
## [1] Non-indep. (a=1) : 1.350643 ±0%
##
## Against denominator:
## Null, independence, a = 1
## ---
## Bayes factor type: BFcontingencyTable, poisson
bayesFactors2 <- 1/bayesFactors
# Make a data frame for ggplot
plotdf <- data.frame(priorWidths, bayesFactors)
# Plot BFs with different priors:
plot1 <- ggplot(data = plotdf, aes(x = priorWidths, y = bayesFactors2, group = 1)) +
ylim(0, max(bayesFactors2)) +
xlim(1, 2) +
geom_line() +
geom_hline(yintercept = 0, colour = "azure4") +
xlab("Prior Width") +
ylab("BF01") +
theme_bw()
# Dashed line for BF01 = 1/10, indicating strong evidence for alternative.
plot2 <- plot1 + geom_hline(yintercept = 1/10, linetype = "dashed", colour = "darkgrey")
# Dashed line for BF01 = 10, indicating strong evidence for null.
plot2 + geom_hline(yintercept = 10, linetype = "dashed", colour = "darkgrey")

Hey, looks like you got to the end of the document! A damned rare case, I’m sure. Why not drop me a note on Twitter (@heinonmatti) and tell me how it was? Also, I gladly receive feedback on the general form of the code!