sessionInfo()

## R version 3.2.3 (2015-12-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 10586)
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252 
## [2] LC_CTYPE=English_United Kingdom.1252   
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] magrittr_1.5    formatR_1.2.1   tools_3.2.3     htmltools_0.3  
##  [5] yaml_2.1.13     stringi_1.0-1   rmarkdown_0.9.2 knitr_1.11     
##  [9] stringr_1.0.0   digest_0.6.8    evaluate_0.8

if(!require('ggplot2')){install.packages('ggplot2')}
library('ggplot2')

Analysing the study design with Retrodesign.

Gelman, A., & Carlin, J. (2014). Beyond Power Calculations Assessing Type S (Sign) and Type M (Magnitude) Errors. Perspectives on Psychological Science, 9(6), 641-651. http://doi.org/10.1177/1745691614551642

Note: standard error formula for d below acquired from slide 9 (on p. 5) of this document. Please let me know if you know of a reason one should not use it like I did.

Note 2: I will gladly take feedback on how to improve my code writing skills!

## Create a vector of possible effect sizes for the x-axis:
xax <- seq(from = 0.05, to = 2, by = 0.05)

## Calculate the SE of d in this particular case:
n1 <- 133
n2 <- 129
sed <- sqrt((n1+n2)/(n1*n2)+(xax^2)/(2*(n1+n2)))

Whip up Retrodesign (all this from Gelman & Carlin):

retrodesign <- function(A, s, alpha=.05, df=Inf, n.sims=10000){
  z <- qt(1-alpha/2, df)
  p.hi <- 1 - pt(z-A/s, df)
  p.lo <- pt(-z-A/s, df)
  power <- p.hi + p.lo
  typeS <- p.lo/power
  estimate <- A + s*rt(n.sims,df)
  significant <- abs(estimate) > s*z
  exaggeration <- mean(abs(estimate)[significant])/A
  return(list(power=power, typeS=typeS, exaggeration=exaggeration))
}

Draw plots:

## Power:
retroPow <- (retrodesign(xax, sed)$power)

qplot(xax, retroPow) + geom_point() + geom_line() + xlab("True effect size d") + 
    ylab("Power") + scale_y_continuous(breaks = seq(0, 1, 0.05), minor_breaks = seq(0, 
    1, 0.05)) + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 14, 
    face = "bold")) + scale_x_continuous(breaks = seq(0, 1, 0.1), minor_breaks = seq(0, 
    1, 0.1), limits = c(0, 1))

## Exaggeration ratio:
retroExg <- (retrodesign(xax, sed)$exaggeration)

qplot(xax, retroExg) + ylim(0, 30) + xlim(0, 1) + geom_point() + geom_line() + 
    xlab("True effect size") + ylab("Expected type M error (exaggeration ratio)") + 
    scale_y_continuous(breaks = seq(0, 30, 1), minor_breaks = seq(0, 30, 1)) + 
    theme(axis.text = element_text(size = 12), axis.title = element_text(size = 14, 
        face = "bold"))

## Probability of wrong sign:
retroS <- (retrodesign(xax, sed)$typeS)

qplot(xax, retroS) + ylim(0, 40) + xlim(0, 1) + geom_point() + geom_line() + 
    xlab("True effect size") + ylab("Expected type S error: p(wrong sign)") + 
    scale_y_continuous(breaks = seq(0, 0.2, 0.025), minor_breaks = seq(0, 0.2, 
        0.025)) + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 14, 
    face = "bold"))

Evaluating the v-statistic

Code below is adapted from Daniel Lakens’ blog.

if (!require("hypergeo")) {
    install.packages("hypergeo")
}
library("hypergeo")

# Lakens:
#'Below, I'm vectorizing the function so that I can plot curves.
# The rest is unchanged from the vstat function by Stober-Davis & Dana. If
# you want to use R unbiased, remove the # before the Rsq adjustment
# calculation below'
vstat <- Vectorize(function(n, p, Rsq) {
    Rsq = Re(1 - ((n - 2)/(n - p)) * (1 - Rsq) * hypergeo(1, 1, (n - p + 2) * 
        0.5, 1 - Rsq))
    if (Rsq <= 0) {
        Rsq = 1e-04
    }
    r = ((p - 1) * (1 - Rsq))/((n - p) * Rsq)
    g = min(r, 1)
    if (g < 0.5001 && g > 0.4999) {
        g = 0.5001
    }
    z = (g - sqrt(g - g^2))/(2 * g - 1)
    alpha = acos((1 - z)/sqrt(1 - 2 * z * (1 - z)))
    v = Re((((2 * cos(alpha) * gamma((p + 2)/2))/(sqrt(pi) * gamma((p + 1)/2))) * 
        (hypergeo(0.5, (1 - p)/2, 3/2, cos(alpha)^2) - sin(alpha)^(p - 1))))
    return(v)
})

## Plot it:
curve(vstat(Rsq = x, n = 133 + 129 + 83 + 7, p = 2), 0.01, 0.25, type = "l", 
    col = "purple", ylim = c(0, 1), xlab = "R-squared when Estimating 2 Parameters", 
    lty = 1, ylab = "v-statistic")
par(new = TRUE)
curve(vstat(Rsq = x, n = 133 + 129, p = 2), 0.01, 0.25, type = "l", col = "green", 
    ylim = c(0, 1), xaxt = "n", yaxt = "n", xlab = "", ylab = "", lty = 2)
par(new = TRUE)

# Horizontal line at 0.5 cut-off
abline(h = 0.5, col = "azure4", lty = 5)
# Legend
legend(0.05, 0.4, c("Reminder (n=262) v. no reminder (n=90)", "Reason (n=133) v. Succinct (n=129)"), 
    lty = c(1, 2), lwd = c(2.5, 2.5), col = c("purple", "green"))

Design analysis

Matti Heino

25 April 2016

Analysing the study design with Retrodesign.

Evaluating the v-statistic