============================================================================================================ The Bayes factor: https://rpubs.com/sherloconan/1160305
\(p\)-values: https://rpubs.com/sherloconan/1163912
The Bayes factors and \(p\)-values: https://rpubs.com/sherloconan/1164042
MORE: https://github.com/zhengxiaoUVic/Bayesian
Same population mean and variance.
x <- seq(-3, 3, by=0.01)
p0 <- dnorm(x, mean=0, sd=1)
p1 <- fGarch::dsstd(x, mean=0, sd=1, nu=4, xi=20)
plot(x, p1, type="l", lty=2, lwd=2, col="red", xlab="x", ylab="Density")
lines(x, p0, type="l", lwd=2)
legend("topright",c("Violated","Normal"), col=c("red","black"), lty=c(2,1), lwd=2, bty="n")
NSim <- 500 #number of replicates
viz2(10, NSim, 0) #function is defined in Section 5
viz2(20, NSim, 0)
viz2(100, NSim, 0)
viz2(1000, NSim, 0)
viz2(10, NSim, 0.5)
viz2(20, NSim, 0.5)
viz2(100, NSim, 0.5, xlim=c(0,.005))
viz2(1000, NSim, 0.5, xlim=c(0,1E-43))
Same population mean and variance.
x <- seq(-3, 3, by=0.01)
p0 <- dnorm(x, mean=0, sd=1)
p1 <- dunif(x, min=-sqrt(3), max=sqrt(3))
# mean = (a + b) / 2
# variance = (b - a)^2 / 12 ==>
# a = mean - sqrt(3 * variance)
# b = mean + sqrt(3 * variance)
plot(x, p1, type="l", lty=2, lwd=2, col="red", xlab="x", ylab="Density", ylim=c(0, 0.42))
lines(x, p0, type="l", lwd=2)
legend("topright",c("Violated","Normal"), col=c("red","black"), lty=c(2,1), lwd=2, bty="n")
viz2(10, NSim, 0, dist="unif")
viz2(20, NSim, 0, dist="unif")
viz2(100, NSim, 0, dist="unif")
viz2(1000, NSim, 0, dist="unif")
viz2(10, NSim, 0.5, dist="unif")
viz2(20, NSim, 0.5, dist="unif")
viz2(100, NSim, 0.5, dist="unif", xlim=c(0,.005))
viz2(1000, NSim, 0.5, dist="unif", xlim=c(0,1E-43))
Same population mean and variance.
x <- seq(-3, 3, by=0.01)
p0 <- dnorm(x, mean=0, sd=1)
sym_tri_pdf <- function(x, a, b) {
#' define the symmetric triangular distribution PDF
c <- (a + b) / 2
ifelse(x < a | x > b, 0, ((b - c) - abs(c - x)) / ((b-c)^2))
}
plot(x, sym_tri_pdf(x, -sqrt(6), sqrt(6)),
type="l", lty=2, lwd=2, col="red", xlab="x", ylab="Density")
lines(x, p0, type="l", lwd=2)
legend("topright",c("Violated","Normal"), col=c("red","black"), lty=c(2,1), lwd=2, bty="n")
viz2(10, NSim, 0, dist="tri")
viz2(20, NSim, 0, dist="tri")
viz2(100, NSim, 0, dist="tri")
viz2(1000, NSim, 0, dist="tri")
viz2(10, NSim, 0.5, dist="tri")
viz2(20, NSim, 0.5, dist="tri")
viz2(100, NSim, 0.5, dist="tri", xlim=c(0,.005))
viz2(1000, NSim, 0.5, dist="tri", xlim=c(0,1E-43))
viz2(10, NSim, 0, dist="100sd")
viz2(20, NSim, 0, dist="100sd")
viz2(100, NSim, 0, dist="100sd")
viz2(1000, NSim, 0, dist="100sd")
viz2(10, NSim, 0.5, dist="100sd")
viz2(20, NSim, 0.5, dist="100sd")
viz2(100, NSim, 0.5, dist="100sd")
viz2(1000, NSim, 0.5, dist="100sd")
viz2 <- function(N, NSim, ES, SD=1, nu=4, sk=20, dist="skew", ...) {
#' Input -
#' N: number of subjects
#' NSim: number of replicates
#' ES: effect size
#' SD: error standard deviation
#' nu: shape parameter (degrees of freedom) of the skewed t
#' sk: skewness parameter of the skewed t
#' dist: distribution that violates the normality, but has the same population mean and variance
#' "skew" (default), skewed t.
#' "unif", continuous uniform.
#' "tri", symmetric triangular
#' "100sd", increase error standard deviation
#' Dependencies - fGarch, ggplot2
#' Output - distributions of p-values given data that follow (1) a normal or (2) not a normal
pVal <- pVal_rep <- pVal_vio <- numeric(NSim)
for (i in 1:NSim) {
set.seed(i)
y0 <- rnorm(N, ES, SD)
pVal[i] <- t.test(y0)$p.value
set.seed(i-NSim)
y0_rep <- rnorm(N, ES, SD)
pVal_rep[i] <- t.test(y0_rep)$p.value
set.seed(i+NSim)
if (dist=="skew") {
y1 <- fGarch::rsstd(N, mean=ES, sd=SD, nu=nu, xi=sk)
label <- "Skewed"
} else if (dist=="unif") {
# mean = (a + b) / 2
# variance = (b - a)^2 / 12
y1 <- runif(N, min=ES-sqrt(3)*SD, max=ES+sqrt(3)*SD)
label <- "Uniform"
} else if (dist=="tri") {
# mean = (a + b) / 2
# variance = (b - a)^2 / 24
a <- ES - sqrt(6) * SD
b <- ES + sqrt(6) * SD
u <- runif(N) #generate from U(0,1)
y1 <- ifelse(u < 0.5,
a+sqrt((b-a)^2*u/2),
b-sqrt((b-a)^2*(1-u)/2)) #inverse transformation method
label <- "Triangular"
} else if (dist=="100sd") {
y1 <- rnorm(N, ES, SD*100)
label <- "Normal (100×SD)"
} else {stop("Bad Input!")}
pVal_vio[i] <- t.test(y1)$p.value
}
df <- data.frame("Model"=factor(rep(c("Normal","Normal (rep)",label),each=NSim),
levels=c("Normal","Normal (rep)",label)),
"pVal"=c(pVal, pVal_rep, pVal_vio))
typeI <- c(sum(pVal < .05), sum(pVal_rep < .05), sum(pVal_vio < .05)) / NSim
print(ggplot(df, aes(x=Model, y=pVal))+
geom_boxplot()+
labs(x="True Model", y=expression(italic("P")~"-Value"),
title=expression("One-Sample"~italic(t)~"-Test"),
subtitle=paste0("Number of Subjects = ",N,", Number of Replicates = ",NSim,", Effect Size = ",ES))+
annotate("text", x=c(0.9,1.9,2.9), y=.015, label=typeI, color="red", angle=90)+
geom_hline(yintercept=.05, linetype="dotted", color="gray")+
geom_hline(yintercept=0, linetype="dashed", color="gray")+
geom_hline(yintercept=1, linetype="dashed", color="gray")+
scale_y_continuous(breaks=c(.01,.05,.1,1))+
theme_classic())
ecdf_norm <- stats::ecdf(pVal) #compute an empirical cumulative distribution function
ecdf_rep <- stats::ecdf(pVal_rep)
ecdf_vio <- stats::ecdf(pVal_vio)
plot(ecdf_norm, main="Empirical CDF Comparison", xlab=expression(italic("P")~"-Value"), ylab="ECDF",
sub=paste0("Number of Subjects = ",N,", Number of Replicates = ",NSim,", Effect Size = ",ES), ...)
lines(ecdf_rep, col="blue")
lines(ecdf_vio, col="red")
legend("bottomright", legend=c("Normal","Normal (rep)",label), col=c("black","blue","red"), lwd=2)
}