Distributions of P-Values under Model Misspecification

============================================================================================================ The Bayes factor: https://rpubs.com/sherloconan/1160305

1. Skewed \(t\)

1.1 Skew Student-\(t\) Distribution

Same population mean and variance.

x <- seq(-3, 3, by=0.01)
p0 <- dnorm(x, mean=0, sd=1)
p1 <- fGarch::dsstd(x, mean=0, sd=1, nu=4, xi=20)

plot(x, p1, type="l", lty=2, lwd=2, col="red", xlab="x", ylab="Density")
lines(x, p0, type="l", lwd=2)
legend("topright",c("Violated","Normal"), col=c("red","black"), lty=c(2,1), lwd=2, bty="n")

1.2 Simulation (True Effect = 0)

NSim <- 500 #number of replicates

viz2(10, NSim, 0) #function is defined in Section 5

viz2(20, NSim, 0)

viz2(100, NSim, 0)

viz2(1000, NSim, 0)

1.3 Simulation (True Effect = 0.5)

viz2(10, NSim, 0.5)

viz2(20, NSim, 0.5)

viz2(100, NSim, 0.5, xlim=c(0,.005))

viz2(1000, NSim, 0.5, xlim=c(0,1E-43))

2. Uniform

2.1 Continuous Uniform Distribution

Same population mean and variance.

x <- seq(-3, 3, by=0.01)
p0 <- dnorm(x, mean=0, sd=1)
p1 <- dunif(x, min=-sqrt(3), max=sqrt(3))
# mean = (a + b) / 2
# variance = (b - a)^2 / 12 ==>
# a = mean - sqrt(3 * variance)
# b = mean + sqrt(3 * variance)

plot(x, p1, type="l", lty=2, lwd=2, col="red", xlab="x", ylab="Density", ylim=c(0, 0.42))
lines(x, p0, type="l", lwd=2)
legend("topright",c("Violated","Normal"), col=c("red","black"), lty=c(2,1), lwd=2, bty="n")

2.2 Simulation (True Effect = 0)

viz2(10, NSim, 0, dist="unif")

viz2(20, NSim, 0, dist="unif")

viz2(100, NSim, 0, dist="unif")

viz2(1000, NSim, 0, dist="unif")

2.3 Simulation (True Effect = 0.5)

viz2(10, NSim, 0.5, dist="unif")

viz2(20, NSim, 0.5, dist="unif")

viz2(100, NSim, 0.5, dist="unif", xlim=c(0,.005))

viz2(1000, NSim, 0.5, dist="unif", xlim=c(0,1E-43))

3. Triangular

3.1 Symmetric Triangular Distribution

Same population mean and variance.

x <- seq(-3, 3, by=0.01)
p0 <- dnorm(x, mean=0, sd=1)

sym_tri_pdf <- function(x, a, b) {
  #' define the symmetric triangular distribution PDF
  c <- (a + b) / 2
  ifelse(x < a | x > b, 0, ((b - c) - abs(c - x)) / ((b-c)^2))
}

plot(x, sym_tri_pdf(x, -sqrt(6), sqrt(6)),
     type="l", lty=2, lwd=2, col="red", xlab="x", ylab="Density")
lines(x, p0, type="l", lwd=2)
legend("topright",c("Violated","Normal"), col=c("red","black"), lty=c(2,1), lwd=2, bty="n")

3.2 Simulation (True Effect = 0)

viz2(10, NSim, 0, dist="tri")

viz2(20, NSim, 0, dist="tri")

viz2(100, NSim, 0, dist="tri")

viz2(1000, NSim, 0, dist="tri")

3.3 Simulation (True Effect = 0.5)

viz2(10, NSim, 0.5, dist="tri")

viz2(20, NSim, 0.5, dist="tri")

viz2(100, NSim, 0.5, dist="tri", xlim=c(0,.005))

viz2(1000, NSim, 0.5, dist="tri", xlim=c(0,1E-43))

4. Increase \(\sigma_\epsilon^2\)

4.1 Simulation (True Effect = 0)

viz2(10, NSim, 0, dist="100sd")

viz2(20, NSim, 0, dist="100sd")

viz2(100, NSim, 0, dist="100sd")

viz2(1000, NSim, 0, dist="100sd")

4.2 Simulation (True Effect = 0.5)

viz2(10, NSim, 0.5, dist="100sd")

viz2(20, NSim, 0.5, dist="100sd")

viz2(100, NSim, 0.5, dist="100sd")

viz2(1000, NSim, 0.5, dist="100sd")

5. Function

viz2 <- function(N, NSim, ES, SD=1, nu=4, sk=20, dist="skew", ...) {
  #' Input -
  #' N:     number of subjects
  #' NSim:  number of replicates
  #' ES:    effect size
  #' SD:    error standard deviation
  #' nu:    shape parameter (degrees of freedom) of the skewed t
  #' sk:    skewness parameter of the skewed t
  #' dist:  distribution that violates the normality, but has the same population mean and variance
  #'        "skew" (default), skewed t.
  #'        "unif", continuous uniform.
  #'        "tri", symmetric triangular
  #'        "100sd", increase error standard deviation
  #' Dependencies - fGarch, ggplot2
  #' Output - distributions of p-values given data that follow (1) a normal or (2) not a normal
  
  pVal <- pVal_rep <- pVal_vio <- numeric(NSim)
  for (i in 1:NSim) {
    set.seed(i)
    y0 <- rnorm(N, ES, SD)
    pVal[i] <- t.test(y0)$p.value

    set.seed(i-NSim)
    y0_rep <- rnorm(N, ES, SD)
    pVal_rep[i] <- t.test(y0_rep)$p.value
    
    set.seed(i+NSim)
    if (dist=="skew") {
      y1 <- fGarch::rsstd(N, mean=ES, sd=SD, nu=nu, xi=sk)
      label <- "Skewed"
      
    } else if (dist=="unif") {
      # mean = (a + b) / 2
      # variance = (b - a)^2 / 12 
      y1 <- runif(N, min=ES-sqrt(3)*SD, max=ES+sqrt(3)*SD)
      label <- "Uniform"
      
    } else if (dist=="tri") {
      # mean = (a + b) / 2
      # variance = (b - a)^2 / 24
      a <- ES - sqrt(6) * SD
      b <- ES + sqrt(6) * SD
      u <- runif(N) #generate from U(0,1)
      y1 <- ifelse(u < 0.5,
                   a+sqrt((b-a)^2*u/2),
                   b-sqrt((b-a)^2*(1-u)/2)) #inverse transformation method
      label <- "Triangular"
      
    } else if (dist=="100sd") {
      y1 <- rnorm(N, ES, SD*100)
      label <- "Normal (100×SD)"
      
    } else {stop("Bad Input!")}
    pVal_vio[i] <- t.test(y1)$p.value
  }
  
  df <- data.frame("Model"=factor(rep(c("Normal","Normal (rep)",label),each=NSim),
                                  levels=c("Normal","Normal (rep)",label)),
                   "pVal"=c(pVal, pVal_rep, pVal_vio))
  typeI <- c(sum(pVal < .05), sum(pVal_rep < .05), sum(pVal_vio < .05)) / NSim
  
  print(ggplot(df, aes(x=Model, y=pVal))+
    geom_boxplot()+
    labs(x="True Model", y=expression(italic("P")~"-Value"),
         title=expression("One-Sample"~italic(t)~"-Test"),
         subtitle=paste0("Number of Subjects = ",N,", Number of Replicates = ",NSim,", Effect Size = ",ES))+
    annotate("text", x=c(0.9,1.9,2.9), y=.015, label=typeI, color="red", angle=90)+
    geom_hline(yintercept=.05, linetype="dotted", color="gray")+
    geom_hline(yintercept=0, linetype="dashed", color="gray")+
    geom_hline(yintercept=1, linetype="dashed", color="gray")+
    scale_y_continuous(breaks=c(.01,.05,.1,1))+
    theme_classic())
  
  ecdf_norm <- stats::ecdf(pVal) #compute an empirical cumulative distribution function
  ecdf_rep <- stats::ecdf(pVal_rep)
  ecdf_vio <- stats::ecdf(pVal_vio)
  plot(ecdf_norm, main="Empirical CDF Comparison", xlab=expression(italic("P")~"-Value"), ylab="ECDF",
       sub=paste0("Number of Subjects = ",N,", Number of Replicates = ",NSim,", Effect Size = ",ES), ...)
  lines(ecdf_rep, col="blue")
  lines(ecdf_vio, col="red")
  legend("bottomright", legend=c("Normal","Normal (rep)",label), col=c("black","blue","red"), lwd=2)
}

Distributions of \(P\)-Values
under Model Misspecification

Skewed Residuals in the One-Sample t-Test

Zhengxiao Wei, Farouk S. Nathoo, Michael E. J. Masson

2024-03-20

1. Skewed \(t\)

1.1 Skew Student-\(t\) Distribution

1.2 Simulation (True Effect = 0)

1.3 Simulation (True Effect = 0.5)

2. Uniform

2.1 Continuous Uniform Distribution

2.2 Simulation (True Effect = 0)

2.3 Simulation (True Effect = 0.5)

3. Triangular

3.1 Symmetric Triangular Distribution

3.2 Simulation (True Effect = 0)

3.3 Simulation (True Effect = 0.5)

4. Increase \(\sigma_\epsilon^2\)

4.1 Simulation (True Effect = 0)

4.2 Simulation (True Effect = 0.5)

5. Function

Distributions of \(P\)-Values under Model Misspecification

Skewed Residuals in the One-Sample t-Test

Zhengxiao Wei, Farouk S. Nathoo, Michael E. J. Masson

2024-03-20

1. Skewed \(t\)

1.1 Skew Student-\(t\) Distribution

1.2 Simulation (True Effect = 0)

1.3 Simulation (True Effect = 0.5)

2. Uniform

2.1 Continuous Uniform Distribution

2.2 Simulation (True Effect = 0)

2.3 Simulation (True Effect = 0.5)

3. Triangular

3.1 Symmetric Triangular Distribution

3.2 Simulation (True Effect = 0)

3.3 Simulation (True Effect = 0.5)

4. Increase \(\sigma_\epsilon^2\)

4.1 Simulation (True Effect = 0)

4.2 Simulation (True Effect = 0.5)

5. Function

Distributions of \(P\)-Values
under Model Misspecification