A P-Value Sampling distribution

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.5

set.seed(1)

# Function for P-value

pvalueSampling <- function(popA, popB, n=25, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

Exercise 1

What was the estimated power of the test in the second example above? (Remember: It was a two-sided test, so use \(\alpha/2\) rather than \(\alpha\).)

#Cohen's D is 0.5

pop_a_pwr <- rnorm(1000, mean = 10, sd = 2)
pop_b_pwr <- rnorm(1000, mean = 11, sd = 2) 

p_value_pwr <- data.frame(pval = pvalueSampling(pop_a_pwr, pop_b_pwr, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_pwr, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_pwr <- p.less(p_value_pwr$pval)
names(p_reject_pwr) <- "Estimated Power"
p_reject_pwr

## Estimated Power 
##       0.2573242

The estimated power is: 0.2573242.

Exercise 2

Modify the example to compare sample means when Cohen’s D is 1.0 and 1.5. How does the observed power change?

# For Cohen's D = 1.0
# Cohen's D = (12-10)/2 = 1

pop_a_CD1 <- rnorm(1000, mean = 10, sd = 2)
pop_b_CD1 <- rnorm(1000, mean = 12, sd = 2) 

p_value_CD1 <- data.frame(pval = pvalueSampling(pop_a_CD1, pop_b_CD1, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_CD1, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_CD1 <- p.less(p_value_CD1$pval)
names(p_reject_CD1) <- "Observed Power"
p_reject_CD1

## Observed Power 
##      0.8771973

# For Cohen's D = 1.5
# Cohen's D = (12-9)/2 = 1.5

pop_a_CD1.5 <- rnorm(1000, mean = 9, sd = 2)
pop_b_CD1.5 <- rnorm(1000, mean = 12, sd = 2) 

p_value_CD1.5 <- data.frame(pval = pvalueSampling(pop_a_CD1.5, pop_b_CD1.5, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_CD1.5, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_CD1.5 <- p.less(p_value_CD1.5$pval)
names(p_reject_CD1.5) <- "Observed Power"
p_reject_CD1.5

## Observed Power 
##      0.9985352

As Cohen’s D increases the observed power increases and gets closer and closer to 1.

Exercise 3

Reduce the sample sizes to \(n=9\), and compare to one of your previous cases. How does the observed power change?

pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

# Cohen's D = 0.5

pop_a_E3 <- rnorm(1000, mean = 10, sd = 2)
pop_b_E3 <- rnorm(1000, mean = 11, sd = 2) 

p_value_E3 <- data.frame(pval = pvalueSampling(pop_a_E3, pop_b_E3, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_E3, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_E3<- p.less(p_value_E3$pval)
names(p_reject_E3) <- "Observed Power"
p_reject_E3

## Observed Power 
##     0.09228516

With a sample size of 25 the power was a respectable0.2573242. Decreasing te sample size to 9 drastically reduces the power to 0.0922852.

Exercise 4

Perform a similar comparison where the underlying populations are not normally distributed. You might try either a uniform distribution (runif(n, min, max)) or a gamma distribution (rgamma(n,shape,rate)). When adjusting your values for Cohen’s D, recall that

\[\begin{aligned} U \sim unif(a,b) && E[U] = \frac{a+b}{2} && Var[U] = \frac{(b-a)^2}{12} \\ X \sim gamma(\alpha,\lambda) && E[X] = \frac{\alpha}{\lambda} && Var[X] = \frac{\alpha}{\lambda^2} \end{aligned} \]

# Using a Gamma Distribution
# Cohen's D = 0.471

pop_a_G1 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G1 <- rgamma(1000, shape = 5, rate = 1 ) 

p_value_G1 <- data.frame(pval = pvalueSampling(pop_a_G1, pop_b_G1, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_G1, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_G1 <- p.less(p_value_G1$pval)
names(p_reject_G1) <- "Observed Power"
p_reject_G1

## Observed Power 
##     0.08398438

The estimated power is: 0.0839844.

# Using a Gamma Distribution
# Cohen's D = 1.091

pop_a_G2 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G2 <- rgamma(1000, shape = 6.5, rate = 1 ) 

p_value_G2 <- data.frame(pval = pvalueSampling(pop_a_G2, pop_b_G2, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_G2, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_G2 <- p.less(p_value_G2$pval)
names(p_reject_G2) <- "Observed Power"
p_reject_G2

## Observed Power 
##      0.4804688

The estimated power is: 0.4804688.

# Using a Gamma Distribution
# Cohen's D = 1.491

pop_a_G3 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G3 <- rgamma(1000, shape = 7.5, rate = 1 ) 

p_value_G3 <- data.frame(pval = pvalueSampling(pop_a_G3, pop_b_G3, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_G3, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_G3 <- p.less(p_value_G3$pval)
names(p_reject_G3) <- "Observed Power"
p_reject_G3

## Observed Power 
##      0.7453613

As with the normal distribution, as Cohen’s D increased, the closer to 1 the power got. It increased faster with the gamma distribution than it did with the normal distribution.

pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

# Cohen's D = 0.471

pop_a_G4 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G4 <- rgamma(1000, shape = 5, rate = 1 ) 

p_value_G4 <- data.frame(pval = pvalueSampling(pop_a_G4, pop_b_G4, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value_G4, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject_G4<- p.less(p_value_G4$pval)
names(p_reject_G4) <- "Observed Power"
p_reject_G4

## Observed Power 
##     0.07104492

As with the normal distribution, reducing the sample size drastically reduces the Observed Power.

} ```

A P-Value Sampling distribution

Misty Stultz

11/29/2021

Exercise 1

Exercise 2

Exercise 3

Exercise 4