library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
set.seed(1)
# Function for P-value
pvalueSampling <- function(popA, popB, n=25, REPS=1000) {
dist <- array(data=NA, dim=REPS)
for (i in 1:REPS){
A <- sample(popA, size=n)
B <- sample(popB, size=n)
H <- t.test(A, B, alternative="two.sided")
dist[i] <- H$p.value
}
return(dist)
}
p.less <- function(A, limit=0.05/2) {
return( length(A[A < limit])/length(A) )
}
What was the estimated power of the test in the second example above? (Remember: It was a two-sided test, so use \(\alpha/2\) rather than \(\alpha\).)
#Cohen's D is 0.5
pop_a_pwr <- rnorm(1000, mean = 10, sd = 2)
pop_b_pwr <- rnorm(1000, mean = 11, sd = 2)
p_value_pwr <- data.frame(pval = pvalueSampling(pop_a_pwr, pop_b_pwr, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_pwr, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_pwr <- p.less(p_value_pwr$pval)
names(p_reject_pwr) <- "Estimated Power"
p_reject_pwr
## Estimated Power
## 0.2573242
The estimated power is: 0.2573242.
Modify the example to compare sample means when Cohen’s D is 1.0 and 1.5. How does the observed power change?
# For Cohen's D = 1.0
# Cohen's D = (12-10)/2 = 1
pop_a_CD1 <- rnorm(1000, mean = 10, sd = 2)
pop_b_CD1 <- rnorm(1000, mean = 12, sd = 2)
p_value_CD1 <- data.frame(pval = pvalueSampling(pop_a_CD1, pop_b_CD1, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_CD1, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_CD1 <- p.less(p_value_CD1$pval)
names(p_reject_CD1) <- "Observed Power"
p_reject_CD1
## Observed Power
## 0.8771973
# For Cohen's D = 1.5
# Cohen's D = (12-9)/2 = 1.5
pop_a_CD1.5 <- rnorm(1000, mean = 9, sd = 2)
pop_b_CD1.5 <- rnorm(1000, mean = 12, sd = 2)
p_value_CD1.5 <- data.frame(pval = pvalueSampling(pop_a_CD1.5, pop_b_CD1.5, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_CD1.5, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_CD1.5 <- p.less(p_value_CD1.5$pval)
names(p_reject_CD1.5) <- "Observed Power"
p_reject_CD1.5
## Observed Power
## 0.9985352
As Cohen’s D increases the observed power increases and gets closer and closer to 1.
Reduce the sample sizes to \(n=9\), and compare to one of your previous cases. How does the observed power change?
pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
dist <- array(data=NA, dim=REPS)
for (i in 1:REPS){
A <- sample(popA, size=n)
B <- sample(popB, size=n)
H <- t.test(A, B, alternative="two.sided")
dist[i] <- H$p.value
}
return(dist)
}
p.less <- function(A, limit=0.05/2) {
return( length(A[A < limit])/length(A) )
}
# Cohen's D = 0.5
pop_a_E3 <- rnorm(1000, mean = 10, sd = 2)
pop_b_E3 <- rnorm(1000, mean = 11, sd = 2)
p_value_E3 <- data.frame(pval = pvalueSampling(pop_a_E3, pop_b_E3, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_E3, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_E3<- p.less(p_value_E3$pval)
names(p_reject_E3) <- "Observed Power"
p_reject_E3
## Observed Power
## 0.09228516
With a sample size of 25 the power was a respectable0.2573242. Decreasing te sample size to 9 drastically reduces the power to 0.0922852.
Perform a similar comparison where the underlying populations are not normally distributed. You might try either a uniform distribution (runif(n, min, max)) or a gamma distribution (rgamma(n,shape,rate)). When adjusting your values for Cohen’s D, recall that
\[\begin{aligned} U \sim unif(a,b) && E[U] = \frac{a+b}{2} && Var[U] = \frac{(b-a)^2}{12} \\ X \sim gamma(\alpha,\lambda) && E[X] = \frac{\alpha}{\lambda} && Var[X] = \frac{\alpha}{\lambda^2} \end{aligned} \]
# Using a Gamma Distribution
# Cohen's D = 0.471
pop_a_G1 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G1 <- rgamma(1000, shape = 5, rate = 1 )
p_value_G1 <- data.frame(pval = pvalueSampling(pop_a_G1, pop_b_G1, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_G1, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_G1 <- p.less(p_value_G1$pval)
names(p_reject_G1) <- "Observed Power"
p_reject_G1
## Observed Power
## 0.08398438
The estimated power is: 0.0839844.
# Using a Gamma Distribution
# Cohen's D = 1.091
pop_a_G2 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G2 <- rgamma(1000, shape = 6.5, rate = 1 )
p_value_G2 <- data.frame(pval = pvalueSampling(pop_a_G2, pop_b_G2, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_G2, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_G2 <- p.less(p_value_G2$pval)
names(p_reject_G2) <- "Observed Power"
p_reject_G2
## Observed Power
## 0.4804688
The estimated power is: 0.4804688.
# Using a Gamma Distribution
# Cohen's D = 1.491
pop_a_G3 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G3 <- rgamma(1000, shape = 7.5, rate = 1 )
p_value_G3 <- data.frame(pval = pvalueSampling(pop_a_G3, pop_b_G3, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_G3, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_G3 <- p.less(p_value_G3$pval)
names(p_reject_G3) <- "Observed Power"
p_reject_G3
## Observed Power
## 0.7453613
As with the normal distribution, as Cohen’s D increased, the closer to 1 the power got. It increased faster with the gamma distribution than it did with the normal distribution.
pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
dist <- array(data=NA, dim=REPS)
for (i in 1:REPS){
A <- sample(popA, size=n)
B <- sample(popB, size=n)
H <- t.test(A, B, alternative="two.sided")
dist[i] <- H$p.value
}
return(dist)
}
p.less <- function(A, limit=0.05/2) {
return( length(A[A < limit])/length(A) )
}
# Cohen's D = 0.471
pop_a_G4 <- rgamma(1000, shape = 4, rate = 1)
pop_b_G4 <- rgamma(1000, shape = 5, rate = 1 )
p_value_G4 <- data.frame(pval = pvalueSampling(pop_a_G4, pop_b_G4, REPS = 4096))
ALPHA <- 0.05/2
ggplot(data=p_value_G4, aes(pval)) +
geom_histogram(aes(y=..density..), bins=64,
col="orange", fill="green") +
geom_density(col="red") +
geom_vline(xintercept=ALPHA, color="red") +
labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
theme_minimal(12)
p_reject_G4<- p.less(p_value_G4$pval)
names(p_reject_G4) <- "Observed Power"
p_reject_G4
## Observed Power
## 0.07104492
As with the normal distribution, reducing the sample size drastically reduces the Observed Power.
} ```