library(tidyverse)
library(openintro)
library(ggplot2)

Exercise 1

What was the estimated power of the test in the second example above?

pvalueSampling <- function(popA, popB, n=25, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

pop_Apwr <- rnorm(1000, mean = 10, sd = 2)
pop_Bpwr <- rnorm(1000, mean = 11, sd = 2) 

p_valuepwr <- data.frame(pval = pvalueSampling(pop_Apwr, pop_Bpwr, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_valuepwr, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject <- p.less(p_valuepwr$pval)
names(p_reject) <- "Estimated Power"
p_reject
## Estimated Power 
##       0.3276367

The estimated power is 0.2285156.

Exercise 2

Modify the example to compare sample means when Cohens’s D is 1.0 and 1.5. How does the observed power change?

popA_CD1 <- rnorm(1000, mean = 10, sd = 2)
popB_CD1 <- rnorm(1000, mean = 12, sd = 2) 

pvalue_CD1 <- data.frame(pval = pvalueSampling(popA_CD1, popB_CD1, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_CD1, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject_CD1 <- p.less(pvalue_CD1$pval)
names(preject_CD1) <- "Observed Power"
preject_CD1
## Observed Power 
##      0.8662109
popA_CD1.5 <- rnorm(1000, mean = 9, sd = 2)
popB_CD1.5 <- rnorm(1000, mean = 12, sd = 2) 

pvalue_CD1.5 <- data.frame(pval = pvalueSampling(popA_CD1.5, popB_CD1.5, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_CD1.5, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject_CD1.5 <- p.less(pvalue_CD1.5$pval)
names(preject_CD1.5) <- "Observed Power"
preject_CD1.5
## Observed Power 
##      0.9975586

As the Cohen’s D value increases, so does the observed power.

Exercise 3

Reduce the sample sizes to n=9, and compare to one of your previous cases. How does the observed power change?

pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

# Cohen's D = 0.5

pop_a <- rnorm(1000, mean = 10, sd = 2)
pop_b <- rnorm(1000, mean = 11, sd = 2) 

p_value <- data.frame(pval = pvalueSampling(pop_a, pop_b, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject<- p.less(p_value$pval)
names(p_reject) <- "Observed Power"
p_reject
## Observed Power 
##      0.1013184

Decreasing the sample size decreases the power. With a sample size of 25 the power was .2285156, however decreased to a sample size of 9 decreased the power to .08984375.

Exercise 4

Perform a similar comparison where the underlying populations are not normally distributed. You might try either a uniform distribution( runif(n, min, max)) or a gamma distribution (rgamma(n, shape, rate)).

popA <- rgamma(1000, shape = 4, rate = 1)
popB <- rgamma(1000, shape = 5, rate = 1 ) 

pvalue_1 <- data.frame(pval = pvalueSampling(popA, popB, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_1, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject1 <- p.less(pvalue_1$pval)
names(preject1) <- "Observed Power"
preject1
## Observed Power 
##     0.08105469
popA2 <- rgamma(1000, shape = 4, rate = 1)
popB2 <- rgamma(1000, shape = 6.5, rate = 1 ) 

pvalue_2 <- data.frame(pval = pvalueSampling(popA2, popB2, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_2, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject2 <- p.less(pvalue_2$pval)
names(preject2) <- "Observed Power"
preject2
## Observed Power 
##      0.4665527
popA3 <- rgamma(1000, shape = 4, rate = 1)
popB3 <- rgamma(1000, shape = 7.5, rate = 1 ) 

pvalue_3 <- data.frame(pval = pvalueSampling(popA3, popB3, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_3, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject3 <- p.less(pvalue_3$pval)
names(preject3) <- "Observed Power"
preject3
## Observed Power 
##      0.7233887
pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

popA4 <- rgamma(1000, shape = 4, rate = 1)
popB4 <- rgamma(1000, shape = 5, rate = 1 ) 

pvalue_4 <- data.frame(pval = pvalueSampling(popA4, popB4, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_4, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject4<- p.less(pvalue_4$pval)
names(preject4) <- "Observed Power"
preject4
## Observed Power 
##     0.08374023

Much like a normal distribution, a gamma distribution also increases as the Cohen’s D value increases. We also see that decreasing sample size, also decreases the observed power. …

---
title: "A P-Value Sampling Distribution"
author: "Hailey Jensen"
date: "11/20/2021"
output: openintro::lab_report
---

```{r load-packages, message=FALSE}
library(tidyverse)
library(openintro)
library(ggplot2)
```

### Exercise 1

What was the estimated power of the test in the second example above?

```{r code-chunk-label}
pvalueSampling <- function(popA, popB, n=25, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

pop_Apwr <- rnorm(1000, mean = 10, sd = 2)
pop_Bpwr <- rnorm(1000, mean = 11, sd = 2) 

p_valuepwr <- data.frame(pval = pvalueSampling(pop_Apwr, pop_Bpwr, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_valuepwr, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject <- p.less(p_valuepwr$pval)
names(p_reject) <- "Estimated Power"
p_reject
```

The estimated power is 0.2285156.

### Exercise 2

Modify the example to compare sample means when Cohens’s D is 1.0 and 1.5. How does the observed power change?

```{r}
popA_CD1 <- rnorm(1000, mean = 10, sd = 2)
popB_CD1 <- rnorm(1000, mean = 12, sd = 2) 

pvalue_CD1 <- data.frame(pval = pvalueSampling(popA_CD1, popB_CD1, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_CD1, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject_CD1 <- p.less(pvalue_CD1$pval)
names(preject_CD1) <- "Observed Power"
preject_CD1

popA_CD1.5 <- rnorm(1000, mean = 9, sd = 2)
popB_CD1.5 <- rnorm(1000, mean = 12, sd = 2) 

pvalue_CD1.5 <- data.frame(pval = pvalueSampling(popA_CD1.5, popB_CD1.5, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_CD1.5, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject_CD1.5 <- p.less(pvalue_CD1.5$pval)
names(preject_CD1.5) <- "Observed Power"
preject_CD1.5
```
As the Cohen's D value increases, so does the observed power.

### Exercise 3

Reduce the sample sizes to n=9, and compare to one of your previous cases. How does the observed power change?

```{r}
pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

# Cohen's D = 0.5

pop_a <- rnorm(1000, mean = 10, sd = 2)
pop_b <- rnorm(1000, mean = 11, sd = 2) 

p_value <- data.frame(pval = pvalueSampling(pop_a, pop_b, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=p_value, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

p_reject<- p.less(p_value$pval)
names(p_reject) <- "Observed Power"
p_reject
```
Decreasing the sample size decreases the power. With a sample size of 25 the power was .2285156, however decreased to a sample size of 9 decreased the power to .08984375.

### Exercise 4

Perform a similar comparison where the underlying populations are not normally distributed. You might try either a uniform distribution( runif(n, min, max)) or a gamma distribution (rgamma(n, shape, rate)).

```{r}
popA <- rgamma(1000, shape = 4, rate = 1)
popB <- rgamma(1000, shape = 5, rate = 1 ) 

pvalue_1 <- data.frame(pval = pvalueSampling(popA, popB, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_1, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject1 <- p.less(pvalue_1$pval)
names(preject1) <- "Observed Power"
preject1

popA2 <- rgamma(1000, shape = 4, rate = 1)
popB2 <- rgamma(1000, shape = 6.5, rate = 1 ) 

pvalue_2 <- data.frame(pval = pvalueSampling(popA2, popB2, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_2, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject2 <- p.less(pvalue_2$pval)
names(preject2) <- "Observed Power"
preject2

popA3 <- rgamma(1000, shape = 4, rate = 1)
popB3 <- rgamma(1000, shape = 7.5, rate = 1 ) 

pvalue_3 <- data.frame(pval = pvalueSampling(popA3, popB3, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_3, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject3 <- p.less(pvalue_3$pval)
names(preject3) <- "Observed Power"
preject3

pvalueSampling <- function(popA, popB, n=9, REPS=1000) {
  dist <- array(data=NA, dim=REPS)
  for (i in 1:REPS){
    A <- sample(popA, size=n)
    B <- sample(popB, size=n)
    H <- t.test(A, B, alternative="two.sided")    
    dist[i] <- H$p.value
  }
  return(dist)
}

p.less <- function(A, limit=0.05/2) {
  return( length(A[A < limit])/length(A) )
}

popA4 <- rgamma(1000, shape = 4, rate = 1)
popB4 <- rgamma(1000, shape = 5, rate = 1 ) 

pvalue_4 <- data.frame(pval = pvalueSampling(popA4, popB4, REPS = 4096))
ALPHA <- 0.05/2

ggplot(data=pvalue_4, aes(pval)) + 
  geom_histogram(aes(y=..density..), bins=64, 
                 col="orange", fill="green") +
  geom_density(col="red") +
  geom_vline(xintercept=ALPHA, color="red") +
  labs(title="Sampling Distribution of p-values", x = "p-value", y = "frequency") +
  theme_minimal(12)

preject4<- p.less(pvalue_4$pval)
names(preject4) <- "Observed Power"
preject4
```
Much like a normal distribution, a gamma distribution also increases as the Cohen's D value increases. We also see that decreasing sample size, also decreases the observed power.
...

