This is our first markdown with randomization.
# Loop
N <- 1000
sample_cor_coef <- numeric(N)
for(i in 1:N) {
male_sample <- sample(x = male, size = length(male), replace = FALSE)
female_sample <- sample(x = female, size = length(female), replace = FALSE)
sample_cor_coef[i] <- cor(male_sample, female_sample)
}
# Plot
# Basic
library(tidyverse)
ggplot() +
geom_density(aes(sample_cor_coef)) +
geom_vline(aes(xintercept = obs_cor_coef), col = "tomato") +
theme_bw() +
scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0))
# Fancier
dens <- density(sample_cor_coef)
data <- tibble(x = dens$x, y = dens$y) %>%
mutate(variable = case_when(
(x >= obs_cor_coef) ~ "higher",
(x < obs_cor_coef) ~ "lower",
TRUE ~ NA_character_))
ggplot(data, aes(x, y)) + geom_line() +
geom_area(data = filter(data, variable == 'higher'), fill = 'tomato') +
geom_area(data = filter(data, variable == 'lower'), fill = 'grey') +
geom_vline(aes(xintercept = obs_cor_coef)) +
theme_bw() +
scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0))
# p-value
sum(sample_cor_coef > obs_cor_coef)/N
## [1] 0.001