Introduction

This is our first markdown with randomization.

# Loop
N <- 1000

sample_cor_coef <- numeric(N)

for(i in 1:N) {
  
  male_sample <- sample(x = male, size = length(male), replace = FALSE)
  female_sample <- sample(x = female, size = length(female), replace = FALSE)
  sample_cor_coef[i] <- cor(male_sample, female_sample)
  
}

# Plot

# Basic
library(tidyverse)

ggplot() +
  geom_density(aes(sample_cor_coef)) +
  geom_vline(aes(xintercept = obs_cor_coef), col = "tomato")  +
  theme_bw() + 
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0))

# Fancier

dens <- density(sample_cor_coef)

data <- tibble(x = dens$x, y = dens$y) %>% 
  mutate(variable = case_when(
    (x >= obs_cor_coef) ~ "higher",
    (x < obs_cor_coef) ~ "lower",
    TRUE ~ NA_character_))

ggplot(data, aes(x, y)) + geom_line() +
  geom_area(data = filter(data, variable == 'higher'), fill = 'tomato') + 
  geom_area(data = filter(data, variable == 'lower'), fill = 'grey') +
  geom_vline(aes(xintercept = obs_cor_coef))  +
  theme_bw() + 
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0))

# p-value

sum(sample_cor_coef > obs_cor_coef)/N
## [1] 0.001