Introduction

This report investigates whether women on hormonal birth control have higher basal levels of estrogen than those who are not.

Setup & Data Import

Load required libraries

rm(list=ls())

library(tidyverse)
library(readxl)
library(car)
library(FSA)

Load BC & Aurora data

bc_data <- read_excel("AURORA_Contraceptives Progression.xlsx", sheet = "Pre_Contcp") %>%
  select(PID, `Progestin only / Estrogen containing`) %>%
  rename(BC = `Progestin only / Estrogen containing`)

aurora_data <- read_csv("full_aurora.csv")

Load Estrogen data

# Load estrogen concentration data
e2_conc <- read.csv("E2_concentrated.csv") %>%
  mutate(
    mean_conc = as.numeric(Mean.Concentration..pg.mL.),
    CV = as.numeric(`X..CV`)
  ) %>%
  filter(
    mean_conc > 0,
    mean_conc < 420,
    CV > 0
  ) %>%
  select(Inventory.Code, mean_conc, CV) %>%
  mutate(sqrt_conc = sqrt(mean_conc))

# Load PID key for estrogen data
pid_key <- read_excel("e2_pid_key.xlsx")

Data Cleaning & Prep

Key E2 Data with PID

e2_keyed <- e2_conc %>%
  inner_join(pid_key, by = c("Inventory.Code" = "InventoryCode"))

# Filter for specific time points
e2_time_filtered <- e2_keyed %>%
  filter(AlternateID == "T0")  # Basal level at time of ED visit

Merge E2 + Aurora

e2_full <- e2_time_filtered %>%
  left_join(aurora_data, by = "PID") %>%
  select(
    PID,
    ED_Age,
    ED_HighestGrade,
    BMI,
    ED_RaceEthCode,
    mean_conc,
    ED_NowPain_C,
    WK8_Pain_C,
    M3_Pain_C,
    M6_Pain_C,
    AlternateID
  ) %>%
  rename(
    ED_Pain = ED_NowPain_C,
    WK8_Pain = WK8_Pain_C,
    M3_Pain = M3_Pain_C,
    M6_Pain = M6_Pain_C,
    Race = ED_RaceEthCode
  ) %>%
  filter(!is.na(mean_conc))

Merge BC + E2

# Merge birth control data with estrogen and Aurora data
combined_data <- bc_data %>%
  full_join(e2_full, by = "PID") %>%
  filter(AlternateID == "T0")  # Ensure basal estrogen levels

# Recode NA values in the 'BC' column to 'None'
combined_data <- combined_data %>%
  mutate(BC = ifelse(is.na(BC), "None", BC))

# Convert 'BC' to a factor with explicit levels
combined_data <- combined_data %>%
  mutate(BC = factor(BC, levels = c("Progestin only", "Estrogen containing", "None")))

Analysis

Summary Statistics

# Calculate mean mean_conc for each BC category
mean_estrogen <- combined_data %>%
  group_by(BC) %>%
  summarise(mean_sqrt_conc = mean(mean_conc, na.rm = TRUE),
            count = n())

# Display the summary
print(mean_estrogen)

Visualizations

gghisto <- list(
  theme(
    axis.text.x = element_text(face = "bold", color = "cornflowerblue", size = 12),
    axis.text.y = element_text(face = "bold", color = "royalblue4", size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    plot.title = element_text(size = 14, face = "bold.italic"),
    panel.border = element_rect(color = "black", fill = NA, size = 1)))

ggplot(combined_data, aes(x = BC, y = mean_conc, color = BC)) +
  geom_boxplot(color = "black", alpha = 0.8, width = 0.6) +
  geom_jitter(aes(fill = BC),shape = 21, color = "black",  
    width = 0.2, alpha = 0.7, size = 4      
  ) +
  labs(
    title = "Basal Estrogen Levels by Birth Control Category",
    x = "Birth Control Category",
    y = "E2 Concentration (pg/mL)"
  ) +
  gghisto +
  theme(legend.position = "none")

Normality/Homogeneity

analysis_data <- combined_data

# Shapiro-Wilk test for each group
shapiro_progestin <- shapiro.test(analysis_data$mean_conc[analysis_data$BC == "Progestin only"])
shapiro_estrogen <- shapiro.test(analysis_data$mean_conc[analysis_data$BC == "Estrogen containing"])

# Print results
print(shapiro_progestin)

    Shapiro-Wilk normality test

data:  analysis_data$mean_conc[analysis_data$BC == "Progestin only"]
W = 0.66494, p-value = 2.546e-06
print(shapiro_estrogen)

    Shapiro-Wilk normality test

data:  analysis_data$mean_conc[analysis_data$BC == "Estrogen containing"]
W = 0.41389, p-value = 4.381e-06
# Q-Q Plots
ggplot(analysis_data, aes(sample = mean_conc, color = BC)) +
  stat_qq() +
  stat_qq_line() +
  facet_wrap(~ BC) +
  theme_minimal() +
  labs(title = "Q-Q Plots for Estrogen Concentration by BC Category") + gghisto+
  theme(legend.position = "none")

# Levene's Test
levene_test <- leveneTest(mean_conc ~ BC, data = analysis_data)
print(levene_test)
Levene's Test for Homogeneity of Variance (center = median)
       Df F value Pr(>F)
group   2  1.0734 0.3425
      555               

KW Test & Dunn’s

kruskal_result <- kruskal.test(mean_conc ~ BC, data = analysis_data)
dunn_test <- dunnTest(mean_conc ~ BC, data = analysis_data)

print(kruskal_result);print(dunn_test)

    Kruskal-Wallis rank sum test

data:  mean_conc by BC
Kruskal-Wallis chi-squared = 0.63846, df = 2, p-value = 0.7267
Dunn (1964) Kruskal-Wallis multiple comparison
  p-values adjusted with the Holm method.
LS0tCnRpdGxlOiAiSG9ybW9uYWwgQmlydGggQ29udHJvbCBhbmQgRXN0cm9nZW4gTGV2ZWxzIEFuYWx5c2lzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyMgSW50cm9kdWN0aW9uCgoqVGhpcyByZXBvcnQgaW52ZXN0aWdhdGVzIHdoZXRoZXIgd29tZW4gb24gaG9ybW9uYWwgYmlydGggY29udHJvbCBoYXZlIGhpZ2hlciBiYXNhbCBsZXZlbHMgb2YgZXN0cm9nZW4gdGhhbiB0aG9zZSB3aG8gYXJlIG5vdC4qCgojIyMgU2V0dXAgJiBEYXRhIEltcG9ydAoKIyMjIyBMb2FkIHJlcXVpcmVkIGxpYnJhcmllcwoKYGBge3Igc2V0dXAsIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0V9CnJtKGxpc3Q9bHMoKSkKCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHJlYWR4bCkKbGlicmFyeShjYXIpCmxpYnJhcnkoRlNBKQpgYGAKCiMjIyMgTG9hZCBCQyAmIEF1cm9yYSBkYXRhCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQpiY19kYXRhIDwtIHJlYWRfZXhjZWwoIkFVUk9SQV9Db250cmFjZXB0aXZlcyBQcm9ncmVzc2lvbi54bHN4Iiwgc2hlZXQgPSAiUHJlX0NvbnRjcCIpICU+JQogIHNlbGVjdChQSUQsIGBQcm9nZXN0aW4gb25seSAvIEVzdHJvZ2VuIGNvbnRhaW5pbmdgKSAlPiUKICByZW5hbWUoQkMgPSBgUHJvZ2VzdGluIG9ubHkgLyBFc3Ryb2dlbiBjb250YWluaW5nYCkKCmF1cm9yYV9kYXRhIDwtIHJlYWRfY3N2KCJmdWxsX2F1cm9yYS5jc3YiKQpgYGAKCiMjIyMgTG9hZCBFc3Ryb2dlbiBkYXRhCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQojIExvYWQgZXN0cm9nZW4gY29uY2VudHJhdGlvbiBkYXRhCmUyX2NvbmMgPC0gcmVhZC5jc3YoIkUyX2NvbmNlbnRyYXRlZC5jc3YiKSAlPiUKICBtdXRhdGUoCiAgICBtZWFuX2NvbmMgPSBhcy5udW1lcmljKE1lYW4uQ29uY2VudHJhdGlvbi4ucGcubUwuKSwKICAgIENWID0gYXMubnVtZXJpYyhgWC4uQ1ZgKQogICkgJT4lCiAgZmlsdGVyKAogICAgbWVhbl9jb25jID4gMCwKICAgIG1lYW5fY29uYyA8IDQyMCwKICAgIENWID4gMAogICkgJT4lCiAgc2VsZWN0KEludmVudG9yeS5Db2RlLCBtZWFuX2NvbmMsIENWKSAlPiUKICBtdXRhdGUoc3FydF9jb25jID0gc3FydChtZWFuX2NvbmMpKQoKIyBMb2FkIFBJRCBrZXkgZm9yIGVzdHJvZ2VuIGRhdGEKcGlkX2tleSA8LSByZWFkX2V4Y2VsKCJlMl9waWRfa2V5Lnhsc3giKQpgYGAKCiMjIyBEYXRhIENsZWFuaW5nICYgUHJlcAoKIyMjIyBLZXkgRTIgRGF0YSB3aXRoIFBJRApgYGB7cn0KZTJfa2V5ZWQgPC0gZTJfY29uYyAlPiUKICBpbm5lcl9qb2luKHBpZF9rZXksIGJ5ID0gYygiSW52ZW50b3J5LkNvZGUiID0gIkludmVudG9yeUNvZGUiKSkKCiMgRmlsdGVyIGZvciBzcGVjaWZpYyB0aW1lIHBvaW50cwplMl90aW1lX2ZpbHRlcmVkIDwtIGUyX2tleWVkICU+JQogIGZpbHRlcihBbHRlcm5hdGVJRCA9PSAiVDAiKSAgIyBCYXNhbCBsZXZlbCBhdCB0aW1lIG9mIEVEIHZpc2l0CmBgYAoKIyMjIyBNZXJnZSBFMiArIEF1cm9yYSAKYGBge3J9CmUyX2Z1bGwgPC0gZTJfdGltZV9maWx0ZXJlZCAlPiUKICBsZWZ0X2pvaW4oYXVyb3JhX2RhdGEsIGJ5ID0gIlBJRCIpICU+JQogIHNlbGVjdCgKICAgIFBJRCwKICAgIEVEX0FnZSwKICAgIEVEX0hpZ2hlc3RHcmFkZSwKICAgIEJNSSwKICAgIEVEX1JhY2VFdGhDb2RlLAogICAgbWVhbl9jb25jLAogICAgRURfTm93UGFpbl9DLAogICAgV0s4X1BhaW5fQywKICAgIE0zX1BhaW5fQywKICAgIE02X1BhaW5fQywKICAgIEFsdGVybmF0ZUlECiAgKSAlPiUKICByZW5hbWUoCiAgICBFRF9QYWluID0gRURfTm93UGFpbl9DLAogICAgV0s4X1BhaW4gPSBXSzhfUGFpbl9DLAogICAgTTNfUGFpbiA9IE0zX1BhaW5fQywKICAgIE02X1BhaW4gPSBNNl9QYWluX0MsCiAgICBSYWNlID0gRURfUmFjZUV0aENvZGUKICApICU+JQogIGZpbHRlcighaXMubmEobWVhbl9jb25jKSkKYGBgCgojIyMjIE1lcmdlIEJDICsgRTIKYGBge3J9CiMgTWVyZ2UgYmlydGggY29udHJvbCBkYXRhIHdpdGggZXN0cm9nZW4gYW5kIEF1cm9yYSBkYXRhCmNvbWJpbmVkX2RhdGEgPC0gYmNfZGF0YSAlPiUKICBmdWxsX2pvaW4oZTJfZnVsbCwgYnkgPSAiUElEIikgJT4lCiAgZmlsdGVyKEFsdGVybmF0ZUlEID09ICJUMCIpICAjIEVuc3VyZSBiYXNhbCBlc3Ryb2dlbiBsZXZlbHMKCiMgUmVjb2RlIE5BIHZhbHVlcyBpbiB0aGUgJ0JDJyBjb2x1bW4gdG8gJ05vbmUnCmNvbWJpbmVkX2RhdGEgPC0gY29tYmluZWRfZGF0YSAlPiUKICBtdXRhdGUoQkMgPSBpZmVsc2UoaXMubmEoQkMpLCAiTm9uZSIsIEJDKSkKCiMgQ29udmVydCAnQkMnIHRvIGEgZmFjdG9yIHdpdGggZXhwbGljaXQgbGV2ZWxzCmNvbWJpbmVkX2RhdGEgPC0gY29tYmluZWRfZGF0YSAlPiUKICBtdXRhdGUoQkMgPSBmYWN0b3IoQkMsIGxldmVscyA9IGMoIlByb2dlc3RpbiBvbmx5IiwgIkVzdHJvZ2VuIGNvbnRhaW5pbmciLCAiTm9uZSIpKSkKYGBgCgojIyMgQW5hbHlzaXMKCiMjIyMgU3VtbWFyeSBTdGF0aXN0aWNzCmBgYHtyfQojIENhbGN1bGF0ZSBtZWFuIG1lYW5fY29uYyBmb3IgZWFjaCBCQyBjYXRlZ29yeQptZWFuX2VzdHJvZ2VuIDwtIGNvbWJpbmVkX2RhdGEgJT4lCiAgZ3JvdXBfYnkoQkMpICU+JQogIHN1bW1hcmlzZShtZWFuX3NxcnRfY29uYyA9IG1lYW4obWVhbl9jb25jLCBuYS5ybSA9IFRSVUUpLAogICAgICAgICAgICBjb3VudCA9IG4oKSkKCiMgRGlzcGxheSB0aGUgc3VtbWFyeQpwcmludChtZWFuX2VzdHJvZ2VuKQpgYGAKCiMjIyMgVmlzdWFsaXphdGlvbnMKYGBge3J9CmdnaGlzdG8gPC0gbGlzdCgKICB0aGVtZSgKICAgIGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGZhY2UgPSAiYm9sZCIsIGNvbG9yID0gImNvcm5mbG93ZXJibHVlIiwgc2l6ZSA9IDEyKSwKICAgIGF4aXMudGV4dC55ID0gZWxlbWVudF90ZXh0KGZhY2UgPSAiYm9sZCIsIGNvbG9yID0gInJveWFsYmx1ZTQiLCBzaXplID0gMTIpLAogICAgYXhpcy50aXRsZSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIGZhY2UgPSAiYm9sZCIpLAogICAgcGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIGZhY2UgPSAiYm9sZC5pdGFsaWMiKSwKICAgIHBhbmVsLmJvcmRlciA9IGVsZW1lbnRfcmVjdChjb2xvciA9ICJibGFjayIsIGZpbGwgPSBOQSwgc2l6ZSA9IDEpKSkKCmdncGxvdChjb21iaW5lZF9kYXRhLCBhZXMoeCA9IEJDLCB5ID0gbWVhbl9jb25jLCBjb2xvciA9IEJDKSkgKwogIGdlb21fYm94cGxvdChjb2xvciA9ICJibGFjayIsIGFscGhhID0gMC44LCB3aWR0aCA9IDAuNikgKwogIGdlb21faml0dGVyKGFlcyhmaWxsID0gQkMpLHNoYXBlID0gMjEsIGNvbG9yID0gImJsYWNrIiwgIAogICAgd2lkdGggPSAwLjIsIGFscGhhID0gMC43LCBzaXplID0gNCAgICAgIAogICkgKwogIGxhYnMoCiAgICB0aXRsZSA9ICJCYXNhbCBFc3Ryb2dlbiBMZXZlbHMgYnkgQmlydGggQ29udHJvbCBDYXRlZ29yeSIsCiAgICB4ID0gIkJpcnRoIENvbnRyb2wgQ2F0ZWdvcnkiLAogICAgeSA9ICJFMiBDb25jZW50cmF0aW9uIChwZy9tTCkiCiAgKSArCiAgZ2doaXN0byArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKQpgYGAKCgojIyMjIE5vcm1hbGl0eS9Ib21vZ2VuZWl0eSAKCmBgYHtyfQphbmFseXNpc19kYXRhIDwtIGNvbWJpbmVkX2RhdGEKCiMgU2hhcGlyby1XaWxrIHRlc3QgZm9yIGVhY2ggZ3JvdXAKc2hhcGlyb19wcm9nZXN0aW4gPC0gc2hhcGlyby50ZXN0KGFuYWx5c2lzX2RhdGEkbWVhbl9jb25jW2FuYWx5c2lzX2RhdGEkQkMgPT0gIlByb2dlc3RpbiBvbmx5Il0pCnNoYXBpcm9fZXN0cm9nZW4gPC0gc2hhcGlyby50ZXN0KGFuYWx5c2lzX2RhdGEkbWVhbl9jb25jW2FuYWx5c2lzX2RhdGEkQkMgPT0gIkVzdHJvZ2VuIGNvbnRhaW5pbmciXSkKCiMgUHJpbnQgcmVzdWx0cwpwcmludChzaGFwaXJvX3Byb2dlc3RpbikKcHJpbnQoc2hhcGlyb19lc3Ryb2dlbikKCiMgUS1RIFBsb3RzCmdncGxvdChhbmFseXNpc19kYXRhLCBhZXMoc2FtcGxlID0gbWVhbl9jb25jLCBjb2xvciA9IEJDKSkgKwogIHN0YXRfcXEoKSArCiAgc3RhdF9xcV9saW5lKCkgKwogIGZhY2V0X3dyYXAofiBCQykgKwogIHRoZW1lX21pbmltYWwoKSArCiAgbGFicyh0aXRsZSA9ICJRLVEgUGxvdHMgZm9yIEVzdHJvZ2VuIENvbmNlbnRyYXRpb24gYnkgQkMgQ2F0ZWdvcnkiKSArIGdnaGlzdG8rCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKQpgYGAKCgpgYGB7cn0KIyBMZXZlbmUncyBUZXN0CmxldmVuZV90ZXN0IDwtIGxldmVuZVRlc3QobWVhbl9jb25jIH4gQkMsIGRhdGEgPSBhbmFseXNpc19kYXRhKQpwcmludChsZXZlbmVfdGVzdCkKYGBgCgojIyMjIEtXIFRlc3QgJiBEdW5uJ3MgCgpgYGB7cn0Ka3J1c2thbF9yZXN1bHQgPC0ga3J1c2thbC50ZXN0KG1lYW5fY29uYyB+IEJDLCBkYXRhID0gYW5hbHlzaXNfZGF0YSkKZHVubl90ZXN0IDwtIGR1bm5UZXN0KG1lYW5fY29uYyB+IEJDLCBkYXRhID0gYW5hbHlzaXNfZGF0YSkKCnByaW50KGtydXNrYWxfcmVzdWx0KTtwcmludChkdW5uX3Rlc3QpCmBgYAoKCg==