miRNA MARCH 2024 FEMALE ONLY Associating E2

#library(edgeR);#library(sva)
library(data.table);library(ggfortify)

## Loading required package: ggplot2

library(factoextra);library(ClassDiscovery)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

## Loading required package: cluster

## Loading required package: oompaBase

library(stringr);library(lattice);library(kableExtra)
library(ggplot2);library(MASS);library(memisc)

## 
## Attaching package: 'memisc'

## The following object is masked from 'package:ggplot2':
## 
##     syms

## The following objects are masked from 'package:stats':
## 
##     contr.sum, contr.treatment, contrasts

## The following object is masked from 'package:base':
## 
##     as.array

library(tidyverse);library(naniar);library(gtsummary)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%()             masks memisc::%@%()
## ✖ lubridate::as.interval() masks memisc::as.interval()
## ✖ dplyr::between()         masks data.table::between()
## ✖ dplyr::collect()         masks memisc::collect()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ dplyr::first()           masks data.table::first()
## ✖ dplyr::group_rows()      masks kableExtra::group_rows()
## ✖ lubridate::hour()        masks data.table::hour()
## ✖ lubridate::is.interval() masks memisc::is.interval()
## ✖ lubridate::isoweek()     masks data.table::isoweek()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ dplyr::last()            masks data.table::last()
## ✖ lubridate::mday()        masks data.table::mday()
## ✖ lubridate::minute()      masks data.table::minute()
## ✖ lubridate::month()       masks data.table::month()
## ✖ lubridate::quarter()     masks data.table::quarter()
## ✖ dplyr::recode()          masks memisc::recode()
## ✖ dplyr::rename()          masks memisc::rename()
## ✖ lubridate::second()      masks data.table::second()
## ✖ dplyr::select()          masks MASS::select()
## ✖ dplyr::syms()            masks memisc::syms(), ggplot2::syms()
## ✖ purrr::transpose()       masks data.table::transpose()
## ✖ tibble::view()           masks memisc::view()
## ✖ lubridate::wday()        masks data.table::wday()
## ✖ lubridate::week()        masks data.table::week()
## ✖ lubridate::yday()        masks data.table::yday()
## ✖ lubridate::year()        masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'gtsummary'
## 
## 
## The following object is masked from 'package:MASS':
## 
##     select

require(lme4); require(nnet); require(tidyverse);

## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: nnet

library(broom);library(readxl);library(nlme)

## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:lme4':
## 
##     lmList
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse

NEW DATA FILES

# AURORA cohort data (joined to the estrogen data by PID further down)
df0 <- read.csv(file = "full_aurora.csv")

estrogen data

# Estrogen (E2) concentration measurements
e2_conc <- read.csv(file = "E2_concentrated.csv")
#e2_od <- read.csv("E2_OD.csv") #Do I need this data?

# Coerce the mean concentration to numeric so that the 0 values, 420 values
# and 0 CV values can be filtered out below; entries that are not numbers
# become NA.
e2_conc[["Mean.Concentration..pg.mL."]] <- as.numeric(
  e2_conc[["Mean.Concentration..pg.mL."]]
)

## Warning: NAs introduced by coercion

# Coerce the CV column to numeric as well; entries that are not numbers become NA
e2_conc[["X..CV"]] <- as.numeric(e2_conc[["X..CV"]])

## Warning: NAs introduced by coercion

# Filter out bad samples: keep rows with 0 < mean concentration < 420 and a
# CV above 0. filter() also drops rows where any of these comparisons is NA.
e2_conc <- e2_conc %>%
  rename(mean.conc = Mean.Concentration..pg.mL., CV = X..CV) %>%
  filter(mean.conc > 0, mean.conc < 420, CV > 0)

# Square-root transform the concentration; sqrt.conc is appended as the last
# column and the existing column order is left untouched.
e2_conc <- mutate(e2_conc, sqrt.conc = sqrt(mean.conc))

# Remove extra columns (selected by position)
# NOTE(review): positional indices depend on the exact column layout of
# E2_concentrated.csv -- confirm they still match if the input file changes.
e2_conc <- e2_conc[, -c(4, 5, 7, 9, 10)] #dimensions 860 x 6

keys

# Key linking PID to inventory ID for the estrogen samples
key <- readxl::read_excel(path = "e2_pid_key.xlsx")

# Keys linking sample ID to inventory ID and PID for the miRNA samples
sampleID <- read.csv(file = "ID_to_SampleID_all.csv")
ID_link <- readxl::read_excel(path = "Final Sample List_UNC BSP RNA&Plasma.xlsx")

miRNA data

# No updated miRNA data appears to have been sent, so the cleaned 2021 miRNA
# table from the original code is reused.
# The file was generated by "AURORA_new_QC_sRNA_06142021.R" (script not
# available here; the file came from Ying).
miRNA_cleaned <- read.csv(file = "AURORA_sRNA_cleaned_596_06.14.2021.csv")
# Copy the row names into a `sample` column; it is converted to numeric and
# used as the join key against the sample-ID table further down.
# NOTE(review): presumably the row names correspond to the sample numbers in
# the key -- confirm.
miRNA_cleaned[["sample"]] <- row.names(miRNA_cleaned)
# miRNA_cleaned dimensions: 596 x 1552

Link estrogen data with key

# Attach the estrogen measurements to every entry of the PID key. A right join
# keeps all key rows, so key entries without a measurement get NA values.
e2_keyed <- right_join(
  e2_conc,
  key,
  by = c("Inventory.Code" = "InventoryCode")
)

# The key has 906 inventory ID/PIDs while the estrogen data has only 860 samples

Link miRNA with keys

# Combine the two miRNA keys; every row of sampleID is kept (right join)
ID2 <- right_join(
  ID_link,
  sampleID,
  by = c("INVENTORY CODE" = "Inventory.code", "SAMPLE ID" = "sampleID_sub")
)

# Attach the key information to each miRNA sample. `sample` holds the row
# names as text, so it is converted to numeric before joining.
ID_miRNA <- left_join(
  mutate(miRNA_cleaned, sample = as.numeric(sample)),
  ID2,
  by = "sample"
) #596 samples

# Number of distinct participants with miRNA data
n_distinct(ID_miRNA[["PID"]]) # unique PID 294

## [1] 294

subset to only the timepoint 0/ED visit miRNA values

# Keep only the miRNA samples whose `ALT ID` is "T0"
ID_miRNA_T0 <- filter(ID_miRNA, .data[["ALT ID"]] == "T0") #dimensions 294 x 1557

ORIGINAL aurora data, estrogen

# Use the data from Jarred directly rather than a re-saved copy, because
# re-saving changes the format of some variable names.
df <- read.csv("AURORA_estrogen_excl_outliers_excl_BMI.csv") # dimensions 283 x 3035

# Keep the rows with ED_GenderBirthCert == 2 (the women subset).
# which() is used so that rows with a missing gender code are dropped; plain
# logical indexing (df[df$x == 2, ]) would return an all-NA row for each NA.
E2_women_excl_outliers_excl_BMI <- df[which(df$ED_GenderBirthCert == 2), ] #dim 197 x 3035
#E2_men_excl_outliers_excl_BMI <- df[df$ED_GenderBirthCert == 1,] #86, 3035 #not using men rn

###E2_women (excluding outliers but added BMI) #table 2
# Columns kept for the wide table: identifiers, E2 groupings, covariates,
# pain scores, square-root E2 at T0, and the run date.
E2_women_wide <- E2_women_excl_outliers_excl_BMI[, c(
  "PID", "quartile", "tertile", "median",
  "ED_Age", "HighestGrade", "BMI",
  "WK8_Pain", "M3_Pain", "M6_Pain",
  "E2.Concentration..pg.ml._T0_sqrt",
  "ED_NowPain.x", "Race", "Date.Run_T0"
)] #dimensions 197 x 14

summary(E2_women_wide)

##       PID            quartile        tertile          median     
##  Min.   :100183   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:102989   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :106076   Median :2.000   Median :2.000   Median :1.000  
##  Mean   :105821   Mean   :2.492   Mean   :1.995   Mean   :1.497  
##  3rd Qu.:108692   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :111301   Max.   :4.000   Max.   :3.000   Max.   :2.000  
##                                                                  
##      ED_Age       HighestGrade        BMI           WK8_Pain     
##  Min.   :18.00   Min.   :1.000   Min.   :17.30   Min.   : 0.000  
##  1st Qu.:27.00   1st Qu.:2.000   1st Qu.:25.43   1st Qu.: 3.000  
##  Median :37.00   Median :3.000   Median :30.33   Median : 5.000  
##  Mean   :39.37   Mean   :2.883   Mean   :31.43   Mean   : 5.211  
##  3rd Qu.:51.00   3rd Qu.:4.000   3rd Qu.:36.02   3rd Qu.: 8.000  
##  Max.   :73.00   Max.   :4.000   Max.   :57.50   Max.   :10.000  
##                  NA's   :1       NA's   :27      NA's   :12      
##     M3_Pain          M6_Pain       E2.Concentration..pg.ml._T0_sqrt
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.8124                 
##  1st Qu.: 2.000   1st Qu.: 0.000   1st Qu.: 4.1975                 
##  Median : 5.000   Median : 4.000   Median : 7.1516                 
##  Mean   : 4.716   Mean   : 4.085   Mean   : 8.6909                 
##  3rd Qu.: 7.000   3rd Qu.: 7.000   3rd Qu.:13.0289                 
##  Max.   :10.000   Max.   :10.000   Max.   :21.7290                 
##  NA's   :14       NA's   :21                                       
##   ED_NowPain.x         Race        Date.Run_T0   
##  Min.   : 0.000   Min.   :1.000   Min.   :44032  
##  1st Qu.: 5.000   1st Qu.:2.000   1st Qu.:44039  
##  Median : 7.000   Median :3.000   Median :44063  
##  Mean   : 6.655   Mean   :2.587   Mean   :44073  
##  3rd Qu.: 8.000   3rd Qu.:3.000   3rd Qu.:44082  
##  Max.   :10.000   Max.   :3.000   Max.   :44157  
##                   NA's   :1

# Number of distinct participants in the women-only table
n_distinct(E2_women_wide[["PID"]]) #197 unique PID

## [1] 197

March 2024 data Create wide df of estrogen and pain data

#aurora data is df0 #4745 rows
#estrogen data is e2_keyed #906 rows

#combine e2 and aurora data
# Left join keeps every row of e2_keyed and adds the matching df0 columns.
# NOTE(review): the result has more rows (1217) than e2_keyed (906) because
# PID is repeated on both sides, which dplyr reports as an unexpected
# many-to-many relationship. Matching rows are therefore duplicated --
# confirm whether df0 should be reduced to one row per PID before this join.
e2_keyed_full <- e2_keyed %>% 
  left_join(df0,by=c("PID" = "PID")) #1217 rows x 353 columns

## Warning in left_join(., df0, by = c(PID = "PID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 15 of `x` matches multiple rows in `y`.
## ℹ Row 946 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Where is the NRS pain score? List every column whose name contains "NRS".
names(e2_keyed_full)[grepl("NRS", names(e2_keyed_full))]

## [1] "ED_Somatic_NRS_SUM"  "WK2_Somatic_NRS_SUM" "WK8_Somatic_NRS_SUM"
## [4] "M3_Somatic_NRS_SUM"  "M6_Somatic_NRS_SUM"

#This is only the NRS sum count, so a value greater than 10, not the 0-10 NRS value. Will use something else.

# Open question: should ED pain come from ED_NowPain_C or ED_Pain_C?
# The "Date.Run_T0" value is not available for this new set of 2024 estrogen
# values (it may not be needed).
# Plate number is included to control for batch effects.
# Build the wide data frame from the columns needed for the analysis.
e2_full_wide <- e2_keyed_full[c(
  "PID", "ED_Age", "ED_HighestGrade", "BMI", "ED_RaceEthCode",
  "sqrt.conc",
  "ED_NowPain_C", "WK8_Pain_C", "M3_Pain_C", "M6_Pain_C",
  "Plate.Number", "AlternateID"
)] #dim 1217 x 12

# Print the E2 rows (all time points) whose PID also appears in the miRNA
# sample key ID2; the result is only printed, not stored.
# NOTE(review): PID is repeated in both tables, so this is a many-to-many
# join and each participant appears on several rows. Count distinct PIDs,
# not rows, to measure the overlap.
e2_full_wide %>%
  inner_join(ID2,by="PID")

## Warning in inner_join(., ID2, by = "PID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 123 of `x` matches multiple rows in `y`.
## ℹ Row 11 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

##       PID ED_Age                    ED_HighestGrade  BMI     ED_RaceEthCode
## 1  101394     22               High school graduate   NA Non-Hispanic Black
## 2  101394     22               High school graduate   NA Non-Hispanic Black
## 3  101394     22               High school graduate   NA Non-Hispanic Black
## 4  101394     22               High school graduate   NA Non-Hispanic Black
## 5  101394     22               High school graduate   NA Non-Hispanic Black
## 6  101394     22               High school graduate   NA Non-Hispanic Black
## 7  101394     22               High school graduate   NA Non-Hispanic Black
## 8  101394     22               High school graduate   NA Non-Hispanic Black
## 9  110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 10 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 11 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 12 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 13 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 14 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 15 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 16 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 17 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 18 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 19 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 20 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 21 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 22 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 23 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 24 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 25 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 26 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 27 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 28 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 29 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 30 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 31 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 32 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 33 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 34 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 35 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 36 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 37 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 38 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 39 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 40 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 41 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 42 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 43 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 44 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 45 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 46 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 47 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 48 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 49 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 50 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 51 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 52 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 53 110468     41               High school graduate 51.1 Non-Hispanic White
## 54 110468     41               High school graduate 51.1 Non-Hispanic White
## 55 110468     41               High school graduate 51.1 Non-Hispanic White
## 56 110468     41               High school graduate 51.1 Non-Hispanic White
## 57 110468     41               High school graduate 51.1 Non-Hispanic White
## 58 110468     41               High school graduate 51.1 Non-Hispanic White
## 59 110468     41               High school graduate 51.1 Non-Hispanic White
## 60 110468     41               High school graduate 51.1 Non-Hispanic White
## 61 111201     21               High school graduate 22.7 Non-Hispanic Black
## 62 111201     21               High school graduate 22.7 Non-Hispanic Black
## 63 111201     21               High school graduate 22.7 Non-Hispanic Black
## 64 111201     21               High school graduate 22.7 Non-Hispanic Black
## 65 111201     21               High school graduate 22.7 Non-Hispanic Black
## 66 111201     21               High school graduate 22.7 Non-Hispanic Black
## 67 111128     29               High school graduate 32.3 Non-Hispanic White
## 68 111128     29               High school graduate 32.3 Non-Hispanic White
## 69 111128     29               High school graduate 32.3 Non-Hispanic White
## 70 111128     29               High school graduate 32.3 Non-Hispanic White
## 71 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 72 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 73 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 74 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 75 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 76 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 77 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 78 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 79 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 80 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 81 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 82 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 83 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 84 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 85 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 86 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 87 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 88 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 89 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 90 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 91 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 92 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 93 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 94 108726     43            Some college, no degree 34.0 Non-Hispanic Black
##    sqrt.conc ED_NowPain_C WK8_Pain_C M3_Pain_C M6_Pain_C Plate.Number
## 1  14.870289            6          0         0         0            4
## 2  14.870289            6          0         0         0            4
## 3  14.870289            6          0         0         0            4
## 4  14.870289            6          0         0         0            4
## 5  14.870289            6          0         0         0            4
## 6  14.870289            6          0         0         0            4
## 7  14.870289            6          0         0         0            4
## 8  14.870289            6          0         0         0            4
## 9   3.411891            4          4         5         3            9
## 10  3.411891            4          4         5         3            9
## 11  3.411891            4          4         5         3            9
## 12  3.411891            4          4         5         3            9
## 13  3.411891            4          4         5         3            9
## 14  3.411891            4          4         5         3            9
## 15  3.411891            4          4         5         3            9
## 16  3.411891            4          4         5         3            9
## 17  3.411891            4          4         5         3            9
## 18  3.411891            4          4         5         3            9
## 19  3.411891            4          4         5         3            9
## 20  3.411891            4          4         5         3            9
## 21  4.063250            5          0         0         0            9
## 22  4.063250            5          0         0         0            9
## 23  4.063250            5          0         0         0            9
## 24  4.063250            5          0         0         0            9
## 25  4.063250            5          0         0         0            9
## 26  4.063250            5          0         0         0            9
## 27  4.063250            5          0         0         0            9
## 28  4.063250            5          0         0         0            9
## 29  4.063250            5          0         0         0            9
## 30  4.063250            5          0         0         0            9
## 31  4.063250            5          0         0         0            9
## 32  4.063250            5          0         0         0            9
## 33 13.573504            4          8         8         7           10
## 34 13.573504            4          8         8         7           10
## 35 13.573504            4          8         8         7           10
## 36 13.573504            4          8         8         7           10
## 37 13.573504            4          8         8         7           10
## 38 13.573504            4          8         8         7           10
## 39 13.573504            4          8         8         7           10
## 40 13.573504            4          8         8         7           10
## 41 13.573504            4          8         8         7           10
## 42 13.573504            4          8         8         7           10
## 43 13.573504            4          8         8         7           10
## 44 13.573504            4          8         8         7           10
## 45  3.735706            6          4        NA         3           10
## 46  3.735706            6          4        NA         3           10
## 47  3.735706            6          4        NA         3           10
## 48  3.735706            6          4        NA         3           10
## 49  3.735706            6          4        NA         3           10
## 50  3.735706            6          4        NA         3           10
## 51  3.735706            6          4        NA         3           10
## 52  3.735706            6          4        NA         3           10
## 53  4.842365            7         NA         6         5           10
## 54  4.842365            7         NA         6         5           10
## 55  4.842365            7         NA         6         5           10
## 56  4.842365            7         NA         6         5           10
## 57  4.842365            7         NA         6         5           10
## 58  4.842365            7         NA         6         5           10
## 59  4.842365            7         NA         6         5           10
## 60  4.842365            7         NA         6         5           10
## 61  6.199113            7          7         6         6           11
## 62  6.199113            7          7         6         6           11
## 63  6.199113            7          7         6         6           11
## 64  6.199113            7          7         6         6           11
## 65  6.199113            7          7         6         6           11
## 66  6.199113            7          7         6         6           11
## 67  4.701542            8          7         7         6           16
## 68  4.701542            8          7         7         6           16
## 69  4.701542            8          7         7         6           16
## 70  4.701542            8          7         7         6           16
## 71  3.681304            6          7         4         8           17
## 72  3.681304            6          7         4         8           17
## 73  3.681304            6          7         4         8           17
## 74  3.681304            6          7         4         8           17
## 75  3.681304            6          7         4         8           17
## 76  3.681304            6          7         4         8           17
## 77  3.681304            6          7         4         8           17
## 78  3.681304            6          7         4         8           17
## 79  3.681304            6          7         4         8           17
## 80  3.681304            6          7         4         8           17
## 81  3.681304            6          7         4         8           17
## 82  3.681304            6          7         4         8           17
## 83  4.382009            5          6         4         4           17
## 84  4.382009            5          6         4         4           17
## 85  4.382009            5          6         4         4           17
## 86  4.382009            5          6         4         4           17
## 87  4.382009            5          6         4         4           17
## 88  4.382009            5          6         4         4           17
## 89  4.382009            5          6         4         4           17
## 90  4.382009            5          6         4         4           17
## 91  4.382009            5          6         4         4           17
## 92  4.382009            5          6         4         4           17
## 93  4.382009            5          6         4         4           17
## 94  4.382009            5          6         4         4           17
##    AlternateID SAMPLE ID ALT ID INVENTORY CODE Sample Type sample
## 1           T1  824-1024     T3      XA0050274        RNA1    522
## 2           T1  824-1024     T0      XA0045353        RNA1      2
## 3           T1  824-1024     T3      XA0050274        RNA1    522
## 4           T1  824-1024     T0      XA0045353        RNA1      2
## 5           T1  824-1024     T3      XA0050274        RNA1    522
## 6           T1  824-1024     T0      XA0045353        RNA1      2
## 7           T1  824-1024     T3      XA0050274        RNA1    522
## 8           T1  824-1024     T0      XA0045353        RNA1      2
## 9           T3  846-1109     T1      XA0055516        RNA1    336
## 10          T3  846-1109     T0      XA0055259        RNA1    148
## 11          T3  846-1109     T1      XA0055516        RNA1    336
## 12          T3  846-1109     T0      XA0055259        RNA1    148
## 13          T3  846-1109     T1      XA0055516        RNA1    336
## 14          T3  846-1109     T0      XA0055259        RNA1    148
## 15          T3  846-1109     T1      XA0055516        RNA1    336
## 16          T3  846-1109     T0      XA0055259        RNA1    148
## 17          T3  846-1109     T1      XA0055516        RNA1    336
## 18          T3  846-1109     T0      XA0055259        RNA1    148
## 19          T3  846-1109     T1      XA0055516        RNA1    336
## 20          T3  846-1109     T0      XA0055259        RNA1    148
## 21          T3  846-1112     T1      XA0055530        RNA1    341
## 22          T3  846-1112     T0      XA0055329        RNA1    430
## 23          T3  846-1112     T1      XA0055530        RNA1    341
## 24          T3  846-1112     T0      XA0055329        RNA1    430
## 25          T3  846-1112     T1      XA0055530        RNA1    341
## 26          T3  846-1112     T0      XA0055329        RNA1    430
## 27          T3  846-1112     T1      XA0055530        RNA1    341
## 28          T3  846-1112     T0      XA0055329        RNA1    430
## 29          T3  846-1112     T1      XA0055530        RNA1    341
## 30          T3  846-1112     T0      XA0055329        RNA1    430
## 31          T3  846-1112     T1      XA0055530        RNA1    341
## 32          T3  846-1112     T0      XA0055329        RNA1    430
## 33          T3  846-1110     T1      XA0055534        RNA1    343
## 34          T3  846-1110     T0      XA0055331        RNA1    151
## 35          T3  846-1110     T1      XA0055534        RNA1    343
## 36          T3  846-1110     T0      XA0055331        RNA1    151
## 37          T3  846-1110     T1      XA0055534        RNA1    343
## 38          T3  846-1110     T0      XA0055331        RNA1    151
## 39          T3  846-1110     T1      XA0055534        RNA1    343
## 40          T3  846-1110     T0      XA0055331        RNA1    151
## 41          T3  846-1110     T1      XA0055534        RNA1    343
## 42          T3  846-1110     T0      XA0055331        RNA1    151
## 43          T3  846-1110     T1      XA0055534        RNA1    343
## 44          T3  846-1110     T0      XA0055331        RNA1    151
## 45          T3  846-1117     T1      XA0055521        RNA1    338
## 46          T3  846-1117     T0      XA0055300        RNA1    429
## 47          T3  846-1117     T1      XA0055521        RNA1    338
## 48          T3  846-1117     T0      XA0055300        RNA1    429
## 49          T3  846-1117     T1      XA0055521        RNA1    338
## 50          T3  846-1117     T0      XA0055300        RNA1    429
## 51          T3  846-1117     T1      XA0055521        RNA1    338
## 52          T3  846-1117     T0      XA0055300        RNA1    429
## 53          T3  873-1054     T0      XA0054511        RNA1    299
## 54          T3  873-1054     T1      XA0054622        RNA1    305
## 55          T3  873-1054     T0      XA0054511        RNA1    299
## 56          T3  873-1054     T1      XA0054622        RNA1    305
## 57          T3  873-1054     T0      XA0054511        RNA1    299
## 58          T3  873-1054     T1      XA0054622        RNA1    305
## 59          T3  873-1054     T0      XA0054511        RNA1    299
## 60          T3  873-1054     T1      XA0054622        RNA1    305
## 61          T3  825-1110     T0      XA0055257        RNA1    168
## 62          T3  825-1110     T1      XA0055225        RNA1    575
## 63          T3  825-1110     T0      XA0055257        RNA1    168
## 64          T3  825-1110     T1      XA0055225        RNA1    575
## 65          T3  825-1110     T0      XA0055257        RNA1    168
## 66          T3  825-1110     T1      XA0055225        RNA1    575
## 67          T3  828-1216     T0      XA0054998        RNA1    573
## 68          T3  828-1216     T1      XA0054147        RNA1    119
## 69          T3  828-1216     T0      XA0054998        RNA1    573
## 70          T3  828-1216     T1      XA0054147        RNA1    119
## 71          T3  827-1008     T1      XA0047505        RNA1    485
## 72          T3  827-1008     T0      XA0045856        RNA1    450
## 73          T3  827-1008     T1      XA0047505        RNA1    485
## 74          T3  827-1008     T0      XA0045856        RNA1    450
## 75          T3  827-1008     T1      XA0047505        RNA1    485
## 76          T3  827-1008     T0      XA0045856        RNA1    450
## 77          T3  827-1008     T1      XA0047505        RNA1    485
## 78          T3  827-1008     T0      XA0045856        RNA1    450
## 79          T3  827-1008     T1      XA0047505        RNA1    485
## 80          T3  827-1008     T0      XA0045856        RNA1    450
## 81          T3  827-1008     T1      XA0047505        RNA1    485
## 82          T3  827-1008     T0      XA0045856        RNA1    450
## 83          T3  828-1145     T0      XA0053287        RNA1    406
## 84          T3  828-1145     T1      XA0053178        RNA1     86
## 85          T3  828-1145     T0      XA0053287        RNA1    406
## 86          T3  828-1145     T1      XA0053178        RNA1     86
## 87          T3  828-1145     T0      XA0053287        RNA1    406
## 88          T3  828-1145     T1      XA0053178        RNA1     86
## 89          T3  828-1145     T0      XA0053287        RNA1    406
## 90          T3  828-1145     T1      XA0053178        RNA1     86
## 91          T3  828-1145     T0      XA0053287        RNA1    406
## 92          T3  828-1145     T1      XA0053178        RNA1     86
## 93          T3  828-1145     T0      XA0053287        RNA1    406
## 94          T3  828-1145     T1      XA0053178        RNA1     86

#at the above step, which includes all time points of E2 for the March 2024 data, the join returns 94 rows covering 10 unique PIDs that overlap with the 2021 miRNA data (rows are repeated by the many-to-many join)

# Give the pain and race columns shorter names
e2_full_wide <- rename(
  e2_full_wide,
  ED_Pain = ED_NowPain_C,
  WK8_Pain = WK8_Pain_C,
  M3_Pain = M3_Pain_C,
  M6_Pain = M6_Pain_C,
  Race = ED_RaceEthCode
)

# Drop rows that have no estrogen measurement
e2_full_wide <- e2_full_wide[!is.na(e2_full_wide[["sqrt.conc"]]), ] #dimensions 1161 x 12

# Keep ONLY estrogen time point T0, i.e. E2 at the time of the ED visit
e2_full_wide <- filter(e2_full_wide, AlternateID == "T0") #dimensions 558 x 12

# Note: once only T0 values are kept, no PIDs overlap between the March 2024
# E2 data and the 2021 miRNA data.

# Summarize the wide df and count the unique PIDs
#summary(e2_full_wide)
n_distinct(e2_full_wide[["PID"]]) #432 unique PID

## [1] 432

NEW MARCH 2024 CODE fix the education category

# List the raw education responses present before recoding
unique(e2_full_wide[["ED_HighestGrade"]])

##  [1] "High school graduate"                                            
##  [2] "Some college, no degree"                                         
##  [3] "Associate degree: Occupational, technical, or vocational program"
##  [4] "Bachelor's degree: BA, AB, BS, BBA"                              
##  [5] "GED or equivalent"                                               
##  [6] "Associate degree: Academic program"                              
##  [7] "Doctoral degree: PhD, EdD"                                       
##  [8] "Master's degree: MA, MS, MEng, MEd, MBA"                         
##  [9] "Professional school degree: MD, DDS, DVM, JD"                    
## [10] "8th grade"                                                       
## [11] "11th grade"                                                      
## [12] "12th grade, no diploma"                                          
## [13] "9th grade"                                                       
## [14] "10th grade"                                                      
## [15] "7th grade"

# Collapse the 15 raw education responses into 4 categories.
# Names are the raw ED_HighestGrade values, values are the collapsed labels.
# The entry order is deliberate: names(level_mapping) is later used as the
# factor `levels`, so the first appearance of each label fixes the order of
# the resulting factor levels.
level_mapping <- local({
  some_college <- "Some College or Associate Degree"
  advanced <- "Advanced Degree"
  hs_or_less <- "High School or Less"
  bachelors <- "Bachelor's Degree"

  c(
    "Some college, no degree" = some_college,
    "Professional school degree: MD, DDS, DVM, JD" = advanced,
    "Associate degree: Occupational, technical, or vocational program" = some_college,
    "High school graduate" = hs_or_less,
    "Bachelor's degree: BA, AB, BS, BBA" = bachelors,
    "Master's degree: MA, MS, MEng, MEd, MBA" = advanced,
    "GED or equivalent" = hs_or_less,
    "8th grade" = hs_or_less,
    "Doctoral degree: PhD, EdD" = advanced,
    "Associate degree: Academic program" = some_college,
    "12th grade, no diploma" = hs_or_less,
    "11th grade" = hs_or_less,
    "9th grade" = hs_or_less,
    "10th grade" = hs_or_less,
    "7th grade" = hs_or_less
  )
})

# Recode ED_HighestGrade into the collapsed categories as a factor:
# raw values are matched against names(level_mapping) and relabelled with
# the corresponding collapsed label.
e2_full_wide$ED_HighestGrade <- factor(
  e2_full_wide$ED_HighestGrade,
  levels = names(level_mapping),
  labels = level_mapping
)
# dim 558 x 12

Original code

# Stack the follow-up pain columns into long format and attach a numeric
# time code for each source column (WK2 = 0.5, WK8 = 2, M3 = 3, M6 = 6).
E2_women_long <- E2_women_wide %>%
  gather(key = names, value = pain, WK8_Pain, M3_Pain, M6_Pain) %>%
  mutate(
    time = case_when(
      names == "WK2_Pain" ~ 0.5,
      names == "WK8_Pain" ~ 2,
      names == "M3_Pain" ~ 3,
      names == "M6_Pain" ~ 6,
      TRUE ~ NA_real_
    )
  ) %>%
  arrange(PID, time)

# Keep only rows with no missing value in any column
keep_rows <- complete.cases(E2_women_long)
E2_women_long <- E2_women_long[keep_rows, ]
rm(keep_rows)
length(unique(E2_women_long$PID)) # 164 PID

## [1] 164

#summary(E2_women_long$E2.Concentration..pg.ml._T0_sqrt)

March 2024 data: create long df of estrogen and pain data. Changes: updated df used, removed Week2 pain rename.

# Stack the three follow-up pain columns into long format and attach a
# numeric time code for each source column (WK8 = 2, M3 = 3, M6 = 6).
e2_full_long <- e2_full_wide %>%
  gather(key = names, value = pain, WK8_Pain, M3_Pain, M6_Pain) %>%
  mutate(
    time = case_when(
      names == "WK8_Pain" ~ 2,
      names == "M3_Pain" ~ 3,
      names == "M6_Pain" ~ 6,
      TRUE ~ NA_real_
    )
  ) %>%
  arrange(PID, time)

# Keep only rows with no missing value in any column
e2_full_long <- e2_full_long[complete.cases(e2_full_long), , drop = FALSE] # dim 1541 x 12
length(unique(e2_full_long$PID)) # 397 PID

## [1] 397

# summary(e2_full_long$sqrt.conc)

# Remove rows that are exact duplicates across every column
# (row count goes from 1541 to 1163)
e2_full_long <- unique(e2_full_long) # dim 1163 x 12

ORIGINAL GLS MODEL CODE

# model (table2a, left side)
# Copy the estrogen and run-date columns to the names used in the formula
E2_women_long$E2_Concentration_T0_sqrt <- E2_women_long$E2.Concentration..pg.ml._T0_sqrt
E2_women_long$Date_Run_T0 <- E2_women_long$Date.Run_T0

# GLS of follow-up pain on time, T0 estrogen and covariates, with an AR(1)
# correlation over time within each PID
E2_women_model <- gls(
  pain ~ time + E2_Concentration_T0_sqrt + ED_Age + HighestGrade + BMI +
    ED_NowPain.x + Race + Date_Run_T0,
  correlation = corAR1(form = ~ time | PID),
  control = list(singular.ok = FALSE),
  data = E2_women_long
)

summary(E2_women_model)

## Generalized least squares fit by REML
##   Model: pain ~ time + E2_Concentration_T0_sqrt + ED_Age + HighestGrade +      BMI + ED_NowPain.x + Race + Date_Run_T0 
##   Data: E2_women_long 
##        AIC      BIC    logLik
##   2257.843 2303.215 -1117.922
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##      Phi1 
## 0.7745422 
## 
## Coefficients:
##                               Value Std.Error   t-value p-value
## (Intercept)              -210.08763 215.99869 -0.972634  0.3312
## time                       -0.26382   0.07157 -3.686142  0.0003
## E2_Concentration_T0_sqrt   -0.04854   0.04138 -1.173080  0.2414
## ED_Age                      0.03183   0.01658  1.920209  0.0555
## HighestGrade               -0.16961   0.24209 -0.700584  0.4839
## BMI                         0.02026   0.02838  0.713853  0.4757
## ED_NowPain.x                0.37291   0.08619  4.326428  0.0000
## Race                        0.37034   0.38705  0.956813  0.3392
## Date_Run_T0                 0.00480   0.00490  0.978950  0.3281
## 
##  Correlation: 
##                          (Intr) time   E2_C_T ED_Age HghstG BMI    ED_NP.
## time                     -0.004                                          
## E2_Concentration_T0_sqrt  0.193  0.012                                   
## ED_Age                   -0.036 -0.008  0.269                            
## HighestGrade              0.186 -0.006 -0.018 -0.300                     
## BMI                      -0.065  0.003  0.007 -0.312  0.080              
## ED_NowPain.x             -0.056 -0.004 -0.092 -0.048  0.052 -0.228       
## Race                     -0.159 -0.020 -0.082  0.018  0.159 -0.064 -0.104
## Date_Run_T0              -1.000  0.002 -0.194  0.035 -0.190  0.062  0.055
##                          Race  
## time                           
## E2_Concentration_T0_sqrt       
## ED_Age                         
## HighestGrade                   
## BMI                            
## ED_NowPain.x                   
## Race                           
## Date_Run_T0               0.154
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.11992096 -0.78910606 -0.05550526  0.79878542  2.21845145 
## 
## Residual standard error: 3.130053 
## Degrees of freedom: 466 total; 457 residual

MARCH 2024 NEW E2 DATA CODE GLS MODEL

# GLS of follow-up pain on time, T0 estrogen (sqrt.conc) and covariates, with
# an AR(1) correlation over time within each PID.
# Education (ED_HighestGrade, recoded to 4 categories) IS included in this
# formula; an earlier note said it had been taken out because it was not
# coded correctly.
# NOTE(review): Plate.Number enters as a single numeric slope in the fitted
# output - confirm whether it should be a factor (batch) instead.
e2_model <- gls(
  pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +
    Race + Plate.Number,
  correlation = corAR1(form = ~ time | PID),
  control = list(singular.ok = FALSE),
  data = e2_full_long
)

summary(e2_model)

## Generalized least squares fit by REML
##   Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +      Race + Plate.Number 
##   Data: e2_full_long 
##       AIC      BIC    logLik
##   5566.51 5642.223 -2768.255
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##    Phi1 
## 0.73062 
## 
## Coefficients:
##                                         Value Std.Error   t-value p-value
## (Intercept)                         1.9370128 0.8249753  2.347965  0.0190
## time                               -0.2372621 0.0453816 -5.228156  0.0000
## sqrt.conc                           0.0097417 0.0403274  0.241564  0.8092
## ED_Age                              0.0433499 0.0096850  4.475991  0.0000
## ED_HighestGradeAdvanced Degree     -1.1040972 0.5188331 -2.128039  0.0335
## ED_HighestGradeHigh School or Less  0.0062920 0.2810954  0.022384  0.9821
## ED_HighestGradeBachelor's Degree   -0.6553079 0.3530373 -1.856200  0.0637
## BMI                                 0.0268486 0.0147667  1.818177  0.0693
## ED_Pain                             0.2596334 0.0491419  5.283337  0.0000
## RaceNon-Hispanic Black             -0.5629988 0.3746560 -1.502709  0.1332
## RaceNon-Hispanic Other             -0.4014009 0.5972709 -0.672058  0.5017
## RaceNon-Hispanic White             -0.5940566 0.3903109 -1.522009  0.1283
## Plate.Number                       -0.0234922 0.0187327 -1.254078  0.2101
## 
##  Correlation: 
##                                    (Intr) time   sqrt.c ED_Age ED_HGAD ED_SoL
## time                               -0.226                                    
## sqrt.conc                          -0.301  0.001                             
## ED_Age                             -0.409  0.004  0.251                      
## ED_HighestGradeAdvanced Degree     -0.219  0.002 -0.015 -0.098               
## ED_HighestGradeHigh School or Less -0.245  0.000  0.025  0.105  0.237        
## ED_HighestGradeBachelor's Degree   -0.156  0.000  0.008 -0.069  0.244   0.339
## BMI                                -0.465  0.006 -0.101 -0.167  0.069   0.051
## ED_Pain                            -0.444  0.002 -0.060  0.046  0.136  -0.074
## RaceNon-Hispanic Black             -0.310  0.000 -0.050 -0.064  0.221   0.042
## RaceNon-Hispanic Other             -0.258  0.002 -0.048  0.027  0.118  -0.002
## RaceNon-Hispanic White             -0.316 -0.002 -0.055 -0.089  0.131   0.059
## Plate.Number                       -0.285  0.000  0.004 -0.022  0.063   0.066
##                                    ED_HGBD BMI    ED_Pan RcN-HB RcN-HO RcN-HW
## time                                                                         
## sqrt.conc                                                                    
## ED_Age                                                                       
## ED_HighestGradeAdvanced Degree                                               
## ED_HighestGradeHigh School or Less                                           
## ED_HighestGradeBachelor's Degree                                             
## BMI                                 0.032                                    
## ED_Pain                             0.070   0.013                            
## RaceNon-Hispanic Black              0.079  -0.011  0.015                     
## RaceNon-Hispanic Other              0.048   0.022  0.096  0.475              
## RaceNon-Hispanic White             -0.045  -0.008  0.159  0.718  0.456       
## Plate.Number                        0.014   0.062  0.076 -0.038 -0.046 -0.108
## 
## Standardized residuals:
##          Min           Q1          Med           Q3          Max 
## -2.096733348 -0.819667252  0.005922671  0.718887652  2.479985354 
## 
## Residual standard error: 3.005848 
## Degrees of freedom: 1163 total; 1150 residual

ok well that’s not exactly stellar. let’s see what we’re working with

# One line per participant: pain score at each follow-up time point
e2_full_long %>%
  ggplot(aes(x = as.factor(time), y = pain, group = PID, color = PID)) +
  geom_line() +
  geom_point() +
  labs(
    x = "Time Point",
    y = "Pain Score",
    title = "Pain Scores Over Time by PID"
  ) +
  theme_minimal()

# The per-PID plot is unreadable, so collapse to the mean and median pain
# at each time point instead
summary_df <- summarise(
  group_by(e2_full_long, time),
  mean_pain = mean(pain),
  median_pain = median(pain)
)

# Plot mean (solid blue) and median (dashed red) pain at each time point.
# Line thickness uses `linewidth` (the `size` aesthetic for lines is
# deprecated since ggplot2 3.4.0), and the two fixed labels are drawn with
# annotate() so each is placed once rather than once per row of summary_df.
ggplot(summary_df, aes(x = time)) +
  geom_line(aes(y = mean_pain), color = "blue", linewidth = 1.5) + # Mean pain scores
  geom_point(aes(y = mean_pain), color = "blue", size = 3) +
  geom_line(
    aes(y = median_pain),
    color = "red", linewidth = 1.5, linetype = "dashed"
  ) + # Median pain scores
  geom_point(aes(y = median_pain), color = "red", size = 3) +
  annotate(
    "text",
    x = 3.5, y = 4.2, label = "Mean",
    color = "blue", size = 4, hjust = -0.2
  ) + # Text for mean
  annotate(
    "text",
    x = 5, y = 4.1, label = "Median",
    color = "red", size = 4, hjust = -0.2
  ) + # Text for median
  labs(x = "Time Point", y = "Pain Score", title = "Summary of Pain Scores Over Time") +
  scale_x_continuous(breaks = c(2, 3, 6))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning in geom_text(aes(x = 3.5, y = 4.2, label = "Mean"), color = "blue", : All aesthetics have length 1, but the data has 3 rows.
## ℹ Did you mean to use `annotate()`?

## Warning in geom_text(aes(x = 5, y = 4.1, label = "Median"), color = "red", : All aesthetics have length 1, but the data has 3 rows.
## ℹ Did you mean to use `annotate()`?

visualize estrogen versus pain over time

# Scatter of follow-up pain against T0 estrogen, coloured and shaped by
# time point
e2_full_long %>%
  ggplot(
    aes(
      x = sqrt.conc,
      y = pain,
      color = factor(time),
      shape = factor(time)
    )
  ) +
  geom_point(size = 3) +
  labs(
    x = "Estrogen Concentration (sqrt)",
    y = "Pain Level",
    color = "Time",
    shape = "Time"
  ) +
  scale_color_discrete(
    labels = c("2" = "Time 2", "3" = "Time 3", "6" = "Time 6")
  ) +
  scale_shape_manual(
    values = c(16, 17, 18),
    labels = c("2" = "Time 2", "3" = "Time 3", "6" = "Time 6")
  )

# Density of T0 estrogen among the rows present at each time point,
# one panel per time point
e2_full_long %>%
  ggplot(aes(x = sqrt.conc, fill = factor(time))) +
  geom_density(alpha = 0.5) +
  labs(
    x = "Estrogen Concentration (sqrt)",
    y = "Density",
    fill = "Time"
  ) +
  facet_wrap(~ time, ncol = 1) +
  theme_minimal()

summary stats

# Median sqrt estrogen for every (time point, pain score) combination
summarise(
  group_by(e2_full_long, time, pain),
  Median_E2 = median(sqrt.conc, na.rm = TRUE)
)

## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.

## # A tibble: 33 × 3
## # Groups:   time [3]
##     time  pain Median_E2
##    <dbl> <int>     <dbl>
##  1     2     0      5.21
##  2     2     1      4.89
##  3     2     2      4.26
##  4     2     3      4.82
##  5     2     4      4.58
##  6     2     5      4.67
##  7     2     6      5.19
##  8     2     7      5.58
##  9     2     8      4.95
## 10     2     9      5.05
## # ℹ 23 more rows

# Pooled scatter of pain against sqrt estrogen; low alpha shows overplotting
e2_full_long %>%
  ggplot(aes(x = sqrt.conc, y = pain)) +
  geom_point(
    size = 4,
    alpha = .1,
    color = "firebrick4",
    fill = "violetred",
    shape = 21
  )

# Pearson correlation between sqrt estrogen and pain across all rows.
# The column is spelled out in full: `$sqrt` only resolved to `sqrt.conc`
# through `$` partial matching, which returns NULL on a tibble and becomes
# ambiguous if another column starting with "sqrt" is ever added.
cor(e2_full_long$sqrt.conc, e2_full_long$pain)

## [1] -0.00841645

idea.. refactor pain to two categories: low to none and moderate to severe

# Dichotomise pain. Intervals are right-closed, so pain <= 4 is
# "Low to None" and pain in (4, 10] is "Moderate to Severe".
e2_full_long <- mutate(
  e2_full_long,
  bin_pain = cut(
    pain,
    breaks = c(-Inf, 4, 10),
    labels = c("Low to None", "Moderate to Severe"),
    right = TRUE
  )
) # dimensions 1163 x 13


# review summary stats: mean and median sqrt estrogen by time and pain group
summarise(
  group_by(e2_full_long, time, bin_pain),
  Mean_E2 = mean(sqrt.conc, na.rm = TRUE),
  Median_E2 = median(sqrt.conc, na.rm = TRUE)
)

## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.

## # A tibble: 6 × 4
## # Groups:   time [3]
##    time bin_pain           Mean_E2 Median_E2
##   <dbl> <fct>                <dbl>     <dbl>
## 1     2 Low to None           5.58      4.80
## 2     2 Moderate to Severe    6.05      5.08
## 3     3 Low to None           5.84      4.86
## 4     3 Moderate to Severe    5.86      4.98
## 5     6 Low to None           5.87      4.86
## 6     6 Moderate to Severe    5.79      4.98

# Stacked histogram of sqrt estrogen by pain group, one row per time point
e2_full_long %>%
  ggplot(aes(x = sqrt.conc, fill = bin_pain)) +
  geom_histogram(binwidth = .2) +
  facet_grid(time ~ .) +
  labs(
    x = "Square Root of Estrogen Concentration",
    y = "Frequency",
    fill = "Pain Level"
  )

# sqrt estrogen at each time point, coloured by pain group
e2_full_long %>%
  ggplot(aes(x = time, y = sqrt.conc, color = bin_pain)) +
  geom_point() +
  labs(
    x = "Time",
    y = "Square Root of Estrogen Concentration",
    color = "Pain Level"
  )

lets look at the e2 values and re-level

# Five-number summary (plus mean) of sqrt estrogen
summary(e2_full_long[["sqrt.conc"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.325   3.867   4.906   5.841   6.844  20.137

# Density curve with a density-scaled histogram of sqrt estrogen on top.
# Uses after_stat(density) and `linewidth` in place of the `..density..`
# notation and line `size`, both deprecated since ggplot2 3.4.0. The
# histogram inherits data and aesthetics from ggplot(), so they are not
# repeated in the layer.
ggplot(data = e2_full_long, aes(x = sqrt.conc, y = after_stat(density))) +
  geom_density(
    color = "deeppink3", fill = "deeppink3",
    alpha = .4, linewidth = 1.8
  ) +
  labs(
    x = "Estrogen Level (sqrt.conc)", y = "Density",
    title = "Density Plot of Estrogen Levels"
  ) +
  geom_histogram(
    fill = "skyblue", color = "gray20",
    alpha = 0.9, bins = 50
  )

## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Subset to the month-6 follow-up to compare ED estrogen with month-6 pain
df_m6 <- filter(e2_full_long, time == 6) # dimensions 397 x 13

# sqrt estrogen distribution within each month-6 pain group
df_m6 %>%
  ggplot(aes(x = bin_pain, y = sqrt.conc)) +
  geom_boxplot() +
  labs(title = "Box Plot of sqrt.conc by bin_pain at month 6")

# Raw month-6 pain score against sqrt estrogen
df_m6 %>%
  ggplot(aes(x = sqrt.conc, y = pain)) +
  geom_point() +
  labs(
    title = "Scatter Plot of sqrt.conc versus pain at month 6",
    x = "Sqrt.Conc",
    y = "Pain"
  )

summary table

# Month-6 descriptive table split by pain group, reporting mean (sd) for
# continuous variables, with a p-value per variable
df_m6 %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain
  ) %>%
  tbl_summary(
    by = bin_pain,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  add_p()

Characteristic	Low to None, N = 231¹	Moderate to Severe, N = 166¹	p-value²
ED_Age	36 (14)	39 (13)	0.012
ED_HighestGrade			0.2
Some College or Associate Degree	91 (39%)	68 (41%)
Advanced Degree	19 (8.2%)	8 (4.8%)
High School or Less	76 (33%)	66 (40%)
Bachelor's Degree	45 (19%)	24 (14%)
BMI	29 (8)	31 (9)	0.008
Race			0.3
Hispanic	29 (13%)	28 (17%)
Non-Hispanic Black	98 (42%)	77 (46%)
Non-Hispanic Other	16 (6.9%)	7 (4.2%)
Non-Hispanic White	88 (38%)	54 (33%)
sqrt.conc	5.87 (3.07)	5.79 (3.13)	0.7
ED_Pain	6 (3)	7 (2)	<0.001
pain	1 (2)	7 (2)	<0.001
¹ Mean (SD); n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test

# Explore candidate cut-off points for estrogen bins, starting from the
# quartiles of sqrt estrogen
summary(e2_full_long[["sqrt.conc"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.325   3.867   4.906   5.841   6.844  20.137

# Count rows by summing the logical condition; the previous
# length(df[cond, "col"]) form only works when `[` drops to a vector
# (it returns 1 on a tibble).
sum(e2_full_long$sqrt.conc >= 10) #109 values greater than or equal to 10

## [1] 109

# Count rows by summing the logical condition; the previous
# length(df[cond, "col"]) form only works when `[` drops to a vector
# (it returns 1 on a tibble).
sum(e2_full_long$sqrt.conc <= 3) #101 values less than or equal to 3

## [1] 101

# Count rows by summing the logical condition; the previous
# length(df[cond, "col"]) form only works when `[` drops to a vector
# (it returns 1 on a tibble).
sum(e2_full_long$sqrt.conc >= 6.844) #291 values greater than or equal to 3rd quartile

## [1] 291

# Count rows by summing the logical condition; the previous
# length(df[cond, "col"]) form only works when `[` drops to a vector
# (it returns 1 on a tibble).
sum(e2_full_long$sqrt.conc <= 3.867) #289 values less than or equal to 1st quartile

## [1] 289

# Number of sqrt estrogen values between the (rounded) 1st and 3rd
# quartiles, both bounds inclusive
with(
  e2_full_long,
  length(sqrt.conc[sqrt.conc >= 3.867 & sqrt.conc <= 6.844])
) #583 values between 1st and 3rd quartile

## [1] 583

#583+289+291 = 1163, i.e. all 1163 rows are accounted for

make new categories based on this exploration..

# Bin sqrt estrogen into three mutually exclusive groups:
# <= 3 is "low E2", strictly between 3 and 10 is "mid E2", >= 10 is "high E2".
# Rows with a missing sqrt.conc match no condition and get NA.
e2_full_long <- mutate(
  e2_full_long,
  e2_bin = case_when(
    sqrt.conc >= 10 ~ "high E2",
    sqrt.conc <= 3 ~ "low E2",
    sqrt.conc > 3 & sqrt.conc < 10 ~ "mid E2"
  )
)
# new dimensions 1163 x 14

summary table with new categories

# Descriptive table by estrogen bin, pooling every follow-up time point
# (so a participant can contribute more than one row)
e2_full_long %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption(
    "**Full Table: Features by Estrogen Bin, ALL TIME POINTS**"
  ) %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**Full Table: Features by Estrogen Bin, ALL TIME POINTS**
Characteristic	Overall, N = 1,163¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 1,163¹	high E2, N = 109¹	low E2, N = 101¹	mid E2, N = 953¹	p-value²
ED_Age	37 (13)	31 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.024
Some College or Associate Degree	466 (40%)	45 (41%)	28 (28%)	393 (41%)
Advanced Degree	81 (7.0%)	6 (5.5%)	3 (3.0%)	72 (7.6%)
High School or Less	416 (36%)	42 (39%)	44 (44%)	330 (35%)
Bachelor's Degree	200 (17%)	16 (15%)	26 (26%)	158 (17%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.2
Race					<0.001
Hispanic	167 (14%)	9 (8.3%)	17 (17%)	141 (15%)
Non-Hispanic Black	514 (44%)	47 (43%)	35 (35%)	432 (45%)
Non-Hispanic Other	69 (5.9%)	6 (5.5%)	15 (15%)	48 (5.0%)
Non-Hispanic White	413 (36%)	47 (43%)	34 (34%)	332 (35%)
sqrt.conc	5.84 (3.09)	13.44 (2.61)	2.46 (0.39)	5.33 (1.67)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.5
bin_pain					0.2
Low to None	601 (52%)	54 (50%)	44 (44%)	503 (53%)
Moderate to Severe	562 (48%)	55 (50%)	57 (56%)	450 (47%)
pain	4 (3)	4 (3)	5 (3)	4 (3)	0.005
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#above, this shows the category pain is significant (p=.005). this is dependent on time point, which is not in this summary table. low E2 has a mean pain of 5 while mid and high E2 have a mean pain of 4, across all time points.

#when all time points are combined, pain is significant across the different E2 categories
#but below, when i stratify by time point, it is no longer significant

# Summary table for MONTH 2 (8-week follow-up, time == 2), by estrogen bin
filter(e2_full_long, time == 2) %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption(
    "**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**"
  ) %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**
Characteristic	Overall, N = 384¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 384¹	high E2, N = 36¹	low E2, N = 34¹	mid E2, N = 314¹	p-value²
ED_Age	37 (13)	31 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.5
Some College or Associate Degree	155 (40%)	15 (42%)	9 (26%)	131 (42%)
Advanced Degree	27 (7.0%)	2 (5.6%)	1 (2.9%)	24 (7.6%)
High School or Less	135 (35%)	14 (39%)	15 (44%)	106 (34%)
Bachelor's Degree	67 (17%)	5 (14%)	9 (26%)	53 (17%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.6
Race					0.3
Hispanic	55 (14%)	3 (8.3%)	6 (18%)	46 (15%)
Non-Hispanic Black	170 (44%)	16 (44%)	11 (32%)	143 (46%)
Non-Hispanic Other	23 (6.0%)	2 (5.6%)	5 (15%)	16 (5.1%)
Non-Hispanic White	136 (35%)	15 (42%)	12 (35%)	109 (35%)
sqrt.conc	5.84 (3.11)	13.47 (2.66)	2.45 (0.40)	5.33 (1.67)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.7
bin_pain					0.3
Low to None	174 (45%)	13 (36%)	13 (38%)	148 (47%)
Moderate to Severe	210 (55%)	23 (64%)	21 (62%)	166 (53%)
pain	5 (3)	5 (3)	6 (3)	5 (3)	0.10
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

# Summary table for MONTH 3 (time == 3), by estrogen bin
filter(e2_full_long, time == 3) %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption(
    "**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**"
  ) %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**
Characteristic	Overall, N = 382¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 382¹	high E2, N = 36¹	low E2, N = 32¹	mid E2, N = 314¹	p-value²
ED_Age	37 (13)	32 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.7
Some College or Associate Degree	152 (40%)	15 (42%)	9 (28%)	128 (41%)
Advanced Degree	27 (7.1%)	2 (5.6%)	1 (3.1%)	24 (7.6%)
High School or Less	139 (36%)	14 (39%)	14 (44%)	111 (35%)
Bachelor's Degree	64 (17%)	5 (14%)	8 (25%)	51 (16%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.6
Race					0.3
Hispanic	55 (14%)	3 (8.3%)	5 (16%)	47 (15%)
Non-Hispanic Black	169 (44%)	15 (42%)	12 (38%)	142 (45%)
Non-Hispanic Other	23 (6.0%)	2 (5.6%)	5 (16%)	16 (5.1%)
Non-Hispanic White	135 (35%)	16 (44%)	10 (31%)	109 (35%)
sqrt.conc	5.85 (3.08)	13.38 (2.61)	2.47 (0.38)	5.33 (1.68)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.9
bin_pain					0.7
Low to None	196 (51%)	19 (53%)	14 (44%)	163 (52%)
Moderate to Severe	186 (49%)	17 (47%)	18 (56%)	151 (48%)
pain	4 (3)	4 (3)	5 (3)	4 (3)	0.2
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

# Summary table for MONTH 6 (time == 6), by estrogen bin
filter(e2_full_long, time == 6) %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption(
    "**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**"
  ) %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**
Characteristic	Overall, N = 397¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 397¹	high E2, N = 37¹	low E2, N = 35¹	mid E2, N = 325¹	p-value²
ED_Age	37 (13)	31 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.7
Some College or Associate Degree	159 (40%)	15 (41%)	10 (29%)	134 (41%)
Advanced Degree	27 (6.8%)	2 (5.4%)	1 (2.9%)	24 (7.4%)
High School or Less	142 (36%)	14 (38%)	15 (43%)	113 (35%)
Bachelor's Degree	69 (17%)	6 (16%)	9 (26%)	54 (17%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.6
Race					0.3
Hispanic	57 (14%)	3 (8.1%)	6 (17%)	48 (15%)
Non-Hispanic Black	175 (44%)	16 (43%)	12 (34%)	147 (45%)
Non-Hispanic Other	23 (5.8%)	2 (5.4%)	5 (14%)	16 (4.9%)
Non-Hispanic White	142 (36%)	16 (43%)	12 (34%)	114 (35%)
sqrt.conc	5.84 (3.09)	13.46 (2.63)	2.46 (0.40)	5.33 (1.67)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.8
bin_pain					0.5
Low to None	231 (58%)	22 (59%)	17 (49%)	192 (59%)
Moderate to Severe	166 (42%)	15 (41%)	18 (51%)	133 (41%)
pain	4 (3)	4 (3)	5 (3)	4 (3)	0.086
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

what if I only look at low and high e2 levels?

# Same descriptive table restricted to the two extreme estrogen bins,
# pooling all time points
filter(e2_full_long, e2_bin %in% c("high E2", "low E2")) %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  add_p() %>%
  bold_p() %>%
  add_overall()

Characteristic	Overall, N = 210¹	high E2, N = 109¹	low E2, N = 101¹	p-value²
ED_Age	38 (13)	31 (9)	46 (12)	<0.001
ED_HighestGrade				0.070
Some College or Associate Degree	73 (35%)	45 (41%)	28 (28%)
Advanced Degree	9 (4.3%)	6 (5.5%)	3 (3.0%)
High School or Less	86 (41%)	42 (39%)	44 (44%)
Bachelor's Degree	42 (20%)	16 (15%)	26 (26%)
BMI	29 (8)	30 (9)	28 (6)	0.3
Race				0.020
Hispanic	26 (12%)	9 (8.3%)	17 (17%)
Non-Hispanic Black	82 (39%)	47 (43%)	35 (35%)
Non-Hispanic Other	21 (10%)	6 (5.5%)	15 (15%)
Non-Hispanic White	81 (39%)	47 (43%)	34 (34%)
sqrt.conc	8.2 (5.8)	13.4 (2.6)	2.5 (0.4)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	0.2
bin_pain				0.4
Low to None	98 (47%)	54 (50%)	44 (44%)
Moderate to Severe	112 (53%)	55 (50%)	57 (56%)
pain	5 (3)	4 (3)	5 (3)	0.014
¹ Mean (SD); n (%)
² Wilcoxon rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

#this shows pain across all time points is sig (p=.014)

repeat this summary table for each time point

# High vs low estrogen bins at the 2-month time point only
filter(e2_full_long, time == 2, e2_bin %in% c("high E2", "low E2")) %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  add_p() %>%
  bold_p() %>%
  add_overall() # 2 month, p =.3 for pain

## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

Characteristic	Overall, N = 70¹	high E2, N = 36¹	low E2, N = 34¹	p-value²
ED_Age	38 (13)	31 (9)	46 (12)	<0.001
ED_HighestGrade				0.4
Some College or Associate Degree	24 (34%)	15 (42%)	9 (26%)
Advanced Degree	3 (4.3%)	2 (5.6%)	1 (2.9%)
High School or Less	29 (41%)	14 (39%)	15 (44%)
Bachelor's Degree	14 (20%)	5 (14%)	9 (26%)
BMI	29 (8)	30 (9)	28 (6)	0.5
Race				0.3
Hispanic	9 (13%)	3 (8.3%)	6 (18%)
Non-Hispanic Black	27 (39%)	16 (44%)	11 (32%)
Non-Hispanic Other	7 (10%)	2 (5.6%)	5 (15%)
Non-Hispanic White	27 (39%)	15 (42%)	12 (35%)
sqrt.conc	8.1 (5.9)	13.5 (2.7)	2.4 (0.4)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	0.4
bin_pain				0.9
Low to None	26 (37%)	13 (36%)	13 (38%)
Moderate to Severe	44 (63%)	23 (64%)	21 (62%)
pain	5 (3)	5 (3)	6 (3)	0.3
¹ Mean (SD); n (%)
² Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test

# High vs low estrogen bins at the 3-month time point only
filter(e2_full_long, time == 3, e2_bin %in% c("high E2", "low E2")) %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  add_p() %>%
  bold_p() %>%
  add_overall() # 3 month, p =.12 for pain

## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

Characteristic	Overall, N = 68¹	high E2, N = 36¹	low E2, N = 32¹	p-value²
ED_Age	38 (12)	32 (9)	46 (12)	<0.001
ED_HighestGrade				0.5
Some College or Associate Degree	24 (35%)	15 (42%)	9 (28%)
Advanced Degree	3 (4.4%)	2 (5.6%)	1 (3.1%)
High School or Less	28 (41%)	14 (39%)	14 (44%)
Bachelor's Degree	13 (19%)	5 (14%)	8 (25%)
BMI	29 (8)	30 (9)	28 (6)	0.5
Race				0.4
Hispanic	8 (12%)	3 (8.3%)	5 (16%)
Non-Hispanic Black	27 (40%)	15 (42%)	12 (38%)
Non-Hispanic Other	7 (10%)	2 (5.6%)	5 (16%)
Non-Hispanic White	26 (38%)	16 (44%)	10 (31%)
sqrt.conc	8.2 (5.8)	13.4 (2.6)	2.5 (0.4)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	0.6
bin_pain				0.5
Low to None	33 (49%)	19 (53%)	14 (44%)
Moderate to Severe	35 (51%)	17 (47%)	18 (56%)
pain	4 (3)	4 (3)	5 (3)	0.12
¹ Mean (SD); n (%)
² Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test

# High vs low estrogen bins at the 6-month time point only
filter(e2_full_long, time == 6, e2_bin %in% c("high E2", "low E2")) %>%
  select(
    ED_Age, ED_HighestGrade, BMI, Race,
    sqrt.conc, ED_Pain, bin_pain, pain, e2_bin
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  add_p() %>%
  bold_p() %>%
  add_overall() # 6 month, p =.081 for pain

## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties

Characteristic	Overall, N = 72¹	high E2, N = 37¹	low E2, N = 35¹	p-value²
ED_Age	38 (13)	31 (9)	46 (12)	<0.001
ED_HighestGrade				0.6
Some College or Associate Degree	25 (35%)	15 (41%)	10 (29%)
Advanced Degree	3 (4.2%)	2 (5.4%)	1 (2.9%)
High School or Less	29 (40%)	14 (38%)	15 (43%)
Bachelor's Degree	15 (21%)	6 (16%)	9 (26%)
BMI	29 (8)	30 (9)	28 (6)	0.6
Race				0.4
Hispanic	9 (13%)	3 (8.1%)	6 (17%)
Non-Hispanic Black	28 (39%)	16 (43%)	12 (34%)
Non-Hispanic Other	7 (9.7%)	2 (5.4%)	5 (14%)
Non-Hispanic White	28 (39%)	16 (43%)	12 (34%)
sqrt.conc	8.1 (5.9)	13.5 (2.6)	2.5 (0.4)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	0.5
bin_pain				0.4
Low to None	39 (54%)	22 (59%)	17 (49%)
Moderate to Severe	33 (46%)	15 (41%)	18 (51%)
pain	4 (3)	4 (3)	5 (3)	0.081
¹ Mean (SD); n (%)
² Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test

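damn !! i don’t know. it’s not significant when stratified by time point. I think this is because it is auto-correlating when all time points are combined: each case appears up to three times (once per time point), so the pooled test treats repeated measurements on the same person as independent observations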

visuals

ggplot(e2_full_long, aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain Across All Time Points", y = "Density", title = "Pain All Time Points, by Estrogen Bin") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))

#from this we see that mid and high estrogen had more 0s. low E2 had more 10s
#let's repeat this by time point to see we aren't getting auto-correlation

# Same density overlay as the pooled plot, drawn separately for each
# follow-up time point (2, 3 and 6 months).

# Pain at 2 months
e2_full_long %>%
  filter(time == 2) %>%
  ggplot(mapping = aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")
  ) +
  labs(
    title = "Pain at 2 MONTHS, by Estrogen Category",
    x = "Pain at 2 MONTHS",
    y = "Density"
  )

# Pain at 3 months
e2_full_long %>%
  filter(time == 3) %>%
  ggplot(mapping = aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")
  ) +
  labs(
    title = "Pain at 3 MONTHS, by Estrogen Category",
    x = "Pain at 3 MONTHS",
    y = "Density"
  )

# Pain at 6 months
e2_full_long %>%
  filter(time == 6) %>%
  ggplot(mapping = aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")
  ) +
  labs(
    title = "Pain at 6 MONTHS, by Estrogen Category",
    x = "Pain at 6 MONTHS",
    y = "Density"
  )

# Horizontal boxplots of pain (x) for each estrogen bin (y), one plot per
# follow-up time point.

# Month 2
e2_full_long %>%
  filter(time == 2) %>%
  ggplot(mapping = aes(x = pain, y = e2_bin)) +
  geom_boxplot() +
  labs(title = "E2_bin X Pain at month 2")

# Month 3
e2_full_long %>%
  filter(time == 3) %>%
  ggplot(mapping = aes(x = pain, y = e2_bin)) +
  geom_boxplot() +
  labs(title = "E2_bin X Pain at month 3")

# Month 6
e2_full_long %>%
  filter(time == 6) %>%
  ggplot(mapping = aes(x = pain, y = e2_bin)) +
  geom_boxplot() +
  labs(title = "E2_bin X Pain at month 6")

Ok this is very useful!! We see that low E2 has fewer 0s and more 10s at each time point. This makes me think a different pain stratification should take place that focuses on the extremes of the pain NRS scale.

# Pain-score histograms within a single estrogen bin, with the bars for the
# different time points drawn side by side (one fill colour per time point).

# High E2
e2_full_long %>%
  filter(e2_bin == "high E2") %>%
  ggplot(mapping = aes(x = pain, fill = as.factor(time))) +
  geom_histogram(
    binwidth = 1,
    position = "dodge",
    alpha = 0.7,
    color = "black"
  ) +
  labs(title = "High E2 x Pain", x = "Pain", y = "Frequency")

# Mid E2
e2_full_long %>%
  filter(e2_bin == "mid E2") %>%
  ggplot(mapping = aes(x = pain, fill = as.factor(time))) +
  geom_histogram(
    binwidth = 1,
    position = "dodge",
    alpha = 0.7,
    color = "black"
  ) +
  labs(title = "Mid E2 x Pain", x = "Pain", y = "Frequency")

# Low E2
e2_full_long %>%
  filter(e2_bin == "low E2") %>%
  ggplot(mapping = aes(x = pain, fill = as.factor(time))) +
  geom_histogram(
    binwidth = 1,
    position = "dodge",
    alpha = 0.7,
    color = "black"
  ) +
  labs(title = "Low E2 x Pain", x = "Pain", y = "Frequency")

# High vs low E2 only (mid E2 dropped): side-by-side pain histograms at each
# time point. Author's reading of all three plots: way more 0s for high E2 and
# more 10s for low E2.

# Month 2
e2_full_long %>%
  filter(time == 2, e2_bin %in% c("high E2", "low E2")) %>%
  ggplot(mapping = aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = 0.6, position = "dodge", color = "black") +
  scale_fill_manual(
    values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")
  ) +
  labs(
    title = "Pain at Month 2 by High and Low E2",
    x = "Pain Score",
    y = "Frequency"
  )

# Month 3
e2_full_long %>%
  filter(time == 3, e2_bin %in% c("high E2", "low E2")) %>%
  ggplot(mapping = aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = 0.6, position = "dodge", color = "black") +
  scale_fill_manual(
    values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")
  ) +
  labs(
    title = "Pain at Month 3 by High and Low E2",
    x = "Pain Score",
    y = "Frequency"
  )

# Month 6
e2_full_long %>%
  filter(time == 6, e2_bin %in% c("high E2", "low E2")) %>%
  ggplot(mapping = aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = 0.6, position = "dodge", color = "black") +
  scale_fill_manual(
    values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")
  ) +
  labs(
    title = "Pain at Month 6 by High and Low E2",
    x = "Pain Score",
    y = "Frequency"
  )

chi square ?

# Cross-tabulate estrogen bin (rows) against each observed pain score
# (columns), with no filtering on time point.
table(e2_full_long$e2_bin, as.factor(e2_full_long$pain))

##          
##             0   1   2   3   4   5   6   7   8   9  10
##   high E2  27   6   9   6   6  15   9  12  10   1   8
##   low E2    7  11   5  10  11  13   9   5   6   3  21
##   mid E2  188  75  55  93  92 111 111  75  71  35  47

# Cross-tabulate estrogen bin (rows) against the two-level pain category
# bin_pain (columns), with no filtering on time point.
table(e2_full_long$e2_bin, e2_full_long$bin_pain)

##          
##           Low to None Moderate to Severe
##   high E2          54                 55
##   low E2           44                 57
##   mid E2          503                450

# Pearson chi-squared test of independence between estrogen bin and the raw
# pain score (treated as a factor), on all rows.
# NOTE(review): R warns here that the chi-squared approximation may be
# incorrect; simulate.p.value = TRUE or an exact test may be more appropriate
# -- TODO confirm.
# NOTE(review): rows from all time points are pooled, so one participant can
# presumably contribute several rows, while the test assumes independent
# observations -- interpret the p-value with caution.
chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$pain))

## Warning in chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$pain)):
## Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and as.factor(e2_full_long$pain)
## X-squared = 59.641, df = 20, p-value = 8.093e-06

# Pearson chi-squared test of independence between estrogen bin and the
# two-level pain category bin_pain, on all rows.
# NOTE(review): rows from all time points are pooled, so the independence
# assumption of the test is presumably violated -- verify before reporting.
chisq.test(e2_full_long$e2_bin, e2_full_long$bin_pain) #p-value = 0.1896

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and e2_full_long$bin_pain
## X-squared = 3.3259, df = 2, p-value = 0.1896

# Kruskal-Wallis rank sum test comparing the pain score across the three
# estrogen bins, on all rows.
# NOTE(review): rows from all time points are pooled, so the independence
# assumption of the test is presumably violated -- verify before reporting.
kruskal.test(pain ~ e2_bin, data = e2_full_long) # p-value = 0.00459

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pain by e2_bin
## Kruskal-Wallis chi-squared = 10.768, df = 2, p-value = 0.00459

make new levels for pain

# Add two alternative three-level pain groupings that isolate the extremes of
# the pain scale:
#   bin_pain2: "0 or 1 pain" / "2 to 8 pain" / "9 or 10 pain"
#   bin_pain3: "0 pain"      / "1 to 9 pain" / "10 pain"
# Rows matching no condition (e.g. missing pain) are left as NA by case_when().
e2_full_long <- mutate(
  e2_full_long,
  bin_pain2 = case_when(
    pain < 2 ~ "0 or 1 pain",
    1 < pain & pain < 9 ~ "2 to 8 pain",
    pain > 8 ~ "9 or 10 pain"
  ),
  bin_pain3 = case_when(
    pain < 1 ~ "0 pain",
    0 < pain & pain < 10 ~ "1 to 9 pain",
    pain > 9 ~ "10 pain"
  )
)
#new dimensions 1163 x 15 after bin_pain2, 1163 x 16 after bin_pain3

summary table

# All time points pooled (no filter on time): characteristics by estrogen bin,
# with an overall column and p-values (significant ones in bold).
# NOTE(review): every row is counted, so baseline variables are presumably
# repeated once per time point for each participant; pooled p-values for such
# variables may be overstated -- compare with the per-time-point tables.
e2_full_long %>%
  dplyr::select(
    ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain,
    bin_pain, pain, e2_bin, bin_pain2, bin_pain3
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption(
    "**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**"
  ) %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**
Characteristic	Overall, N = 1,163¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 1,163¹	high E2, N = 109¹	low E2, N = 101¹	mid E2, N = 953¹	p-value²
ED_Age	37 (13)	31 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.024
Some College or Associate Degree	466 (40%)	45 (41%)	28 (28%)	393 (41%)
Advanced Degree	81 (7.0%)	6 (5.5%)	3 (3.0%)	72 (7.6%)
High School or Less	416 (36%)	42 (39%)	44 (44%)	330 (35%)
Bachelor's Degree	200 (17%)	16 (15%)	26 (26%)	158 (17%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.2
Race					<0.001
Hispanic	167 (14%)	9 (8.3%)	17 (17%)	141 (15%)
Non-Hispanic Black	514 (44%)	47 (43%)	35 (35%)	432 (45%)
Non-Hispanic Other	69 (5.9%)	6 (5.5%)	15 (15%)	48 (5.0%)
Non-Hispanic White	413 (36%)	47 (43%)	34 (34%)	332 (35%)
sqrt.conc	5.84 (3.09)	13.44 (2.61)	2.46 (0.39)	5.33 (1.67)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.5
bin_pain					0.2
Low to None	601 (52%)	54 (50%)	44 (44%)	503 (53%)
Moderate to Severe	562 (48%)	55 (50%)	57 (56%)	450 (47%)
pain	4 (3)	4 (3)	5 (3)	4 (3)	0.005
bin_pain2					<0.001
0 or 1 pain	314 (27%)	33 (30%)	18 (18%)	263 (28%)
2 to 8 pain	734 (63%)	67 (61%)	59 (58%)	608 (64%)
9 or 10 pain	115 (9.9%)	9 (8.3%)	24 (24%)	82 (8.6%)
bin_pain3					<0.001
0 pain	222 (19%)	27 (25%)	7 (6.9%)	188 (20%)
1 to 9 pain	865 (74%)	74 (68%)	73 (72%)	718 (75%)
10 pain	76 (6.5%)	8 (7.3%)	21 (21%)	47 (4.9%)
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#bin pain 2 and bin pain 3 significant (p<.001)

# Month 2 (captioned as 8 weeks): characteristics by estrogen bin, with an
# overall column and p-values (significant ones in bold).
e2_full_long %>%
  dplyr::filter(time == 2) %>%
  dplyr::select(
    ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain,
    bin_pain, pain, e2_bin, bin_pain2, bin_pain3
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption("**WK8 Table: Features by Estrogen Bin, 8 weeks**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**WK8 Table: Features by Estrogen Bin, 8 weeks**
Characteristic	Overall, N = 384¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 384¹	high E2, N = 36¹	low E2, N = 34¹	mid E2, N = 314¹	p-value²
ED_Age	37 (13)	31 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.5
Some College or Associate Degree	155 (40%)	15 (42%)	9 (26%)	131 (42%)
Advanced Degree	27 (7.0%)	2 (5.6%)	1 (2.9%)	24 (7.6%)
High School or Less	135 (35%)	14 (39%)	15 (44%)	106 (34%)
Bachelor's Degree	67 (17%)	5 (14%)	9 (26%)	53 (17%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.6
Race					0.3
Hispanic	55 (14%)	3 (8.3%)	6 (18%)	46 (15%)
Non-Hispanic Black	170 (44%)	16 (44%)	11 (32%)	143 (46%)
Non-Hispanic Other	23 (6.0%)	2 (5.6%)	5 (15%)	16 (5.1%)
Non-Hispanic White	136 (35%)	15 (42%)	12 (35%)	109 (35%)
sqrt.conc	5.84 (3.11)	13.47 (2.66)	2.45 (0.40)	5.33 (1.67)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.7
bin_pain					0.3
Low to None	174 (45%)	13 (36%)	13 (38%)	148 (47%)
Moderate to Severe	210 (55%)	23 (64%)	21 (62%)	166 (53%)
pain	5 (3)	5 (3)	6 (3)	5 (3)	0.10
bin_pain2					0.3
0 or 1 pain	72 (19%)	7 (19%)	4 (12%)	61 (19%)
2 to 8 pain	267 (70%)	26 (72%)	22 (65%)	219 (70%)
9 or 10 pain	45 (12%)	3 (8.3%)	8 (24%)	34 (11%)
bin_pain3					0.047
0 pain	48 (13%)	6 (17%)	2 (5.9%)	40 (13%)
1 to 9 pain	307 (80%)	27 (75%)	25 (74%)	255 (81%)
10 pain	29 (7.6%)	3 (8.3%)	7 (21%)	19 (6.1%)
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

#bin pain 3 significant (p=0.047) but not bin_pain2

# Month 3: characteristics by estrogen bin, with an overall column and
# p-values (significant ones in bold).
e2_full_long %>%
  dplyr::filter(time == 3) %>%
  dplyr::select(
    ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain,
    bin_pain, pain, e2_bin, bin_pain2, bin_pain3
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, 3 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**M3 Table: Features by Estrogen Bin, 3 months**
Characteristic	Overall, N = 382¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 382¹	high E2, N = 36¹	low E2, N = 32¹	mid E2, N = 314¹	p-value²
ED_Age	37 (13)	32 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.7
Some College or Associate Degree	152 (40%)	15 (42%)	9 (28%)	128 (41%)
Advanced Degree	27 (7.1%)	2 (5.6%)	1 (3.1%)	24 (7.6%)
High School or Less	139 (36%)	14 (39%)	14 (44%)	111 (35%)
Bachelor's Degree	64 (17%)	5 (14%)	8 (25%)	51 (16%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.6
Race					0.3
Hispanic	55 (14%)	3 (8.3%)	5 (16%)	47 (15%)
Non-Hispanic Black	169 (44%)	15 (42%)	12 (38%)	142 (45%)
Non-Hispanic Other	23 (6.0%)	2 (5.6%)	5 (16%)	16 (5.1%)
Non-Hispanic White	135 (35%)	16 (44%)	10 (31%)	109 (35%)
sqrt.conc	5.85 (3.08)	13.38 (2.61)	2.47 (0.38)	5.33 (1.68)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.9
bin_pain					0.7
Low to None	196 (51%)	19 (53%)	14 (44%)	163 (52%)
Moderate to Severe	186 (49%)	17 (47%)	18 (56%)	151 (48%)
pain	4 (3)	4 (3)	5 (3)	4 (3)	0.2
bin_pain2					0.086
0 or 1 pain	105 (27%)	13 (36%)	5 (16%)	87 (28%)
2 to 8 pain	241 (63%)	20 (56%)	20 (63%)	201 (64%)
9 or 10 pain	36 (9.4%)	3 (8.3%)	7 (22%)	26 (8.3%)
bin_pain3					0.012
0 pain	72 (19%)	10 (28%)	3 (9.4%)	59 (19%)
1 to 9 pain	287 (75%)	23 (64%)	23 (72%)	241 (77%)
10 pain	23 (6.0%)	3 (8.3%)	6 (19%)	14 (4.5%)
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

#bin_pain3 is sig (p=0.012) but not bin pain 2 

# Month 6: characteristics by estrogen bin, with an overall column and
# p-values (significant ones in bold).
e2_full_long %>%
  dplyr::filter(time == 6) %>%
  dplyr::select(
    ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain,
    bin_pain, pain, e2_bin, bin_pain2, bin_pain3
  ) %>%
  tbl_summary(
    by = e2_bin,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) %>%
  modify_spanning_header(
    c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**"
  ) %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, 6 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

**M6 Table: Features by Estrogen Bin, 6 months**
Characteristic	Overall, N = 397¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 397¹	high E2, N = 37¹	low E2, N = 35¹	mid E2, N = 325¹	p-value²
ED_Age	37 (13)	31 (9)	46 (12)	37 (14)	<0.001
ED_HighestGrade					0.7
Some College or Associate Degree	159 (40%)	15 (41%)	10 (29%)	134 (41%)
Advanced Degree	27 (6.8%)	2 (5.4%)	1 (2.9%)	24 (7.4%)
High School or Less	142 (36%)	14 (38%)	15 (43%)	113 (35%)
Bachelor's Degree	69 (17%)	6 (16%)	9 (26%)	54 (17%)
BMI	30 (8)	30 (9)	28 (6)	30 (8)	0.6
Race					0.3
Hispanic	57 (14%)	3 (8.1%)	6 (17%)	48 (15%)
Non-Hispanic Black	175 (44%)	16 (43%)	12 (34%)	147 (45%)
Non-Hispanic Other	23 (5.8%)	2 (5.4%)	5 (14%)	16 (4.9%)
Non-Hispanic White	142 (36%)	16 (43%)	12 (34%)	114 (35%)
sqrt.conc	5.84 (3.09)	13.46 (2.63)	2.46 (0.40)	5.33 (1.67)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.8
bin_pain					0.5
Low to None	231 (58%)	22 (59%)	17 (49%)	192 (59%)
Moderate to Severe	166 (42%)	15 (41%)	18 (51%)	133 (41%)
pain	4 (3)	4 (3)	5 (3)	4 (3)	0.086
bin_pain2					0.021
0 or 1 pain	137 (35%)	13 (35%)	9 (26%)	115 (35%)
2 to 8 pain	226 (57%)	21 (57%)	17 (49%)	188 (58%)
9 or 10 pain	34 (8.6%)	3 (8.1%)	9 (26%)	22 (6.8%)
bin_pain3					<0.001
0 pain	102 (26%)	11 (30%)	2 (5.7%)	89 (27%)
1 to 9 pain	271 (68%)	24 (65%)	25 (71%)	222 (68%)
10 pain	24 (6.0%)	2 (5.4%)	8 (23%)	14 (4.3%)
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

#bin pain 2 sig (p=0.021) and bin pain 3 sig (p<.001)

hell yeah

stats for new categories

# Counts of the three-level pain category bin_pain3 (columns) within each
# estrogen bin (rows), with no filtering on time point.
table(e2_full_long$e2_bin, e2_full_long$bin_pain3)

##          
##           0 pain 1 to 9 pain 10 pain
##   high E2     27          74       8
##   low E2       7          73      21
##   mid E2     188         718      47

# Same cross-tabulation expressed as row percentages (margin = 1: each
# estrogen bin sums to 100), rounded to two decimals.
round(prop.table(table(e2_full_long$e2_bin, e2_full_long$bin_pain3), margin = 1) * 100,2)

##          
##           0 pain 1 to 9 pain 10 pain
##   high E2  24.77       67.89    7.34
##   low E2    6.93       72.28   20.79
##   mid E2   19.73       75.34    4.93

# Pearson chi-squared test of estrogen bin vs bin_pain3 restricted to the rows
# with time == 2.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; consider fisher.test() or simulate.p.value = TRUE -- TODO
# confirm which p-value should be reported.
chisq.test(e2_full_long[e2_full_long$time == 2, "e2_bin"], e2_full_long[e2_full_long$time == 2, "bin_pain3"]) # p-value = 0.03053

## Warning in chisq.test(e2_full_long[e2_full_long$time == 2, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 2, "e2_bin"] and e2_full_long[e2_full_long$time == 2, "bin_pain3"]
## X-squared = 10.67, df = 4, p-value = 0.03053

# Pearson chi-squared test of estrogen bin vs bin_pain3 restricted to the rows
# with time == 3.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; consider fisher.test() or simulate.p.value = TRUE -- TODO
# confirm which p-value should be reported.
chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"], e2_full_long[e2_full_long$time == 3, "bin_pain3"]) #p-value = 0.007257

## Warning in chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 3, "e2_bin"] and e2_full_long[e2_full_long$time == 3, "bin_pain3"]
## X-squared = 14.012, df = 4, p-value = 0.007257

# Pearson chi-squared test of estrogen bin vs bin_pain3 restricted to the rows
# with time == 6.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; consider fisher.test() or simulate.p.value = TRUE -- TODO
# confirm which p-value should be reported.
chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"], e2_full_long[e2_full_long$time == 6, "bin_pain3"]) #p-value = 7.419e-05

## Warning in chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 6, "e2_bin"] and e2_full_long[e2_full_long$time == 6, "bin_pain3"]
## X-squared = 24.16, df = 4, p-value = 7.419e-05

redo model from above

MARCH 2024 NEW E2 DATA CODE GLS MODEL

# Generalized least squares model of the pain score on time, estradiol (both
# the continuous sqrt.conc and the categorical e2_bin) and covariates.
#   correlation = corAR1(form = ~ time | PID): AR(1) correlation among the
#     observations sharing a PID, with `time` as the position covariate.
#   control = list(singular.ok = FALSE): control list handed to gls().
# NOTE(review): gls()/corAR1() presumably come from nlme, which is not attached
# in the visible part of the header -- confirm it is loaded earlier.
# NOTE(review): sqrt.conc and e2_bin enter the model together; e2_bin looks
# like a binning of sqrt.conc, which would make the two sets of coefficients
# hard to interpret separately -- verify this is intended.
e2_model <- gls(pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long)

# Print the fitted coefficients, correlation parameter and residual summary.
summary(e2_model)

## Generalized least squares fit by REML
##   Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +      Race + Plate.Number + e2_bin 
##   Data: e2_full_long 
##        AIC      BIC    logLik
##   5563.071 5648.849 -2764.535
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##      Phi1 
## 0.7287282 
## 
## Coefficients:
##                                         Value Std.Error   t-value p-value
## (Intercept)                         1.1045118 1.2952495  0.852741  0.3940
## time                               -0.2372138 0.0453306 -5.232973  0.0000
## sqrt.conc                           0.0769038 0.0720024  1.068072  0.2857
## ED_Age                              0.0397639 0.0097896  4.061841  0.0001
## ED_HighestGradeAdvanced Degree     -1.0276386 0.5174215 -1.986076  0.0473
## ED_HighestGradeHigh School or Less -0.0479274 0.2807562 -0.170708  0.8645
## ED_HighestGradeBachelor's Degree   -0.7130133 0.3522310 -2.024277  0.0432
## BMI                                 0.0287845 0.0147894  1.946294  0.0519
## ED_Pain                             0.2582287 0.0489249  5.278060  0.0000
## RaceNon-Hispanic Black             -0.5138723 0.3735496 -1.375647  0.1692
## RaceNon-Hispanic Other             -0.5722177 0.5989953 -0.955296  0.3396
## RaceNon-Hispanic White             -0.5373559 0.3901958 -1.377144  0.1687
## Plate.Number                       -0.0233885 0.0186592 -1.253453  0.2103
## e2_binlow E2                        1.6358354 0.9715319  1.683769  0.0925
## e2_binmid E2                        0.4536050 0.7128541  0.636322  0.5247
## 
##  Correlation: 
##                                    (Intr) time   sqrt.c ED_Age ED_HGAD ED_SoL
## time                               -0.143                                    
## sqrt.conc                          -0.743 -0.001                             
## ED_Age                             -0.280  0.004  0.146                      
## ED_HighestGradeAdvanced Degree     -0.153  0.002  0.013 -0.106               
## ED_HighestGradeHigh School or Less -0.149  0.000 -0.001  0.117  0.231        
## ED_HighestGradeBachelor's Degree   -0.083  0.000 -0.019 -0.058  0.239   0.343
## BMI                                -0.237  0.006 -0.107 -0.180  0.072   0.044
## ED_Pain                            -0.272  0.002 -0.045  0.046  0.135  -0.073
## RaceNon-Hispanic Black             -0.222  0.000  0.004 -0.069  0.224   0.038
## RaceNon-Hispanic Other             -0.107  0.002 -0.096  0.040  0.110   0.007
## RaceNon-Hispanic White             -0.266 -0.002  0.044 -0.090  0.134   0.055
## Plate.Number                       -0.206  0.000  0.029 -0.019  0.062   0.067
## e2_binlow E2                       -0.709 -0.001  0.798 -0.039  0.040  -0.039
## e2_binmid E2                       -0.771 -0.002  0.811  0.046  0.014   0.000
##                                    ED_HGBD BMI    ED_Pan RcN-HB RcN-HO RcN-HW
## time                                                                         
## sqrt.conc                                                                    
## ED_Age                                                                       
## ED_HighestGradeAdvanced Degree                                               
## ED_HighestGradeHigh School or Less                                           
## ED_HighestGradeBachelor's Degree                                             
## BMI                                 0.028                                    
## ED_Pain                             0.070   0.013                            
## RaceNon-Hispanic Black              0.075  -0.010  0.014                     
## RaceNon-Hispanic Other              0.056   0.019  0.097  0.464              
## RaceNon-Hispanic White             -0.048  -0.012  0.157  0.718  0.441       
## Plate.Number                        0.014   0.059  0.075 -0.037 -0.047 -0.105
## e2_binlow E2                       -0.044  -0.033 -0.014  0.048 -0.105  0.092
## e2_binmid E2                       -0.014  -0.079 -0.012  0.029 -0.063  0.083
##                                    Plt.Nm e2_bnlE2
## time                                              
## sqrt.conc                                         
## ED_Age                                            
## ED_HighestGradeAdvanced Degree                    
## ED_HighestGradeHigh School or Less                
## ED_HighestGradeBachelor's Degree                  
## BMI                                               
## ED_Pain                                           
## RaceNon-Hispanic Black                            
## RaceNon-Hispanic Other                            
## RaceNon-Hispanic White                            
## Plate.Number                                      
## e2_binlow E2                        0.026         
## e2_binmid E2                        0.034  0.880  
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.20825378 -0.80873595  0.03119517  0.73834813  2.56387776 
## 
## Residual standard error: 2.996335 
## Degrees of freedom: 1163 total; 1148 residual

# Re-express the three-level pain category as a factor with levels "1", "2",
# "3" (0 pain / 1 to 9 pain / 10 pain) so the binned pain can be used as the
# outcome of interest.
e2_full_long <- e2_full_long %>%
  mutate(
    bin_pain4 = recode_factor(
      bin_pain3,
      `0 pain` = "1",
      `1 to 9 pain` = "2",
      `10 pain` = "3"
    )
  ) #new dimensions 1163 x 17

# Multinomial logit regression of the three-level pain outcome bin_pain4
# (1 = "0 pain", 2 = "1 to 9 pain", 3 = "10 pain") on time, estradiol and
# covariates, using all rows.
# multinom() has no `correlation` or `control` arguments: the corAR1() and
# singular.ok settings copied from the gls() call were swallowed by `...` and
# silently ignored, so they are removed here. The fit itself is unchanged.
# NOTE(review): this model therefore treats the repeated measurements of each
# PID as independent observations; its standard errors and p-values do not
# account for within-participant correlation. A mixed-effects or GEE
# multinomial model would be needed for that.
# NOTE(review): this assignment overwrites the gls fit previously stored in
# e2_model.
e2_model <- multinom(
  bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +
    Race + Plate.Number + e2_bin,
  data = e2_full_long
) #e2_bin low E2  p = 2.398452e-03 = 0.002398452

## # weights:  48 (30 variable)
## initial  value 1277.686092 
## iter  10 value 976.255582
## iter  20 value 828.205849
## iter  30 value 760.275081
## iter  40 value 757.915874
## final  value 757.915779 
## converged

#library(kableExtra)
# Coefficient table (with confidence intervals) for e2_model, rendered as a
# basic, non-full-width kable.
kable_styling(
  kable(tidy(e2_model, conf.int = TRUE)),
  bootstrap_options = "basic",
  full_width = FALSE
)

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	1.0845445	0.8347454	1.2992519	0.1938575	-0.5515263	2.7206154
2	time	-0.1986780	0.0441989	-4.4950857	0.0000070	-0.2853063	-0.1120497
2	sqrt.conc	-0.0246233	0.0448684	-0.5487893	0.5831500	-0.1125638	0.0633172
2	ED_Age	0.0241006	0.0066603	3.6185254	0.0002963	0.0110466	0.0371546
2	ED_HighestGradeAdvanced Degree	0.0625538	0.3408485	0.1835238	0.8543871	-0.6054970	0.7306046
2	ED_HighestGradeHigh School or Less	-0.1319082	0.1793021	-0.7356753	0.4619283	-0.4833339	0.2195176
2	ED_HighestGradeBachelor’s Degree	0.1582817	0.2390016	0.6622620	0.5078034	-0.3101529	0.6267163
2	BMI	0.0116255	0.0097880	1.1877339	0.2349383	-0.0075586	0.0308096
2	ED_Pain	0.0897672	0.0315763	2.8428666	0.0044710	0.0278788	0.1516556
2	RaceNon-Hispanic Black	-0.5316580	0.2670261	-1.9910341	0.0464771	-1.0550195	-0.0082965
2	RaceNon-Hispanic Other	-0.3982038	0.3928737	-1.0135670	0.3107894	-1.1682220	0.3718145
2	RaceNon-Hispanic White	-0.3554165	0.2777867	-1.2794584	0.2007357	-0.8998683	0.1890354
2	Plate.Number	-0.0205870	0.0118948	-1.7307527	0.0834959	-0.0439005	0.0027264
2	e2_binlow E2	0.8054025	0.6832337	1.1788096	0.2384740	-0.5337110	2.1445161
2	e2_binmid E2	0.0402581	0.4401452	0.0914656	0.9271226	-0.8224105	0.9029268
3	(Intercept)	-4.3091417	1.5604971	-2.7613905	0.0057556	-7.3676598	-1.2506237
3	time	-0.2171498	0.0828435	-2.6212044	0.0087620	-0.3795201	-0.0547795
3	sqrt.conc	0.1403640	0.0810177	1.7325095	0.0831829	-0.0184278	0.2991558
3	ED_Age	0.0518864	0.0123929	4.1868010	0.0000283	0.0275969	0.0761759
3	ED_HighestGradeAdvanced Degree	-1.5629041	1.0823019	-1.4440556	0.1487233	-3.6841767	0.5583686
3	ED_HighestGradeHigh School or Less	-0.1705342	0.3118840	-0.5467874	0.5845248	-0.7818157	0.4407472
3	ED_HighestGradeBachelor’s Degree	-0.6793027	0.5122167	-1.3262017	0.1847729	-1.6832291	0.3246236
3	BMI	-0.0214102	0.0196563	-1.0892260	0.2760542	-0.0599359	0.0171155
3	ED_Pain	0.3089434	0.0655578	4.7125342	0.0000024	0.1804525	0.4374343
3	RaceNon-Hispanic Black	-0.5619284	0.4226214	-1.3296261	0.1836415	-1.3902511	0.2663943
3	RaceNon-Hispanic Other	-0.8826778	0.7154517	-1.2337350	0.2173017	-2.2849373	0.5195818
3	RaceNon-Hispanic White	-0.6647507	0.4814270	-1.3807924	0.1673428	-1.6083302	0.2788288
3	Plate.Number	-0.0440065	0.0225823	-1.9487162	0.0513293	-0.0882670	0.0002540
3	e2_binlow E2	3.3565935	1.1056458	3.0358670	0.0023985	1.1895676	5.5236193
3	e2_binmid E2	0.7428651	0.7929725	0.9368107	0.3488560	-0.8113324	2.2970625

#e2_bin low E2 on category 3 (10 pain)  p = 2.398452e-03 = 0.002398452  
#this means that being in the LOW E2 category (compared with the reference HIGH E2 category) is significantly associated with having level 10 PAIN rather than Level 0 Pain



# Fit the multinomial logistic regression model for cases where
# e2_full_long$time == 2 months (time is dropped from the predictors because
# it is constant within the subset).
# multinom() has no `control` argument: the singular.ok setting was swallowed
# by `...` and silently ignored, so it is removed here. The fit is unchanged.
# NOTE(review): in the fitted table the level-3 coefficient for
# "ED_HighestGradeAdvanced Degree" is about -14.2 with a standard error near
# zero, which looks like (quasi-)complete separation -- presumably no
# Advanced Degree rows fall in "10 pain" at month 2; verify and do not
# interpret that p-value.
model_m2 <- multinom(
  bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race +
    Plate.Number + e2_bin,
  subset = time == 2,
  data = e2_full_long
)

## # weights:  45 (28 variable)
## initial  value 421.867119 
## iter  10 value 246.880449
## iter  20 value 217.350612
## iter  30 value 215.581016
## iter  40 value 215.559264
## iter  50 value 215.558537
## iter  50 value 215.558536
## iter  50 value 215.558536
## final  value 215.558536 
## converged

# Coefficient table (with confidence intervals) for the month-2 model.
kable_styling(
  kable(tidy(model_m2, conf.int = TRUE)),
  bootstrap_options = "basic",
  full_width = FALSE
) #low e2 significant vs level 10 pain

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	0.0858882	1.7295095	4.966050e-02	0.9603930	-3.3038880	3.4756645
2	sqrt.conc	-0.0147206	0.0959880	-1.533589e-01	0.8781152	-0.2028536	0.1734124
2	ED_Age	0.0334912	0.0143580	2.332580e+00	0.0196702	0.0053500	0.0616325
2	ED_HighestGradeAdvanced Degree	0.1225345	0.7225473	1.695868e-01	0.8653351	-1.2936321	1.5387012
2	ED_HighestGradeHigh School or Less	-0.4313307	0.3744552	-1.151889e+00	0.2493667	-1.1652494	0.3025879
2	ED_HighestGradeBachelor’s Degree	0.1035662	0.5042208	2.053985e-01	0.8372608	-0.8846884	1.0918208
2	BMI	-0.0005951	0.0197932	-3.006750e-02	0.9760132	-0.0393891	0.0381989
2	ED_Pain	0.2279952	0.0642853	3.546617e+00	0.0003902	0.1019984	0.3539920
2	RaceNon-Hispanic Black	-0.9484818	0.6599985	-1.437097e+00	0.1506904	-2.2420551	0.3450915
2	RaceNon-Hispanic Other	-0.8525189	0.8584694	-9.930685e-01	0.3206766	-2.5350880	0.8300502
2	RaceNon-Hispanic White	-0.9190518	0.6705089	-1.370678e+00	0.1704753	-2.2332251	0.3951215
2	Plate.Number	0.0172467	0.0253205	6.811342e-01	0.4957866	-0.0323806	0.0668739
2	e2_binlow E2	0.5784955	1.3879456	4.167998e-01	0.6768248	-2.1418279	3.2988188
2	e2_binmid E2	0.1494685	0.9184424	1.627413e-01	0.8707221	-1.6506455	1.9495825
3	(Intercept)	-6.3707373	2.7188448	-2.343178e+00	0.0191203	-11.6995752	-1.0418994
3	sqrt.conc	0.2628004	0.1411389	1.861998e+00	0.0626034	-0.0138268	0.5394275
3	ED_Age	0.0463402	0.0222611	2.081669e+00	0.0373727	0.0027093	0.0899712
3	ED_HighestGradeAdvanced Degree	-14.2193859	0.0000018	-8.033872e+06	0.0000000	-14.2193894	-14.2193825
3	ED_HighestGradeHigh School or Less	-0.7225669	0.5709831	-1.265478e+00	0.2056998	-1.8416733	0.3965395
3	ED_HighestGradeBachelor’s Degree	-0.6155575	0.8259645	-7.452590e-01	0.4561151	-2.2344182	1.0033032
3	BMI	-0.0466263	0.0353995	-1.317147e+00	0.1877895	-0.1160081	0.0227554
3	ED_Pain	0.4455983	0.1157997	3.848008e+00	0.0001191	0.2186350	0.6725617
3	RaceNon-Hispanic Black	-1.0157518	0.8513202	-1.193149e+00	0.2328111	-2.6843087	0.6528052
3	RaceNon-Hispanic Other	-1.9980674	1.4362052	-1.391213e+00	0.1641608	-4.8129778	0.8168429
3	RaceNon-Hispanic White	-0.9375516	0.9131053	-1.026773e+00	0.3045275	-2.7272050	0.8521018
3	Plate.Number	0.0154110	0.0394421	3.907258e-01	0.6959999	-0.0618940	0.0927161
3	e2_binlow E2	4.8051741	2.0343863	2.361977e+00	0.0181778	0.8178502	8.7924980
3	e2_binmid E2	2.1336870	1.4507924	1.470705e+00	0.1413710	-0.7098137	4.9771878

# Fit the multinomial logistic regression model for time point 3 months
# (rows with time == 3; same predictors as the month-2 model).
# multinom() has no `control` argument: the singular.ok setting was swallowed
# by `...` and silently ignored, so it is removed here. The fit is unchanged.
model_m3 <- multinom(
  bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race +
    Plate.Number + e2_bin,
  subset = time == 3,
  data = e2_full_long
)

## # weights:  45 (28 variable)
## initial  value 419.669894 
## iter  10 value 265.651792
## iter  20 value 245.264571
## iter  30 value 243.580449
## final  value 243.578224 
## converged

# Coefficient table (with confidence intervals) for the month-3 model.
kable_styling(
  kable(tidy(model_m3, conf.int = TRUE)),
  bootstrap_options = "basic",
  full_width = FALSE
) #no significance

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	0.1438903	1.4251253	0.1009668	0.9195768	-2.6493040	2.9370847
2	sqrt.conc	-0.0282643	0.0773150	-0.3655737	0.7146832	-0.1797990	0.1232703
2	ED_Age	0.0265784	0.0116658	2.2783275	0.0227071	0.0037140	0.0494429
2	ED_HighestGradeAdvanced Degree	0.1810048	0.6172130	0.2932614	0.7693223	-1.0287105	1.3907201
2	ED_HighestGradeHigh School or Less	-0.0572961	0.3072826	-0.1864606	0.8520835	-0.6595590	0.5449667
2	ED_HighestGradeBachelor’s Degree	0.3226113	0.4328355	0.7453439	0.4560638	-0.5257307	1.1709534
2	BMI	0.0202893	0.0174147	1.1650659	0.2439923	-0.0138429	0.0544214
2	ED_Pain	0.0500940	0.0552732	0.9062990	0.3647776	-0.0582394	0.1584275
2	RaceNon-Hispanic Black	-0.7732148	0.4909491	-1.5749389	0.1152706	-1.7354572	0.1890277
2	RaceNon-Hispanic Other	-0.2607000	0.7272638	-0.3584668	0.7199940	-1.6861108	1.1647109
2	RaceNon-Hispanic White	-0.6709575	0.5101474	-1.3152227	0.1884351	-1.6708281	0.3289131
2	Plate.Number	-0.0070094	0.0207073	-0.3384981	0.7349879	-0.0475949	0.0335762
2	e2_binlow E2	0.4799754	1.1288069	0.4252059	0.6706866	-1.7324456	2.6923963
2	e2_binmid E2	0.2094152	0.7478034	0.2800404	0.7794465	-1.2562526	1.6750830
3	(Intercept)	-5.8817990	2.7799044	-2.1158278	0.0343595	-11.3303114	-0.4332865
3	sqrt.conc	0.1531560	0.1468975	1.0426041	0.2971317	-0.1347579	0.4410698
3	ED_Age	0.0506380	0.0229009	2.2111761	0.0270236	0.0057530	0.0955230
3	ED_HighestGradeAdvanced Degree	0.0678288	1.2654705	0.0535997	0.9572541	-2.4124479	2.5481055
3	ED_HighestGradeHigh School or Less	0.2665381	0.5757837	0.4629136	0.6434263	-0.8619772	1.3950535
3	ED_HighestGradeBachelor’s Degree	-0.0078086	0.9238302	-0.0084524	0.9932560	-1.8184825	1.8028652
3	BMI	-0.0015620	0.0341004	-0.0458069	0.9634641	-0.0683975	0.0652734
3	ED_Pain	0.3746751	0.1269878	2.9504817	0.0031728	0.1257836	0.6235665
3	RaceNon-Hispanic Black	-1.1860836	0.7500276	-1.5813866	0.1137897	-2.6561106	0.2839435
3	RaceNon-Hispanic Other	-1.1249035	1.3691195	-0.8216255	0.4112901	-3.8083284	1.5585215
3	RaceNon-Hispanic White	-0.7911548	0.8243052	-0.9597838	0.3371640	-2.4067633	0.8244537
3	Plate.Number	-0.0840347	0.0453981	-1.8510609	0.0641608	-0.1730133	0.0049440
3	e2_binlow E2	2.9876362	1.8874151	1.5829248	0.1134386	-0.7116295	6.6869018
3	e2_binmid E2	0.5806531	1.3310391	0.4362404	0.6626623	-2.0281356	3.1894418

# Fit the multinomial logit model for the 6-month time point.
# Outcome: bin_pain4. The coefficient table reports levels 2 and 3, so the
# remaining level is presumably the reference category.
# The former `control = list(singular.ok = FALSE)` argument was removed:
# multinom() has no `control` parameter, so it was silently absorbed by `...`
# and never affected the fit.
# NOTE(review): in the fitted table the level-3 "Advanced Degree" coefficient
# is about -14 with a standard error near 0, which looks like (quasi-)complete
# separation; treat that row's statistic and p-value with caution.
model_m6 <- multinom(
  bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race +
    Plate.Number + e2_bin,
  data = e2_full_long,
  subset = time == 6
)

## # weights:  45 (28 variable)
## initial  value 436.149079 
## iter  10 value 306.109642
## iter  20 value 280.123037
## iter  30 value 278.298475
## iter  40 value 278.254523
## iter  50 value 278.253270
## iter  50 value 278.253268
## iter  50 value 278.253268
## final  value 278.253268 
## converged

# Tabulate the 6-month coefficients with confidence intervals.
# Observations from the rendered table: Plate.Number comes out significant
# (unexpected -- needs follow-up); neither e2_bin term is significant.
model_m6 %>%
  tidy(conf.int = TRUE) %>%
  kable() %>%
  kable_styling("basic", full_width = FALSE)

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	0.5545904	1.2574649	4.410385e-01	0.6591851	-1.9099955	3.0191763
2	sqrt.conc	-0.0206597	0.0689491	-2.996363e-01	0.7644546	-0.1557975	0.1144782
2	ED_Age	0.0182652	0.0100995	1.808518e+00	0.0705258	-0.0015295	0.0380599
2	ED_HighestGradeAdvanced Degree	-0.0670360	0.5113894	-1.310860e-01	0.8957073	-1.0693408	0.9352688
2	ED_HighestGradeHigh School or Less	-0.0116962	0.2802121	-4.174060e-02	0.9667055	-0.5609020	0.5375095
2	ED_HighestGradeBachelor’s Degree	0.0680815	0.3563096	1.910741e-01	0.8484675	-0.6302724	0.7664355
2	BMI	0.0119860	0.0150889	7.943606e-01	0.4269855	-0.0175877	0.0415597
2	ED_Pain	0.0405024	0.0494682	8.187562e-01	0.4129255	-0.0564535	0.1374584
2	RaceNon-Hispanic Black	-0.2305422	0.3803868	-6.060730e-01	0.5444663	-0.9760867	0.5150023
2	RaceNon-Hispanic Other	-0.3601504	0.5932052	-6.071261e-01	0.5437672	-1.5228113	0.8025105
2	RaceNon-Hispanic White	0.0853640	0.3992413	2.138155e-01	0.8306909	-0.6971346	0.8678625
2	Plate.Number	-0.0524656	0.0184486	-2.843882e+00	0.0044568	-0.0886242	-0.0163070
2	e2_binlow E2	1.3410510	1.1340476	1.182535e+00	0.2369935	-0.8816414	3.5637434
2	e2_binmid E2	-0.1077641	0.6895640	-1.562786e-01	0.8758134	-1.4592846	1.2437564
3	(Intercept)	-3.0896264	2.8204937	-1.095420e+00	0.2733325	-8.6176924	2.4384396
3	sqrt.conc	-0.0463444	0.1645973	-2.815624e-01	0.7782791	-0.3689492	0.2762604
3	ED_Age	0.0722735	0.0220570	3.276674e+00	0.0010504	0.0290426	0.1155043
3	ED_HighestGradeAdvanced Degree	-13.9746453	0.0000027	-5.102638e+06	0.0000000	-13.9746507	-13.9746399
3	ED_HighestGradeHigh School or Less	-0.0954912	0.5289536	-1.805284e-01	0.8567377	-1.1322212	0.9412389
3	ED_HighestGradeBachelor’s Degree	-1.5368456	1.1258387	-1.365067e+00	0.1722319	-3.7434489	0.6697578
3	BMI	-0.0224125	0.0352416	-6.359679e-01	0.5247974	-0.0914848	0.0466598
3	ED_Pain	0.1922234	0.1068746	1.798588e+00	0.0720838	-0.0172470	0.4016938
3	RaceNon-Hispanic Black	0.1720303	0.7607170	2.261424e-01	0.8210907	-1.3189476	1.6630083
3	RaceNon-Hispanic Other	0.1933064	1.1271797	1.714956e-01	0.8638341	-2.0159253	2.4025380
3	RaceNon-Hispanic White	-0.9022036	0.9595953	-9.401918e-01	0.3471192	-2.7829758	0.9785685
3	Plate.Number	-0.0614748	0.0387215	-1.587613e+00	0.1123738	-0.1373677	0.0144180
3	e2_binlow E2	1.6842627	2.0821929	8.088889e-01	0.4185791	-2.3967604	5.7652858
3	e2_binmid E2	-0.9692742	1.5326311	-6.324250e-01	0.5271092	-3.9731760	2.0346275

#m <- glmer(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin + (1 | PID), data = e2_full_long, family = binomial, control = glmerControl(optimizer = "bobyqa"), nAGQ = 10)

Original code for miRNA

# Keep only the E2 (women) rows whose PID also appears in ID_miRNA;
# inner_join() drops rows without a match on either side.
E2_women_wide_all <- E2_women_wide %>%
  inner_join(ID_miRNA, by = "PID")

# NOTE(review): this join emits a many-to-many warning on PID (ID_miRNA holds
# repeated PIDs), so rows are multiplied; confirm the fan-out is intended
# before using the result downstream.
E2_women_long_all <- E2_women_long %>%
  inner_join(ID_miRNA, by = "PID")

## Warning in inner_join(., ID_miRNA, by = "PID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 176 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

NEW MARCH 2024 CODE FOR miRNA

None of the PIDs match between the new estrogen data and the miRNA data, so the inner joins below return zero rows.

#unique(e2_full_long$PID)
#ID_miRNA$PID
#typeof(e2_full_long$PID)
#typeof(ID_miRNA$PID)

# Join the new (March 2024) estrogen tables to the miRNA ID table on PID.
# NOTE(review): this document states that no PIDs currently match between the
# two sources, so both results are expected to be empty until the PID values
# are reconciled -- compare formatting/type of PID on each side.
e2_wide_all <- e2_full_wide %>%
  inner_join(ID_miRNA, by = "PID")

E2_long_all <- e2_full_long %>%
  inner_join(ID_miRNA, by = "PID")

miRNA MARCH 2024 FEMALE ONLY Associating E2 -> Pain

Alice Woolard

2024-03-14