#library(edgeR);#library(sva)
library(data.table);library(ggfortify)
## Loading required package: ggplot2
library(factoextra);library(ClassDiscovery)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Loading required package: cluster
## Loading required package: oompaBase
library(stringr);library(lattice);library(kableExtra)
library(ggplot2);library(MASS);library(memisc)
## 
## Attaching package: 'memisc'
## The following object is masked from 'package:ggplot2':
## 
##     syms
## The following objects are masked from 'package:stats':
## 
##     contr.sum, contr.treatment, contrasts
## The following object is masked from 'package:base':
## 
##     as.array
library(tidyverse);library(naniar);library(gtsummary)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%()             masks memisc::%@%()
## ✖ lubridate::as.interval() masks memisc::as.interval()
## ✖ dplyr::between()         masks data.table::between()
## ✖ dplyr::collect()         masks memisc::collect()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ dplyr::first()           masks data.table::first()
## ✖ dplyr::group_rows()      masks kableExtra::group_rows()
## ✖ lubridate::hour()        masks data.table::hour()
## ✖ lubridate::is.interval() masks memisc::is.interval()
## ✖ lubridate::isoweek()     masks data.table::isoweek()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ dplyr::last()            masks data.table::last()
## ✖ lubridate::mday()        masks data.table::mday()
## ✖ lubridate::minute()      masks data.table::minute()
## ✖ lubridate::month()       masks data.table::month()
## ✖ lubridate::quarter()     masks data.table::quarter()
## ✖ dplyr::recode()          masks memisc::recode()
## ✖ dplyr::rename()          masks memisc::rename()
## ✖ lubridate::second()      masks data.table::second()
## ✖ dplyr::select()          masks MASS::select()
## ✖ dplyr::syms()            masks memisc::syms(), ggplot2::syms()
## ✖ purrr::transpose()       masks data.table::transpose()
## ✖ tibble::view()           masks memisc::view()
## ✖ lubridate::wday()        masks data.table::wday()
## ✖ lubridate::week()        masks data.table::week()
## ✖ lubridate::yday()        masks data.table::yday()
## ✖ lubridate::year()        masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'gtsummary'
## 
## 
## The following object is masked from 'package:MASS':
## 
##     select
require(lme4); require(nnet); require(tidyverse);
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: nnet
library(broom);library(readxl);library(nlme)
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:lme4':
## 
##     lmList
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse

NEW DATA FILES

#aurora data
df0 <- read.csv("full_aurora.csv")

estrogen data

#estrogen levels
e2_conc <- read.csv("E2_concentrated.csv")
#e2_od <- read.csv("E2_OD.csv") #Do I need this data?

#filter out the 0 values, 420 values, and 0 CV values
e2_conc$Mean.Concentration..pg.mL. <- as.numeric(e2_conc$Mean.Concentration..pg.mL.)
## Warning: NAs introduced by coercion
e2_conc$X..CV <- as.numeric(e2_conc$X..CV)
## Warning: NAs introduced by coercion
#filter add bad samples
e2_conc <- e2_conc %>% 
  rename(mean.conc = Mean.Concentration..pg.mL.) %>% 
  filter(mean.conc > 0) %>%
  filter(mean.conc < 420) %>%
  filter(`X..CV` > 0) %>%
  rename(CV = X..CV)

#square root transform concentration
e2_conc <- e2_conc %>%
  mutate(sqrt.conc = sqrt(mean.conc)) %>%
  select(everything(),mean.conc, sqrt.conc)

#remove extra columns
e2_conc <- e2_conc[, -c(4, 5, 7, 9, 10)] #dimensions 860 x 6

keys

#PID to inventory ID key for estrogen
key <- read_excel("e2_pid_key.xlsx")

#sample ID to inventory ID and PID Keys for miRNA
sampleID <- read.csv("ID_to_SampleID_all.csv")
ID_link <- readxl::read_excel("Final Sample List_UNC BSP RNA&Plasma.xlsx")

miRNA data

#I don't think updated miRNA data was sent? I'll use the miRNA data from 2021 from this original code
#this file was generated from "AURORA_new_QC_sRNA_06142021.R", which I do not have, this is from Ying
miRNA_cleaned <- read.csv("AURORA_sRNA_cleaned_596_06.14.2021.csv")
miRNA_cleaned$sample <- rownames(miRNA_cleaned) 
#miRNA_cleaned dimensions 596 x 1552

Link estrogen data with key

e2_keyed <- e2_conc %>% 
  right_join(key,by=c("Inventory.Code" = "InventoryCode")) 

#the key has 906 inventory ID/PIDs and the estrogen only has 860 samples

Link miRNA with keys

ID2 <- ID_link %>% 
  right_join(sampleID,by=c("INVENTORY CODE" = "Inventory.code", "SAMPLE ID" = "sampleID_sub")) 

ID_miRNA <- miRNA_cleaned %>%
  mutate(sample=as.numeric(sample)) %>% 
  left_join(ID2,by="sample") #596 samples

length(unique(ID_miRNA$PID)) # unique PID 294
## [1] 294

subset to only the timepoint 0/ED visit miRNA values

ID_miRNA_T0 <- ID_miRNA %>% 
  filter(`ALT ID` == "T0") #dimensions 294 x 1557

ORIGNAL aurora data, estrogen

#Use the data from Jarred directly, not read in the data saved, because it will change some variable names format
df <- read.csv("AURORA_estrogen_excl_outliers_excl_BMI.csv") # dimensions 283 x 3035

E2_women_excl_outliers_excl_BMI <- df[df$ED_GenderBirthCert == 2,] #dim 197 x 3035 
#E2_men_excl_outliers_excl_BMI <- df[df$ED_GenderBirthCert == 1,] #86, 3035 #not using men rn 

###E2_women (excluding outliers but added BMI) #table 2
E2_women_wide <- E2_women_excl_outliers_excl_BMI[,c("PID","quartile","tertile","median","ED_Age","HighestGrade","BMI","WK8_Pain","M3_Pain","M6_Pain","E2.Concentration..pg.ml._T0_sqrt","ED_NowPain.x","Race","Date.Run_T0")] #dimensions 197 x 14

summary(E2_women_wide)
##       PID            quartile        tertile          median     
##  Min.   :100183   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:102989   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :106076   Median :2.000   Median :2.000   Median :1.000  
##  Mean   :105821   Mean   :2.492   Mean   :1.995   Mean   :1.497  
##  3rd Qu.:108692   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :111301   Max.   :4.000   Max.   :3.000   Max.   :2.000  
##                                                                  
##      ED_Age       HighestGrade        BMI           WK8_Pain     
##  Min.   :18.00   Min.   :1.000   Min.   :17.30   Min.   : 0.000  
##  1st Qu.:27.00   1st Qu.:2.000   1st Qu.:25.43   1st Qu.: 3.000  
##  Median :37.00   Median :3.000   Median :30.33   Median : 5.000  
##  Mean   :39.37   Mean   :2.883   Mean   :31.43   Mean   : 5.211  
##  3rd Qu.:51.00   3rd Qu.:4.000   3rd Qu.:36.02   3rd Qu.: 8.000  
##  Max.   :73.00   Max.   :4.000   Max.   :57.50   Max.   :10.000  
##                  NA's   :1       NA's   :27      NA's   :12      
##     M3_Pain          M6_Pain       E2.Concentration..pg.ml._T0_sqrt
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.8124                 
##  1st Qu.: 2.000   1st Qu.: 0.000   1st Qu.: 4.1975                 
##  Median : 5.000   Median : 4.000   Median : 7.1516                 
##  Mean   : 4.716   Mean   : 4.085   Mean   : 8.6909                 
##  3rd Qu.: 7.000   3rd Qu.: 7.000   3rd Qu.:13.0289                 
##  Max.   :10.000   Max.   :10.000   Max.   :21.7290                 
##  NA's   :14       NA's   :21                                       
##   ED_NowPain.x         Race        Date.Run_T0   
##  Min.   : 0.000   Min.   :1.000   Min.   :44032  
##  1st Qu.: 5.000   1st Qu.:2.000   1st Qu.:44039  
##  Median : 7.000   Median :3.000   Median :44063  
##  Mean   : 6.655   Mean   :2.587   Mean   :44073  
##  3rd Qu.: 8.000   3rd Qu.:3.000   3rd Qu.:44082  
##  Max.   :10.000   Max.   :3.000   Max.   :44157  
##                   NA's   :1
length(unique(E2_women_wide[["PID"]])) #197 unique PID
## [1] 197

March 2024 data Create wide df of estrogen and pain data

#aurora data is df0 #4745 rows
#estrogen data is e2_keyed #906 rows

#combine e2 and aurora data
e2_keyed_full <- e2_keyed %>% 
  left_join(df0,by=c("PID" = "PID")) #1217 rows x 353 columns
## Warning in left_join(., df0, by = c(PID = "PID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 15 of `x` matches multiple rows in `y`.
## ℹ Row 946 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
#where is the NRS pain score
grep("NRS", names(e2_keyed_full), value = TRUE)
## [1] "ED_Somatic_NRS_SUM"  "WK2_Somatic_NRS_SUM" "WK8_Somatic_NRS_SUM"
## [4] "M3_Somatic_NRS_SUM"  "M6_Somatic_NRS_SUM"
#This is only the NRS sum count, so a value greater than 10, not the 0-10 NRS value. Will use something else.

#Do I use ED_NowPain_C or ED_Pain_C for ED pain?
#I don't have the "Date.Run_T0" value for this new set of 2024 estrogen values. maybe I don't need it ?
#added plate number to control for batch effects
#create wide df below
e2_full_wide <- e2_keyed_full[,c("PID","ED_Age","ED_HighestGrade","BMI","ED_RaceEthCode","sqrt.conc","ED_NowPain_C","WK8_Pain_C", "M3_Pain_C", "M6_Pain_C","Plate.Number", "AlternateID")] #dim 1217 x 12

e2_full_wide %>%
  inner_join(ID2,by="PID") 
## Warning in inner_join(., ID2, by = "PID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 123 of `x` matches multiple rows in `y`.
## ℹ Row 11 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
##       PID ED_Age                    ED_HighestGrade  BMI     ED_RaceEthCode
## 1  101394     22               High school graduate   NA Non-Hispanic Black
## 2  101394     22               High school graduate   NA Non-Hispanic Black
## 3  101394     22               High school graduate   NA Non-Hispanic Black
## 4  101394     22               High school graduate   NA Non-Hispanic Black
## 5  101394     22               High school graduate   NA Non-Hispanic Black
## 6  101394     22               High school graduate   NA Non-Hispanic Black
## 7  101394     22               High school graduate   NA Non-Hispanic Black
## 8  101394     22               High school graduate   NA Non-Hispanic Black
## 9  110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 10 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 11 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 12 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 13 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 14 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 15 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 16 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 17 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 18 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 19 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 20 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 21 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 22 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 23 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 24 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 25 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 26 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 27 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 28 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 29 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 30 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 31 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 32 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 33 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 34 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 35 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 36 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 37 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 38 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 39 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 40 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 41 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 42 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 43 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 44 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 45 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 46 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 47 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 48 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 49 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 50 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 51 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 52 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 53 110468     41               High school graduate 51.1 Non-Hispanic White
## 54 110468     41               High school graduate 51.1 Non-Hispanic White
## 55 110468     41               High school graduate 51.1 Non-Hispanic White
## 56 110468     41               High school graduate 51.1 Non-Hispanic White
## 57 110468     41               High school graduate 51.1 Non-Hispanic White
## 58 110468     41               High school graduate 51.1 Non-Hispanic White
## 59 110468     41               High school graduate 51.1 Non-Hispanic White
## 60 110468     41               High school graduate 51.1 Non-Hispanic White
## 61 111201     21               High school graduate 22.7 Non-Hispanic Black
## 62 111201     21               High school graduate 22.7 Non-Hispanic Black
## 63 111201     21               High school graduate 22.7 Non-Hispanic Black
## 64 111201     21               High school graduate 22.7 Non-Hispanic Black
## 65 111201     21               High school graduate 22.7 Non-Hispanic Black
## 66 111201     21               High school graduate 22.7 Non-Hispanic Black
## 67 111128     29               High school graduate 32.3 Non-Hispanic White
## 68 111128     29               High school graduate 32.3 Non-Hispanic White
## 69 111128     29               High school graduate 32.3 Non-Hispanic White
## 70 111128     29               High school graduate 32.3 Non-Hispanic White
## 71 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 72 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 73 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 74 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 75 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 76 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 77 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 78 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 79 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 80 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 81 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 82 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 83 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 84 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 85 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 86 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 87 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 88 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 89 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 90 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 91 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 92 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 93 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 94 108726     43            Some college, no degree 34.0 Non-Hispanic Black
##    sqrt.conc ED_NowPain_C WK8_Pain_C M3_Pain_C M6_Pain_C Plate.Number
## 1  14.870289            6          0         0         0            4
## 2  14.870289            6          0         0         0            4
## 3  14.870289            6          0         0         0            4
## 4  14.870289            6          0         0         0            4
## 5  14.870289            6          0         0         0            4
## 6  14.870289            6          0         0         0            4
## 7  14.870289            6          0         0         0            4
## 8  14.870289            6          0         0         0            4
## 9   3.411891            4          4         5         3            9
## 10  3.411891            4          4         5         3            9
## 11  3.411891            4          4         5         3            9
## 12  3.411891            4          4         5         3            9
## 13  3.411891            4          4         5         3            9
## 14  3.411891            4          4         5         3            9
## 15  3.411891            4          4         5         3            9
## 16  3.411891            4          4         5         3            9
## 17  3.411891            4          4         5         3            9
## 18  3.411891            4          4         5         3            9
## 19  3.411891            4          4         5         3            9
## 20  3.411891            4          4         5         3            9
## 21  4.063250            5          0         0         0            9
## 22  4.063250            5          0         0         0            9
## 23  4.063250            5          0         0         0            9
## 24  4.063250            5          0         0         0            9
## 25  4.063250            5          0         0         0            9
## 26  4.063250            5          0         0         0            9
## 27  4.063250            5          0         0         0            9
## 28  4.063250            5          0         0         0            9
## 29  4.063250            5          0         0         0            9
## 30  4.063250            5          0         0         0            9
## 31  4.063250            5          0         0         0            9
## 32  4.063250            5          0         0         0            9
## 33 13.573504            4          8         8         7           10
## 34 13.573504            4          8         8         7           10
## 35 13.573504            4          8         8         7           10
## 36 13.573504            4          8         8         7           10
## 37 13.573504            4          8         8         7           10
## 38 13.573504            4          8         8         7           10
## 39 13.573504            4          8         8         7           10
## 40 13.573504            4          8         8         7           10
## 41 13.573504            4          8         8         7           10
## 42 13.573504            4          8         8         7           10
## 43 13.573504            4          8         8         7           10
## 44 13.573504            4          8         8         7           10
## 45  3.735706            6          4        NA         3           10
## 46  3.735706            6          4        NA         3           10
## 47  3.735706            6          4        NA         3           10
## 48  3.735706            6          4        NA         3           10
## 49  3.735706            6          4        NA         3           10
## 50  3.735706            6          4        NA         3           10
## 51  3.735706            6          4        NA         3           10
## 52  3.735706            6          4        NA         3           10
## 53  4.842365            7         NA         6         5           10
## 54  4.842365            7         NA         6         5           10
## 55  4.842365            7         NA         6         5           10
## 56  4.842365            7         NA         6         5           10
## 57  4.842365            7         NA         6         5           10
## 58  4.842365            7         NA         6         5           10
## 59  4.842365            7         NA         6         5           10
## 60  4.842365            7         NA         6         5           10
## 61  6.199113            7          7         6         6           11
## 62  6.199113            7          7         6         6           11
## 63  6.199113            7          7         6         6           11
## 64  6.199113            7          7         6         6           11
## 65  6.199113            7          7         6         6           11
## 66  6.199113            7          7         6         6           11
## 67  4.701542            8          7         7         6           16
## 68  4.701542            8          7         7         6           16
## 69  4.701542            8          7         7         6           16
## 70  4.701542            8          7         7         6           16
## 71  3.681304            6          7         4         8           17
## 72  3.681304            6          7         4         8           17
## 73  3.681304            6          7         4         8           17
## 74  3.681304            6          7         4         8           17
## 75  3.681304            6          7         4         8           17
## 76  3.681304            6          7         4         8           17
## 77  3.681304            6          7         4         8           17
## 78  3.681304            6          7         4         8           17
## 79  3.681304            6          7         4         8           17
## 80  3.681304            6          7         4         8           17
## 81  3.681304            6          7         4         8           17
## 82  3.681304            6          7         4         8           17
## 83  4.382009            5          6         4         4           17
## 84  4.382009            5          6         4         4           17
## 85  4.382009            5          6         4         4           17
## 86  4.382009            5          6         4         4           17
## 87  4.382009            5          6         4         4           17
## 88  4.382009            5          6         4         4           17
## 89  4.382009            5          6         4         4           17
## 90  4.382009            5          6         4         4           17
## 91  4.382009            5          6         4         4           17
## 92  4.382009            5          6         4         4           17
## 93  4.382009            5          6         4         4           17
## 94  4.382009            5          6         4         4           17
##    AlternateID SAMPLE ID ALT ID INVENTORY CODE Sample Type sample
## 1           T1  824-1024     T3      XA0050274        RNA1    522
## 2           T1  824-1024     T0      XA0045353        RNA1      2
## 3           T1  824-1024     T3      XA0050274        RNA1    522
## 4           T1  824-1024     T0      XA0045353        RNA1      2
## 5           T1  824-1024     T3      XA0050274        RNA1    522
## 6           T1  824-1024     T0      XA0045353        RNA1      2
## 7           T1  824-1024     T3      XA0050274        RNA1    522
## 8           T1  824-1024     T0      XA0045353        RNA1      2
## 9           T3  846-1109     T1      XA0055516        RNA1    336
## 10          T3  846-1109     T0      XA0055259        RNA1    148
## 11          T3  846-1109     T1      XA0055516        RNA1    336
## 12          T3  846-1109     T0      XA0055259        RNA1    148
## 13          T3  846-1109     T1      XA0055516        RNA1    336
## 14          T3  846-1109     T0      XA0055259        RNA1    148
## 15          T3  846-1109     T1      XA0055516        RNA1    336
## 16          T3  846-1109     T0      XA0055259        RNA1    148
## 17          T3  846-1109     T1      XA0055516        RNA1    336
## 18          T3  846-1109     T0      XA0055259        RNA1    148
## 19          T3  846-1109     T1      XA0055516        RNA1    336
## 20          T3  846-1109     T0      XA0055259        RNA1    148
## 21          T3  846-1112     T1      XA0055530        RNA1    341
## 22          T3  846-1112     T0      XA0055329        RNA1    430
## 23          T3  846-1112     T1      XA0055530        RNA1    341
## 24          T3  846-1112     T0      XA0055329        RNA1    430
## 25          T3  846-1112     T1      XA0055530        RNA1    341
## 26          T3  846-1112     T0      XA0055329        RNA1    430
## 27          T3  846-1112     T1      XA0055530        RNA1    341
## 28          T3  846-1112     T0      XA0055329        RNA1    430
## 29          T3  846-1112     T1      XA0055530        RNA1    341
## 30          T3  846-1112     T0      XA0055329        RNA1    430
## 31          T3  846-1112     T1      XA0055530        RNA1    341
## 32          T3  846-1112     T0      XA0055329        RNA1    430
## 33          T3  846-1110     T1      XA0055534        RNA1    343
## 34          T3  846-1110     T0      XA0055331        RNA1    151
## 35          T3  846-1110     T1      XA0055534        RNA1    343
## 36          T3  846-1110     T0      XA0055331        RNA1    151
## 37          T3  846-1110     T1      XA0055534        RNA1    343
## 38          T3  846-1110     T0      XA0055331        RNA1    151
## 39          T3  846-1110     T1      XA0055534        RNA1    343
## 40          T3  846-1110     T0      XA0055331        RNA1    151
## 41          T3  846-1110     T1      XA0055534        RNA1    343
## 42          T3  846-1110     T0      XA0055331        RNA1    151
## 43          T3  846-1110     T1      XA0055534        RNA1    343
## 44          T3  846-1110     T0      XA0055331        RNA1    151
## 45          T3  846-1117     T1      XA0055521        RNA1    338
## 46          T3  846-1117     T0      XA0055300        RNA1    429
## 47          T3  846-1117     T1      XA0055521        RNA1    338
## 48          T3  846-1117     T0      XA0055300        RNA1    429
## 49          T3  846-1117     T1      XA0055521        RNA1    338
## 50          T3  846-1117     T0      XA0055300        RNA1    429
## 51          T3  846-1117     T1      XA0055521        RNA1    338
## 52          T3  846-1117     T0      XA0055300        RNA1    429
## 53          T3  873-1054     T0      XA0054511        RNA1    299
## 54          T3  873-1054     T1      XA0054622        RNA1    305
## 55          T3  873-1054     T0      XA0054511        RNA1    299
## 56          T3  873-1054     T1      XA0054622        RNA1    305
## 57          T3  873-1054     T0      XA0054511        RNA1    299
## 58          T3  873-1054     T1      XA0054622        RNA1    305
## 59          T3  873-1054     T0      XA0054511        RNA1    299
## 60          T3  873-1054     T1      XA0054622        RNA1    305
## 61          T3  825-1110     T0      XA0055257        RNA1    168
## 62          T3  825-1110     T1      XA0055225        RNA1    575
## 63          T3  825-1110     T0      XA0055257        RNA1    168
## 64          T3  825-1110     T1      XA0055225        RNA1    575
## 65          T3  825-1110     T0      XA0055257        RNA1    168
## 66          T3  825-1110     T1      XA0055225        RNA1    575
## 67          T3  828-1216     T0      XA0054998        RNA1    573
## 68          T3  828-1216     T1      XA0054147        RNA1    119
## 69          T3  828-1216     T0      XA0054998        RNA1    573
## 70          T3  828-1216     T1      XA0054147        RNA1    119
## 71          T3  827-1008     T1      XA0047505        RNA1    485
## 72          T3  827-1008     T0      XA0045856        RNA1    450
## 73          T3  827-1008     T1      XA0047505        RNA1    485
## 74          T3  827-1008     T0      XA0045856        RNA1    450
## 75          T3  827-1008     T1      XA0047505        RNA1    485
## 76          T3  827-1008     T0      XA0045856        RNA1    450
## 77          T3  827-1008     T1      XA0047505        RNA1    485
## 78          T3  827-1008     T0      XA0045856        RNA1    450
## 79          T3  827-1008     T1      XA0047505        RNA1    485
## 80          T3  827-1008     T0      XA0045856        RNA1    450
## 81          T3  827-1008     T1      XA0047505        RNA1    485
## 82          T3  827-1008     T0      XA0045856        RNA1    450
## 83          T3  828-1145     T0      XA0053287        RNA1    406
## 84          T3  828-1145     T1      XA0053178        RNA1     86
## 85          T3  828-1145     T0      XA0053287        RNA1    406
## 86          T3  828-1145     T1      XA0053178        RNA1     86
## 87          T3  828-1145     T0      XA0053287        RNA1    406
## 88          T3  828-1145     T1      XA0053178        RNA1     86
## 89          T3  828-1145     T0      XA0053287        RNA1    406
## 90          T3  828-1145     T1      XA0053178        RNA1     86
## 91          T3  828-1145     T0      XA0053287        RNA1    406
## 92          T3  828-1145     T1      XA0053178        RNA1     86
## 93          T3  828-1145     T0      XA0053287        RNA1    406
## 94          T3  828-1145     T1      XA0053178        RNA1     86
#at this above step which includes all time points of E2 for the March 2024 data, there are 94 overlapping PIDs with the 2021 miRNA data

#rename some of the columns
e2_full_wide <- e2_full_wide %>% 
  rename(ED_Pain = ED_NowPain_C) %>% 
  rename(WK8_Pain = WK8_Pain_C) %>% 
  rename(M3_Pain = M3_Pain_C) %>% 
  rename(M6_Pain = M6_Pain_C) %>% 
  rename(Race = ED_RaceEthCode)   

#Remove cases without estrogen measurements
e2_full_wide <- e2_full_wide[complete.cases(e2_full_wide$sqrt.conc), ] #dimensions 1161 x 12

#Keep ONLY estrogen time point T0, E2 at time of ED visit
e2_full_wide <- e2_full_wide %>% 
  filter(AlternateID == "T0") #dimensions 558 x 12

#note: after the above step of keeping only T0 values, there remain no overlapping PIDs between the March 2024 E2 data and the 2021 miRNA data. 

#summarize wide df and see how many unique PIDs
#summary(e2_full_wide)
length(unique(e2_full_wide[["PID"]])) #432 unique PID
## [1] 432

NEW MARCH 2024 CODE fix the education category

unique(e2_full_wide$ED_HighestGrade)
##  [1] "High school graduate"                                            
##  [2] "Some college, no degree"                                         
##  [3] "Associate degree: Occupational, technical, or vocational program"
##  [4] "Bachelor's degree: BA, AB, BS, BBA"                              
##  [5] "GED or equivalent"                                               
##  [6] "Associate degree: Academic program"                              
##  [7] "Doctoral degree: PhD, EdD"                                       
##  [8] "Master's degree: MA, MS, MEng, MEd, MBA"                         
##  [9] "Professional school degree: MD, DDS, DVM, JD"                    
## [10] "8th grade"                                                       
## [11] "11th grade"                                                      
## [12] "12th grade, no diploma"                                          
## [13] "9th grade"                                                       
## [14] "10th grade"                                                      
## [15] "7th grade"
#reduce education categories to 4 levels
level_mapping <- c(
  "Some college, no degree" = "Some College or Associate Degree",
  "Professional school degree: MD, DDS, DVM, JD" = "Advanced Degree",
  "Associate degree: Occupational, technical, or vocational program" = "Some College or Associate Degree",
  "High school graduate" = "High School or Less",
  "Bachelor's degree: BA, AB, BS, BBA" = "Bachelor's Degree",
  "Master's degree: MA, MS, MEng, MEd, MBA" = "Advanced Degree",
  "GED or equivalent" = "High School or Less",
  "8th grade" = "High School or Less",
  "Doctoral degree: PhD, EdD" = "Advanced Degree",
  "Associate degree: Academic program" = "Some College or Associate Degree",
  "12th grade, no diploma" = "High School or Less",
  "11th grade" = "High School or Less",
  "9th grade" = "High School or Less",
  "10th grade" = "High School or Less",
  "7th grade" = "High School or Less"
)

#apply new mapping to education category
e2_full_wide$ED_HighestGrade <- factor(e2_full_wide$ED_HighestGrade, levels = names(level_mapping), labels = level_mapping)
#dim 558 x 12

Original code

gather(E2_women_wide, key=names, value=pain,WK8_Pain,M3_Pain,M6_Pain) %>%
  mutate(time = ifelse(names=="WK2_Pain",0.5,ifelse(names=="WK8_Pain",2,ifelse(names=="M3_Pain",3,ifelse(names=="M6_Pain",6,NA))))) %>%
  arrange(PID,time) -> E2_women_long

E2_women_long <- E2_women_long[complete.cases(E2_women_long),]
length(unique(E2_women_long[["PID"]])) #164 PID
## [1] 164
#summary(E2_women_long$E2.Concentration..pg.ml._T0_sqrt)

March 2024 data Create long df of estrogen and pain data changes: updated df used, removed Week2 pain rename

gather(e2_full_wide, key=names, value=pain,WK8_Pain,M3_Pain,M6_Pain) %>%
  mutate(time = ifelse(names=="WK8_Pain",2,ifelse(names=="M3_Pain",3,ifelse(names=="M6_Pain",6,NA)))) %>%
  arrange(PID,time) -> e2_full_long 

e2_full_long <- e2_full_long[complete.cases(e2_full_long),] #dim 1541 x 12
length(unique(e2_full_long[["PID"]])) #397 PID
## [1] 397
#summary(e2_full_long$sqrt.conc)

#there are a few random duplicates that need to be removed
e2_full_long <- e2_full_long[!duplicated(e2_full_long), ] #dim 1163 x 12

ORIGINAL GLS MODEL CODE

#model (table2a, left side)
E2_women_long$E2_Concentration_T0_sqrt = E2_women_long$E2.Concentration..pg.ml._T0_sqrt
E2_women_long$Date_Run_T0 = E2_women_long$Date.Run_T0

E2_women_model <- gls(pain ~ time + E2_Concentration_T0_sqrt + ED_Age + HighestGrade + BMI + ED_NowPain.x + Race + Date_Run_T0, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=E2_women_long)

summary(E2_women_model)
## Generalized least squares fit by REML
##   Model: pain ~ time + E2_Concentration_T0_sqrt + ED_Age + HighestGrade +      BMI + ED_NowPain.x + Race + Date_Run_T0 
##   Data: E2_women_long 
##        AIC      BIC    logLik
##   2257.843 2303.215 -1117.922
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##      Phi1 
## 0.7745422 
## 
## Coefficients:
##                               Value Std.Error   t-value p-value
## (Intercept)              -210.08763 215.99869 -0.972634  0.3312
## time                       -0.26382   0.07157 -3.686142  0.0003
## E2_Concentration_T0_sqrt   -0.04854   0.04138 -1.173080  0.2414
## ED_Age                      0.03183   0.01658  1.920209  0.0555
## HighestGrade               -0.16961   0.24209 -0.700584  0.4839
## BMI                         0.02026   0.02838  0.713853  0.4757
## ED_NowPain.x                0.37291   0.08619  4.326428  0.0000
## Race                        0.37034   0.38705  0.956813  0.3392
## Date_Run_T0                 0.00480   0.00490  0.978950  0.3281
## 
##  Correlation: 
##                          (Intr) time   E2_C_T ED_Age HghstG BMI    ED_NP.
## time                     -0.004                                          
## E2_Concentration_T0_sqrt  0.193  0.012                                   
## ED_Age                   -0.036 -0.008  0.269                            
## HighestGrade              0.186 -0.006 -0.018 -0.300                     
## BMI                      -0.065  0.003  0.007 -0.312  0.080              
## ED_NowPain.x             -0.056 -0.004 -0.092 -0.048  0.052 -0.228       
## Race                     -0.159 -0.020 -0.082  0.018  0.159 -0.064 -0.104
## Date_Run_T0              -1.000  0.002 -0.194  0.035 -0.190  0.062  0.055
##                          Race  
## time                           
## E2_Concentration_T0_sqrt       
## ED_Age                         
## HighestGrade                   
## BMI                            
## ED_NowPain.x                   
## Race                           
## Date_Run_T0               0.154
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.11992096 -0.78910606 -0.05550526  0.79878542  2.21845145 
## 
## Residual standard error: 3.130053 
## Degrees of freedom: 466 total; 457 residual

MARCH 2024 NEW E2 DATA CODE GLS MODEL

#took out highest grade because its not coded correctly 
e2_model <- gls(pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long)

summary(e2_model)
## Generalized least squares fit by REML
##   Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +      Race + Plate.Number 
##   Data: e2_full_long 
##       AIC      BIC    logLik
##   5566.51 5642.223 -2768.255
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##    Phi1 
## 0.73062 
## 
## Coefficients:
##                                         Value Std.Error   t-value p-value
## (Intercept)                         1.9370128 0.8249753  2.347965  0.0190
## time                               -0.2372621 0.0453816 -5.228156  0.0000
## sqrt.conc                           0.0097417 0.0403274  0.241564  0.8092
## ED_Age                              0.0433499 0.0096850  4.475991  0.0000
## ED_HighestGradeAdvanced Degree     -1.1040972 0.5188331 -2.128039  0.0335
## ED_HighestGradeHigh School or Less  0.0062920 0.2810954  0.022384  0.9821
## ED_HighestGradeBachelor's Degree   -0.6553079 0.3530373 -1.856200  0.0637
## BMI                                 0.0268486 0.0147667  1.818177  0.0693
## ED_Pain                             0.2596334 0.0491419  5.283337  0.0000
## RaceNon-Hispanic Black             -0.5629988 0.3746560 -1.502709  0.1332
## RaceNon-Hispanic Other             -0.4014009 0.5972709 -0.672058  0.5017
## RaceNon-Hispanic White             -0.5940566 0.3903109 -1.522009  0.1283
## Plate.Number                       -0.0234922 0.0187327 -1.254078  0.2101
## 
##  Correlation: 
##                                    (Intr) time   sqrt.c ED_Age ED_HGAD ED_SoL
## time                               -0.226                                    
## sqrt.conc                          -0.301  0.001                             
## ED_Age                             -0.409  0.004  0.251                      
## ED_HighestGradeAdvanced Degree     -0.219  0.002 -0.015 -0.098               
## ED_HighestGradeHigh School or Less -0.245  0.000  0.025  0.105  0.237        
## ED_HighestGradeBachelor's Degree   -0.156  0.000  0.008 -0.069  0.244   0.339
## BMI                                -0.465  0.006 -0.101 -0.167  0.069   0.051
## ED_Pain                            -0.444  0.002 -0.060  0.046  0.136  -0.074
## RaceNon-Hispanic Black             -0.310  0.000 -0.050 -0.064  0.221   0.042
## RaceNon-Hispanic Other             -0.258  0.002 -0.048  0.027  0.118  -0.002
## RaceNon-Hispanic White             -0.316 -0.002 -0.055 -0.089  0.131   0.059
## Plate.Number                       -0.285  0.000  0.004 -0.022  0.063   0.066
##                                    ED_HGBD BMI    ED_Pan RcN-HB RcN-HO RcN-HW
## time                                                                         
## sqrt.conc                                                                    
## ED_Age                                                                       
## ED_HighestGradeAdvanced Degree                                               
## ED_HighestGradeHigh School or Less                                           
## ED_HighestGradeBachelor's Degree                                             
## BMI                                 0.032                                    
## ED_Pain                             0.070   0.013                            
## RaceNon-Hispanic Black              0.079  -0.011  0.015                     
## RaceNon-Hispanic Other              0.048   0.022  0.096  0.475              
## RaceNon-Hispanic White             -0.045  -0.008  0.159  0.718  0.456       
## Plate.Number                        0.014   0.062  0.076 -0.038 -0.046 -0.108
## 
## Standardized residuals:
##          Min           Q1          Med           Q3          Max 
## -2.096733348 -0.819667252  0.005922671  0.718887652  2.479985354 
## 
## Residual standard error: 3.005848 
## Degrees of freedom: 1163 total; 1150 residual

ok well thats not exactly stellar. let’s see what we’re working with

#visualize pain over time
ggplot(e2_full_long, aes(x = as.factor(time), y = pain, group = PID, color = PID)) +
  geom_line() +
  geom_point() +
  labs(x = "Time Point", y = "Pain Score") +
  ggtitle("Pain Scores Over Time by PID") +
  theme_minimal()

# try something else bc thats awful
summary_df <- e2_full_long %>%
  group_by(time) %>%
  summarise(mean_pain = mean(pain), median_pain = median(pain))

# Plot mean or median pain scores at each time point
ggplot(summary_df, aes(x = time)) +
  geom_line(aes(y = mean_pain), color = "blue", size = 1.5) +  # Mean pain scores
  geom_point(aes(y = mean_pain), color = "blue", size = 3) +
  geom_line(aes(y = median_pain), color = "red", size = 1.5, linetype = "dashed") +  # Median pain scores
  geom_point(aes(y = median_pain), color = "red", size = 3) +
  geom_text(aes(x = 3.5, y = 4.2, label = "Mean"), color = "blue", size = 4, hjust = -0.2) +  # Text for mean
  geom_text(aes(x = 5, y = 4.1, label = "Median"), color = "red", size = 4, hjust = -0.2) +  # Text for median
  labs(x = "Time Point", y = "Pain Score", title = "Summary of Pain Scores Over Time") +
  scale_x_continuous(breaks = c(2, 3, 6))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_text(aes(x = 3.5, y = 4.2, label = "Mean"), color = "blue", : All aesthetics have length 1, but the data has 3 rows.
## ℹ Did you mean to use `annotate()`?
## Warning in geom_text(aes(x = 5, y = 4.1, label = "Median"), color = "red", : All aesthetics have length 1, but the data has 3 rows.
## ℹ Did you mean to use `annotate()`?

visualize estrogen versus pain over time

ggplot(e2_full_long, aes(x = sqrt.conc, y = pain, color = factor(time), shape = factor(time))) +
  geom_point(size = 3) +
  labs(x = "Estrogen Concentration (sqrt)", y = "Pain Level", color = "Time", shape = "Time") +
  scale_color_discrete(labels = c("2" = "Time 2", "3" = "Time 3", "6" = "Time 6")) +
  scale_shape_manual(values = c(16, 17, 18), labels = c("2" = "Time 2", "3" = "Time 3", "6" = "Time 6")) 

ggplot(e2_full_long, aes(x = sqrt.conc, fill = factor(time))) +
  geom_density(alpha = 0.5) +
  labs(x = "Estrogen Concentration (sqrt)", y = "Density", fill = "Time") +
  facet_wrap(~ time, ncol = 1) +
  theme_minimal()

summary stats

e2_full_long %>%
  group_by(time, pain) %>%
  summarise(
    Median_E2 = median(sqrt.conc, na.rm = TRUE),
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
## # A tibble: 33 × 3
## # Groups:   time [3]
##     time  pain Median_E2
##    <dbl> <int>     <dbl>
##  1     2     0      5.21
##  2     2     1      4.89
##  3     2     2      4.26
##  4     2     3      4.82
##  5     2     4      4.58
##  6     2     5      4.67
##  7     2     6      5.19
##  8     2     7      5.58
##  9     2     8      4.95
## 10     2     9      5.05
## # ℹ 23 more rows
ggplot(e2_full_long, aes(x = sqrt.conc, y = pain)) +
  geom_point(size=4, alpha=.1,color="firebrick4", fill="violetred",shape=21)

cor(e2_full_long$sqrt,e2_full_long$pain)
## [1] -0.00841645

idea.. refactor pain to two categories: low to none and moderate to severe

e2_full_long <- e2_full_long %>%
  mutate(bin_pain = cut(pain, breaks = c(-Inf, 4, 10), labels = c("Low to None", "Moderate to Severe"))) #dimensions 1163 x 13


#review summary stats
e2_full_long %>%
  group_by(time, bin_pain) %>%
  summarise(
    Mean_E2 = mean(sqrt.conc, na.rm = TRUE),
    Median_E2 = median(sqrt.conc, na.rm = TRUE),
  )
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 4
## # Groups:   time [3]
##    time bin_pain           Mean_E2 Median_E2
##   <dbl> <fct>                <dbl>     <dbl>
## 1     2 Low to None           5.58      4.80
## 2     2 Moderate to Severe    6.05      5.08
## 3     3 Low to None           5.84      4.86
## 4     3 Moderate to Severe    5.86      4.98
## 5     6 Low to None           5.87      4.86
## 6     6 Moderate to Severe    5.79      4.98
ggplot(e2_full_long, aes(x = sqrt.conc, fill = bin_pain)) +
  geom_histogram(binwidth = .2) +
  facet_grid(time ~ .) +
  labs(x = "Square Root of Estrogen Concentration", y = "Frequency", fill = "Pain Level")

ggplot(e2_full_long, aes(x = time, y = sqrt.conc, color = bin_pain)) +
  geom_point() +
  labs(x = "Time", y = "Square Root of Estrogen Concentration", color = "Pain Level")

lets look at the e2 values and re-level

#visualize
summary(e2_full_long$sqrt.conc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.325   3.867   4.906   5.841   6.844  20.137
ggplot(data = e2_full_long, aes(x = sqrt.conc, y = ..density..)) +
  geom_density(color = "deeppink3", fill = "deeppink3", alpha = .4, size=1.8) +
  labs(x = "Estrogen Level (sqrt.conc)", y = "Density",
       title = "Density Plot of Estrogen Levels") + 
  geom_histogram(data = e2_full_long, aes(x = sqrt.conc, y = ..density..),
                 fill = "skyblue", color = "gray20", alpha = 0.9, bins = 50)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#visualize ED estrogen levels versus pain at month 6
df_m6 <- e2_full_long %>% 
  filter(time == 6) #dimensions 397 x 13

ggplot(df_m6, aes(x = bin_pain, y = sqrt.conc)) +
  geom_boxplot() +
  labs(title = "Box Plot of sqrt.conc by bin_pain at month 6")

ggplot(df_m6, aes(x = sqrt.conc, y = pain)) +
  geom_point() +
  labs(title = "Scatter Plot of sqrt.conc versus pain at month 6",
       x = "Sqrt.Conc",
       y = "Pain")

summary table

df_m6 %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain) %>%
  tbl_summary(by = bin_pain,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  add_p()
Characteristic Low to None, N = 2311 Moderate to Severe, N = 1661 p-value2
ED_Age 36 (14) 39 (13) 0.012
ED_HighestGrade

0.2
    Some College or Associate Degree 91 (39%) 68 (41%)
    Advanced Degree 19 (8.2%) 8 (4.8%)
    High School or Less 76 (33%) 66 (40%)
    Bachelor's Degree 45 (19%) 24 (14%)
BMI 29 (8) 31 (9) 0.008
Race

0.3
    Hispanic 29 (13%) 28 (17%)
    Non-Hispanic Black 98 (42%) 77 (46%)
    Non-Hispanic Other 16 (6.9%) 7 (4.2%)
    Non-Hispanic White 88 (38%) 54 (33%)
sqrt.conc 5.87 (3.07) 5.79 (3.13) 0.7
ED_Pain 6 (3) 7 (2) <0.001
pain 1 (2) 7 (2) <0.001
1 Mean (SD); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test
#explore cut off points, pick some levels
summary(e2_full_long$sqrt.conc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.325   3.867   4.906   5.841   6.844  20.137
length(e2_full_long[e2_full_long$sqrt.conc >= 10, "sqrt.conc"]) #109 values greater than or equal to 10
## [1] 109
length(e2_full_long[e2_full_long$sqrt.conc <= 3, "sqrt.conc"]) #101 values less than or equal to 3
## [1] 101
length(e2_full_long[e2_full_long$sqrt.conc >= 6.844, "sqrt.conc"]) #291 values greater than or equal to 3rd quartile
## [1] 291
length(e2_full_long[e2_full_long$sqrt.conc <= 3.867, "sqrt.conc"]) #289 values less than or equal to 1st quartile
## [1] 289
length(e2_full_long$sqrt.conc[e2_full_long$sqrt.conc >= 3.867 & e2_full_long$sqrt.conc <= 6.844]) #583 values between 1st and 3rd quartile
## [1] 583
#583+289+291 = 1153

make new categories based on this exploration..

e2_full_long <- e2_full_long %>%
  mutate(e2_bin = case_when(
    sqrt.conc <= 3 ~ "low E2",
    sqrt.conc > 3 & sqrt.conc < 10 ~ "mid E2",
    sqrt.conc >= 10 ~ "high E2"
  ))
#new dimensions 1163 x 14

summary table with new categories

e2_full_long %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Full Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
  add_overall() %>%
  add_p()%>%
  bold_p()
Full Table: Features by Estrogen Bin, ALL TIME POINTS
Characteristic Overall, N = 1,1631 Relative Estrogen Bin p-value2
high E2, N = 1091 low E2, N = 1011 mid E2, N = 9531
ED_Age 37 (13) 31 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.024
    Some College or Associate Degree 466 (40%) 45 (41%) 28 (28%) 393 (41%)
    Advanced Degree 81 (7.0%) 6 (5.5%) 3 (3.0%) 72 (7.6%)
    High School or Less 416 (36%) 42 (39%) 44 (44%) 330 (35%)
    Bachelor's Degree 200 (17%) 16 (15%) 26 (26%) 158 (17%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.2
Race



<0.001
    Hispanic 167 (14%) 9 (8.3%) 17 (17%) 141 (15%)
    Non-Hispanic Black 514 (44%) 47 (43%) 35 (35%) 432 (45%)
    Non-Hispanic Other 69 (5.9%) 6 (5.5%) 15 (15%) 48 (5.0%)
    Non-Hispanic White 413 (36%) 47 (43%) 34 (34%) 332 (35%)
sqrt.conc 5.84 (3.09) 13.44 (2.61) 2.46 (0.39) 5.33 (1.67) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.5
bin_pain



0.2
    Low to None 601 (52%) 54 (50%) 44 (44%) 503 (53%)
    Moderate to Severe 562 (48%) 55 (50%) 57 (56%) 450 (47%)
pain 4 (3) 4 (3) 5 (3) 4 (3) 0.005
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#above, this shows the category pain is significant (p=.005). this is dependent on time point, which is not in this summary table. low E2 has a mean pain of 5 while mid and high E2 have a mean pain of 4, across all time points. 
#when all time points are combined, pain is significant across the different E2 categories
#but below, when i stratify by time point, it is no longer significant

#summary table for MONTH 2
e2_full_long %>%
  filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT
Characteristic Overall, N = 3841 Relative Estrogen Bin p-value2
high E2, N = 361 low E2, N = 341 mid E2, N = 3141
ED_Age 37 (13) 31 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.5
    Some College or Associate Degree 155 (40%) 15 (42%) 9 (26%) 131 (42%)
    Advanced Degree 27 (7.0%) 2 (5.6%) 1 (2.9%) 24 (7.6%)
    High School or Less 135 (35%) 14 (39%) 15 (44%) 106 (34%)
    Bachelor's Degree 67 (17%) 5 (14%) 9 (26%) 53 (17%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.6
Race



0.3
    Hispanic 55 (14%) 3 (8.3%) 6 (18%) 46 (15%)
    Non-Hispanic Black 170 (44%) 16 (44%) 11 (32%) 143 (46%)
    Non-Hispanic Other 23 (6.0%) 2 (5.6%) 5 (15%) 16 (5.1%)
    Non-Hispanic White 136 (35%) 15 (42%) 12 (35%) 109 (35%)
sqrt.conc 5.84 (3.11) 13.47 (2.66) 2.45 (0.40) 5.33 (1.67) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.7
bin_pain



0.3
    Low to None 174 (45%) 13 (36%) 13 (38%) 148 (47%)
    Moderate to Severe 210 (55%) 23 (64%) 21 (62%) 166 (53%)
pain 5 (3) 5 (3) 6 (3) 5 (3) 0.10
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test
#summary table for MONTH 3
e2_full_long %>%
  filter(time == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT
Characteristic Overall, N = 3821 Relative Estrogen Bin p-value2
high E2, N = 361 low E2, N = 321 mid E2, N = 3141
ED_Age 37 (13) 32 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.7
    Some College or Associate Degree 152 (40%) 15 (42%) 9 (28%) 128 (41%)
    Advanced Degree 27 (7.1%) 2 (5.6%) 1 (3.1%) 24 (7.6%)
    High School or Less 139 (36%) 14 (39%) 14 (44%) 111 (35%)
    Bachelor's Degree 64 (17%) 5 (14%) 8 (25%) 51 (16%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.6
Race



0.3
    Hispanic 55 (14%) 3 (8.3%) 5 (16%) 47 (15%)
    Non-Hispanic Black 169 (44%) 15 (42%) 12 (38%) 142 (45%)
    Non-Hispanic Other 23 (6.0%) 2 (5.6%) 5 (16%) 16 (5.1%)
    Non-Hispanic White 135 (35%) 16 (44%) 10 (31%) 109 (35%)
sqrt.conc 5.85 (3.08) 13.38 (2.61) 2.47 (0.38) 5.33 (1.68) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.9
bin_pain



0.7
    Low to None 196 (51%) 19 (53%) 14 (44%) 163 (52%)
    Moderate to Severe 186 (49%) 17 (47%) 18 (56%) 151 (48%)
pain 4 (3) 4 (3) 5 (3) 4 (3) 0.2
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test
#summary table for MONTH 6
e2_full_long %>%
  filter(time == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT
Characteristic Overall, N = 3971 Relative Estrogen Bin p-value2
high E2, N = 371 low E2, N = 351 mid E2, N = 3251
ED_Age 37 (13) 31 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.7
    Some College or Associate Degree 159 (40%) 15 (41%) 10 (29%) 134 (41%)
    Advanced Degree 27 (6.8%) 2 (5.4%) 1 (2.9%) 24 (7.4%)
    High School or Less 142 (36%) 14 (38%) 15 (43%) 113 (35%)
    Bachelor's Degree 69 (17%) 6 (16%) 9 (26%) 54 (17%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.6
Race



0.3
    Hispanic 57 (14%) 3 (8.1%) 6 (17%) 48 (15%)
    Non-Hispanic Black 175 (44%) 16 (43%) 12 (34%) 147 (45%)
    Non-Hispanic Other 23 (5.8%) 2 (5.4%) 5 (14%) 16 (4.9%)
    Non-Hispanic White 142 (36%) 16 (43%) 12 (34%) 114 (35%)
sqrt.conc 5.84 (3.09) 13.46 (2.63) 2.46 (0.40) 5.33 (1.67) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.8
bin_pain



0.5
    Low to None 231 (58%) 22 (59%) 17 (49%) 192 (59%)
    Moderate to Severe 166 (42%) 15 (41%) 18 (51%) 133 (41%)
pain 4 (3) 4 (3) 5 (3) 4 (3) 0.086
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test

what if I only look at low and high e2 levels?

e2_full_long %>%
  filter(e2_bin %in% c("high E2", "low E2")) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
              statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  add_p() %>% bold_p() %>% add_overall()
Characteristic Overall, N = 2101 high E2, N = 1091 low E2, N = 1011 p-value2
ED_Age 38 (13) 31 (9) 46 (12) <0.001
ED_HighestGrade


0.070
    Some College or Associate Degree 73 (35%) 45 (41%) 28 (28%)
    Advanced Degree 9 (4.3%) 6 (5.5%) 3 (3.0%)
    High School or Less 86 (41%) 42 (39%) 44 (44%)
    Bachelor's Degree 42 (20%) 16 (15%) 26 (26%)
BMI 29 (8) 30 (9) 28 (6) 0.3
Race


0.020
    Hispanic 26 (12%) 9 (8.3%) 17 (17%)
    Non-Hispanic Black 82 (39%) 47 (43%) 35 (35%)
    Non-Hispanic Other 21 (10%) 6 (5.5%) 15 (15%)
    Non-Hispanic White 81 (39%) 47 (43%) 34 (34%)
sqrt.conc 8.2 (5.8) 13.4 (2.6) 2.5 (0.4) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 0.2
bin_pain


0.4
    Low to None 98 (47%) 54 (50%) 44 (44%)
    Moderate to Severe 112 (53%) 55 (50%) 57 (56%)
pain 5 (3) 4 (3) 5 (3) 0.014
1 Mean (SD); n (%)
2 Wilcoxon rank sum test; Fisher’s exact test; Pearson’s Chi-squared test
#this shows pain across all time points is sig (p=.014)

repeat this summary table for each time point

e2_full_long %>%
  filter(time == 2) %>% filter(e2_bin %in% c("high E2", "low E2")) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin, statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  add_p() %>% bold_p() %>% add_overall() #2 month, p =.3 for pain
## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
Characteristic Overall, N = 701 high E2, N = 361 low E2, N = 341 p-value2
ED_Age 38 (13) 31 (9) 46 (12) <0.001
ED_HighestGrade


0.4
    Some College or Associate Degree 24 (34%) 15 (42%) 9 (26%)
    Advanced Degree 3 (4.3%) 2 (5.6%) 1 (2.9%)
    High School or Less 29 (41%) 14 (39%) 15 (44%)
    Bachelor's Degree 14 (20%) 5 (14%) 9 (26%)
BMI 29 (8) 30 (9) 28 (6) 0.5
Race


0.3
    Hispanic 9 (13%) 3 (8.3%) 6 (18%)
    Non-Hispanic Black 27 (39%) 16 (44%) 11 (32%)
    Non-Hispanic Other 7 (10%) 2 (5.6%) 5 (15%)
    Non-Hispanic White 27 (39%) 15 (42%) 12 (35%)
sqrt.conc 8.1 (5.9) 13.5 (2.7) 2.4 (0.4) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 0.4
bin_pain


0.9
    Low to None 26 (37%) 13 (36%) 13 (38%)
    Moderate to Severe 44 (63%) 23 (64%) 21 (62%)
pain 5 (3) 5 (3) 6 (3) 0.3
1 Mean (SD); n (%)
2 Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test
e2_full_long %>%
  filter(time == 3) %>% filter(e2_bin %in% c("high E2", "low E2")) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin, statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  add_p() %>% bold_p() %>% add_overall() #3 month, p =.12 for pain
## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
Characteristic Overall, N = 681 high E2, N = 361 low E2, N = 321 p-value2
ED_Age 38 (12) 32 (9) 46 (12) <0.001
ED_HighestGrade


0.5
    Some College or Associate Degree 24 (35%) 15 (42%) 9 (28%)
    Advanced Degree 3 (4.4%) 2 (5.6%) 1 (3.1%)
    High School or Less 28 (41%) 14 (39%) 14 (44%)
    Bachelor's Degree 13 (19%) 5 (14%) 8 (25%)
BMI 29 (8) 30 (9) 28 (6) 0.5
Race


0.4
    Hispanic 8 (12%) 3 (8.3%) 5 (16%)
    Non-Hispanic Black 27 (40%) 15 (42%) 12 (38%)
    Non-Hispanic Other 7 (10%) 2 (5.6%) 5 (16%)
    Non-Hispanic White 26 (38%) 16 (44%) 10 (31%)
sqrt.conc 8.2 (5.8) 13.4 (2.6) 2.5 (0.4) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 0.6
bin_pain


0.5
    Low to None 33 (49%) 19 (53%) 14 (44%)
    Moderate to Severe 35 (51%) 17 (47%) 18 (56%)
pain 4 (3) 4 (3) 5 (3) 0.12
1 Mean (SD); n (%)
2 Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test
e2_full_long %>%
  filter(time == 6) %>% filter(e2_bin %in% c("high E2", "low E2")) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin, statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  add_p() %>% bold_p() %>% add_overall() #6 month, p =.081 for pain
## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
Characteristic Overall, N = 721 high E2, N = 371 low E2, N = 351 p-value2
ED_Age 38 (13) 31 (9) 46 (12) <0.001
ED_HighestGrade


0.6
    Some College or Associate Degree 25 (35%) 15 (41%) 10 (29%)
    Advanced Degree 3 (4.2%) 2 (5.4%) 1 (2.9%)
    High School or Less 29 (40%) 14 (38%) 15 (43%)
    Bachelor's Degree 15 (21%) 6 (16%) 9 (26%)
BMI 29 (8) 30 (9) 28 (6) 0.6
Race


0.4
    Hispanic 9 (13%) 3 (8.1%) 6 (17%)
    Non-Hispanic Black 28 (39%) 16 (43%) 12 (34%)
    Non-Hispanic Other 7 (9.7%) 2 (5.4%) 5 (14%)
    Non-Hispanic White 28 (39%) 16 (43%) 12 (34%)
sqrt.conc 8.1 (5.9) 13.5 (2.6) 2.5 (0.4) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 0.5
bin_pain


0.4
    Low to None 39 (54%) 22 (59%) 17 (49%)
    Moderate to Severe 33 (46%) 15 (41%) 18 (51%)
pain 4 (3) 4 (3) 5 (3) 0.081
1 Mean (SD); n (%)
2 Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test

damn !! i don’t know. it’s not signficant when stratified by time point. I think this is because it is auto-correlating when all time points are combined because you have three of each case

visuals

ggplot(e2_full_long, aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain Across All Time Points", y = "Density", title = "Pain All Time Points, by Estrogen Bin") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 

#from this we see that mid and high estrogen had more 0s. low E2 had more 10s
#let's repeat this by time point to see we aren't getting auto-correlation

#BELOW: PAIN AT 2 MONTHS
ggplot(data = e2_full_long %>% filter(time == 2), aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 2 MONTHS", y = "Density", title = "Pain at 2 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 

#BELOW: PAIN AT 3 MONTHS
ggplot(data = e2_full_long %>% filter(time == 3), aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 3 MONTHS", y = "Density", title = "Pain at 3 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 

#BELOW: PAIN AT 6 MONTHS
ggplot(data = e2_full_long %>% filter(time == 6), aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 6 MONTHS", y = "Density", title = "Pain at 6 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 

#boxplot of e2_bins and pain at month 2
ggplot(e2_full_long %>% filter(time == 2), aes(x = pain, y = e2_bin)) +
  geom_boxplot() +
  labs(title = "E2_bin X Pain at month 2")

#boxplot of e2_bins and pain at month 3
ggplot(e2_full_long %>% filter(time == 3), aes(x = pain, y = e2_bin)) +
  geom_boxplot() +
  labs(title = "E2_bin X Pain at month 3")

#boxplot of e2_bins and pain at month 6
ggplot(e2_full_long %>% filter(time == 6), aes(x = pain, y = e2_bin)) +
  geom_boxplot() +
  labs(title = "E2_bin X Pain at month 6")

Ok this is very useful!! We see that low E2 has fewer 0s and more 10s at each time point. This makes me think a different pain stratification should take place that focuses on the extremes of the pain NRS scale.

#high E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "high E2"), aes(x = pain, fill = as.factor(time))) +
  geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
  labs(title = "High E2 x Pain",
       x = "Pain", y = "Frequency")

#mid E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "mid E2"), aes(x = pain, fill = as.factor(time))) +
  geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
  labs(title = "Mid E2 x Pain",
       x = "Pain", y = "Frequency")

#low E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "low E2"), aes(x = pain, fill = as.factor(time))) +
  geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
  labs(title = "Low E2 x Pain",
       x = "Pain", y = "Frequency")

#High and Low E2 level at month 2 - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 2, e2_bin %in% c("high E2", "low E2")), aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 2 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

#High and Low E2 level at month 3 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 3, e2_bin %in% c("high E2", "low E2")), aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 3 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

#High and Low E2 level at month 6 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 6, e2_bin %in% c("high E2", "low E2")), aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 6 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

chi square ?

table(e2_full_long$e2_bin, as.factor(e2_full_long$pain))
##          
##             0   1   2   3   4   5   6   7   8   9  10
##   high E2  27   6   9   6   6  15   9  12  10   1   8
##   low E2    7  11   5  10  11  13   9   5   6   3  21
##   mid E2  188  75  55  93  92 111 111  75  71  35  47
table(e2_full_long$e2_bin, e2_full_long$bin_pain)
##          
##           Low to None Moderate to Severe
##   high E2          54                 55
##   low E2           44                 57
##   mid E2          503                450
chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$pain))
## Warning in chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$pain)):
## Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and as.factor(e2_full_long$pain)
## X-squared = 59.641, df = 20, p-value = 8.093e-06
chisq.test(e2_full_long$e2_bin, e2_full_long$bin_pain) #p-value = 0.1896
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and e2_full_long$bin_pain
## X-squared = 3.3259, df = 2, p-value = 0.1896
kruskal.test(pain ~ e2_bin, data = e2_full_long) # p-value = 0.00459
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pain by e2_bin
## Kruskal-Wallis chi-squared = 10.768, df = 2, p-value = 0.00459

make new levels for pain

e2_full_long <- e2_full_long %>%
  mutate(bin_pain2 = case_when(
    pain < 2 ~ "0 or 1 pain",
    pain > 1 & pain < 9 ~ "2 to 8 pain",
    pain > 8 ~ "9 or 10 pain"
  ))
#new dimensions 1163 x 15

e2_full_long <- e2_full_long %>%
  mutate(bin_pain3 = case_when(
    pain < 1 ~ "0 pain",
    pain > 0 & pain < 10 ~ "1 to 9 pain",
    pain > 9 ~ "10 pain"
  ))
#new dimensions 1163 x 16

summary table

#each time point
e2_full_long %>%
  #filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
Combined Table: Features by Estrogen Bin, ALL TIME POINTS
Characteristic Overall, N = 1,1631 Relative Estrogen Bin p-value2
high E2, N = 1091 low E2, N = 1011 mid E2, N = 9531
ED_Age 37 (13) 31 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.024
    Some College or Associate Degree 466 (40%) 45 (41%) 28 (28%) 393 (41%)
    Advanced Degree 81 (7.0%) 6 (5.5%) 3 (3.0%) 72 (7.6%)
    High School or Less 416 (36%) 42 (39%) 44 (44%) 330 (35%)
    Bachelor's Degree 200 (17%) 16 (15%) 26 (26%) 158 (17%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.2
Race



<0.001
    Hispanic 167 (14%) 9 (8.3%) 17 (17%) 141 (15%)
    Non-Hispanic Black 514 (44%) 47 (43%) 35 (35%) 432 (45%)
    Non-Hispanic Other 69 (5.9%) 6 (5.5%) 15 (15%) 48 (5.0%)
    Non-Hispanic White 413 (36%) 47 (43%) 34 (34%) 332 (35%)
sqrt.conc 5.84 (3.09) 13.44 (2.61) 2.46 (0.39) 5.33 (1.67) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.5
bin_pain



0.2
    Low to None 601 (52%) 54 (50%) 44 (44%) 503 (53%)
    Moderate to Severe 562 (48%) 55 (50%) 57 (56%) 450 (47%)
pain 4 (3) 4 (3) 5 (3) 4 (3) 0.005
bin_pain2



<0.001
    0 or 1 pain 314 (27%) 33 (30%) 18 (18%) 263 (28%)
    2 to 8 pain 734 (63%) 67 (61%) 59 (58%) 608 (64%)
    9 or 10 pain 115 (9.9%) 9 (8.3%) 24 (24%) 82 (8.6%)
bin_pain3



<0.001
    0 pain 222 (19%) 27 (25%) 7 (6.9%) 188 (20%)
    1 to 9 pain 865 (74%) 74 (68%) 73 (72%) 718 (75%)
    10 pain 76 (6.5%) 8 (7.3%) 21 (21%) 47 (4.9%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#bin pain 2 and bin pain 3 significant (p<.001)

#month 2
e2_full_long %>%
  filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**WK8 Table: Features by Estrogen Bin, 8 weeks**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
WK8 Table: Features by Estrogen Bin, 8 weeks
Characteristic Overall, N = 3841 Relative Estrogen Bin p-value2
high E2, N = 361 low E2, N = 341 mid E2, N = 3141
ED_Age 37 (13) 31 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.5
    Some College or Associate Degree 155 (40%) 15 (42%) 9 (26%) 131 (42%)
    Advanced Degree 27 (7.0%) 2 (5.6%) 1 (2.9%) 24 (7.6%)
    High School or Less 135 (35%) 14 (39%) 15 (44%) 106 (34%)
    Bachelor's Degree 67 (17%) 5 (14%) 9 (26%) 53 (17%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.6
Race



0.3
    Hispanic 55 (14%) 3 (8.3%) 6 (18%) 46 (15%)
    Non-Hispanic Black 170 (44%) 16 (44%) 11 (32%) 143 (46%)
    Non-Hispanic Other 23 (6.0%) 2 (5.6%) 5 (15%) 16 (5.1%)
    Non-Hispanic White 136 (35%) 15 (42%) 12 (35%) 109 (35%)
sqrt.conc 5.84 (3.11) 13.47 (2.66) 2.45 (0.40) 5.33 (1.67) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.7
bin_pain



0.3
    Low to None 174 (45%) 13 (36%) 13 (38%) 148 (47%)
    Moderate to Severe 210 (55%) 23 (64%) 21 (62%) 166 (53%)
pain 5 (3) 5 (3) 6 (3) 5 (3) 0.10
bin_pain2



0.3
    0 or 1 pain 72 (19%) 7 (19%) 4 (12%) 61 (19%)
    2 to 8 pain 267 (70%) 26 (72%) 22 (65%) 219 (70%)
    9 or 10 pain 45 (12%) 3 (8.3%) 8 (24%) 34 (11%)
bin_pain3



0.047
    0 pain 48 (13%) 6 (17%) 2 (5.9%) 40 (13%)
    1 to 9 pain 307 (80%) 27 (75%) 25 (74%) 255 (81%)
    10 pain 29 (7.6%) 3 (8.3%) 7 (21%) 19 (6.1%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test
#bin pain 3 significant (p=0.047) but not bin_pain2

#month 3
e2_full_long %>%
  filter(time == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, 3 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
M3 Table: Features by Estrogen Bin, 3 months
Characteristic Overall, N = 3821 Relative Estrogen Bin p-value2
high E2, N = 361 low E2, N = 321 mid E2, N = 3141
ED_Age 37 (13) 32 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.7
    Some College or Associate Degree 152 (40%) 15 (42%) 9 (28%) 128 (41%)
    Advanced Degree 27 (7.1%) 2 (5.6%) 1 (3.1%) 24 (7.6%)
    High School or Less 139 (36%) 14 (39%) 14 (44%) 111 (35%)
    Bachelor's Degree 64 (17%) 5 (14%) 8 (25%) 51 (16%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.6
Race



0.3
    Hispanic 55 (14%) 3 (8.3%) 5 (16%) 47 (15%)
    Non-Hispanic Black 169 (44%) 15 (42%) 12 (38%) 142 (45%)
    Non-Hispanic Other 23 (6.0%) 2 (5.6%) 5 (16%) 16 (5.1%)
    Non-Hispanic White 135 (35%) 16 (44%) 10 (31%) 109 (35%)
sqrt.conc 5.85 (3.08) 13.38 (2.61) 2.47 (0.38) 5.33 (1.68) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.9
bin_pain



0.7
    Low to None 196 (51%) 19 (53%) 14 (44%) 163 (52%)
    Moderate to Severe 186 (49%) 17 (47%) 18 (56%) 151 (48%)
pain 4 (3) 4 (3) 5 (3) 4 (3) 0.2
bin_pain2



0.086
    0 or 1 pain 105 (27%) 13 (36%) 5 (16%) 87 (28%)
    2 to 8 pain 241 (63%) 20 (56%) 20 (63%) 201 (64%)
    9 or 10 pain 36 (9.4%) 3 (8.3%) 7 (22%) 26 (8.3%)
bin_pain3



0.012
    0 pain 72 (19%) 10 (28%) 3 (9.4%) 59 (19%)
    1 to 9 pain 287 (75%) 23 (64%) 23 (72%) 241 (77%)
    10 pain 23 (6.0%) 3 (8.3%) 6 (19%) 14 (4.5%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test
#bin_pain3 is sig (p=0.012) but not bin pain 2 

#month 6
e2_full_long %>%
  filter(time == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, 6 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
M6 Table: Features by Estrogen Bin, 6 months
Characteristic Overall, N = 3971 Relative Estrogen Bin p-value2
high E2, N = 371 low E2, N = 351 mid E2, N = 3251
ED_Age 37 (13) 31 (9) 46 (12) 37 (14) <0.001
ED_HighestGrade



0.7
    Some College or Associate Degree 159 (40%) 15 (41%) 10 (29%) 134 (41%)
    Advanced Degree 27 (6.8%) 2 (5.4%) 1 (2.9%) 24 (7.4%)
    High School or Less 142 (36%) 14 (38%) 15 (43%) 113 (35%)
    Bachelor's Degree 69 (17%) 6 (16%) 9 (26%) 54 (17%)
BMI 30 (8) 30 (9) 28 (6) 30 (8) 0.6
Race



0.3
    Hispanic 57 (14%) 3 (8.1%) 6 (17%) 48 (15%)
    Non-Hispanic Black 175 (44%) 16 (43%) 12 (34%) 147 (45%)
    Non-Hispanic Other 23 (5.8%) 2 (5.4%) 5 (14%) 16 (4.9%)
    Non-Hispanic White 142 (36%) 16 (43%) 12 (34%) 114 (35%)
sqrt.conc 5.84 (3.09) 13.46 (2.63) 2.46 (0.40) 5.33 (1.67) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.8
bin_pain



0.5
    Low to None 231 (58%) 22 (59%) 17 (49%) 192 (59%)
    Moderate to Severe 166 (42%) 15 (41%) 18 (51%) 133 (41%)
pain 4 (3) 4 (3) 5 (3) 4 (3) 0.086
bin_pain2



0.021
    0 or 1 pain 137 (35%) 13 (35%) 9 (26%) 115 (35%)
    2 to 8 pain 226 (57%) 21 (57%) 17 (49%) 188 (58%)
    9 or 10 pain 34 (8.6%) 3 (8.1%) 9 (26%) 22 (6.8%)
bin_pain3



<0.001
    0 pain 102 (26%) 11 (30%) 2 (5.7%) 89 (27%)
    1 to 9 pain 271 (68%) 24 (65%) 25 (71%) 222 (68%)
    10 pain 24 (6.0%) 2 (5.4%) 8 (23%) 14 (4.3%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test
#bin pain 2 sig (p=0.021) and bin pain 3 sig (p<.001)

hell yeah

stats for new categories

table(e2_full_long$e2_bin, e2_full_long$bin_pain3)
##          
##           0 pain 1 to 9 pain 10 pain
##   high E2     27          74       8
##   low E2       7          73      21
##   mid E2     188         718      47
round(prop.table(table(e2_full_long$e2_bin, e2_full_long$bin_pain3), margin = 1) * 100,2)
##          
##           0 pain 1 to 9 pain 10 pain
##   high E2  24.77       67.89    7.34
##   low E2    6.93       72.28   20.79
##   mid E2   19.73       75.34    4.93
chisq.test(e2_full_long[e2_full_long$time == 2, "e2_bin"], e2_full_long[e2_full_long$time == 2, "bin_pain3"]) # p-value = 0.03053
## Warning in chisq.test(e2_full_long[e2_full_long$time == 2, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 2, "e2_bin"] and e2_full_long[e2_full_long$time == 2, "bin_pain3"]
## X-squared = 10.67, df = 4, p-value = 0.03053
chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"], e2_full_long[e2_full_long$time == 3, "bin_pain3"]) #p-value = 0.007257
## Warning in chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 3, "e2_bin"] and e2_full_long[e2_full_long$time == 3, "bin_pain3"]
## X-squared = 14.012, df = 4, p-value = 0.007257
chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"], e2_full_long[e2_full_long$time == 6, "bin_pain3"]) #p-value = 7.419e-05
## Warning in chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 6, "e2_bin"] and e2_full_long[e2_full_long$time == 6, "bin_pain3"]
## X-squared = 24.16, df = 4, p-value = 7.419e-05

redo model from above

MARCH 2024 NEW E2 DATA CODE GLS MODEL

e2_model <- gls(pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long)

summary(e2_model)
## Generalized least squares fit by REML
##   Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +      Race + Plate.Number + e2_bin 
##   Data: e2_full_long 
##        AIC      BIC    logLik
##   5563.071 5648.849 -2764.535
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##      Phi1 
## 0.7287282 
## 
## Coefficients:
##                                         Value Std.Error   t-value p-value
## (Intercept)                         1.1045118 1.2952495  0.852741  0.3940
## time                               -0.2372138 0.0453306 -5.232973  0.0000
## sqrt.conc                           0.0769038 0.0720024  1.068072  0.2857
## ED_Age                              0.0397639 0.0097896  4.061841  0.0001
## ED_HighestGradeAdvanced Degree     -1.0276386 0.5174215 -1.986076  0.0473
## ED_HighestGradeHigh School or Less -0.0479274 0.2807562 -0.170708  0.8645
## ED_HighestGradeBachelor's Degree   -0.7130133 0.3522310 -2.024277  0.0432
## BMI                                 0.0287845 0.0147894  1.946294  0.0519
## ED_Pain                             0.2582287 0.0489249  5.278060  0.0000
## RaceNon-Hispanic Black             -0.5138723 0.3735496 -1.375647  0.1692
## RaceNon-Hispanic Other             -0.5722177 0.5989953 -0.955296  0.3396
## RaceNon-Hispanic White             -0.5373559 0.3901958 -1.377144  0.1687
## Plate.Number                       -0.0233885 0.0186592 -1.253453  0.2103
## e2_binlow E2                        1.6358354 0.9715319  1.683769  0.0925
## e2_binmid E2                        0.4536050 0.7128541  0.636322  0.5247
## 
##  Correlation: 
##                                    (Intr) time   sqrt.c ED_Age ED_HGAD ED_SoL
## time                               -0.143                                    
## sqrt.conc                          -0.743 -0.001                             
## ED_Age                             -0.280  0.004  0.146                      
## ED_HighestGradeAdvanced Degree     -0.153  0.002  0.013 -0.106               
## ED_HighestGradeHigh School or Less -0.149  0.000 -0.001  0.117  0.231        
## ED_HighestGradeBachelor's Degree   -0.083  0.000 -0.019 -0.058  0.239   0.343
## BMI                                -0.237  0.006 -0.107 -0.180  0.072   0.044
## ED_Pain                            -0.272  0.002 -0.045  0.046  0.135  -0.073
## RaceNon-Hispanic Black             -0.222  0.000  0.004 -0.069  0.224   0.038
## RaceNon-Hispanic Other             -0.107  0.002 -0.096  0.040  0.110   0.007
## RaceNon-Hispanic White             -0.266 -0.002  0.044 -0.090  0.134   0.055
## Plate.Number                       -0.206  0.000  0.029 -0.019  0.062   0.067
## e2_binlow E2                       -0.709 -0.001  0.798 -0.039  0.040  -0.039
## e2_binmid E2                       -0.771 -0.002  0.811  0.046  0.014   0.000
##                                    ED_HGBD BMI    ED_Pan RcN-HB RcN-HO RcN-HW
## time                                                                         
## sqrt.conc                                                                    
## ED_Age                                                                       
## ED_HighestGradeAdvanced Degree                                               
## ED_HighestGradeHigh School or Less                                           
## ED_HighestGradeBachelor's Degree                                             
## BMI                                 0.028                                    
## ED_Pain                             0.070   0.013                            
## RaceNon-Hispanic Black              0.075  -0.010  0.014                     
## RaceNon-Hispanic Other              0.056   0.019  0.097  0.464              
## RaceNon-Hispanic White             -0.048  -0.012  0.157  0.718  0.441       
## Plate.Number                        0.014   0.059  0.075 -0.037 -0.047 -0.105
## e2_binlow E2                       -0.044  -0.033 -0.014  0.048 -0.105  0.092
## e2_binmid E2                       -0.014  -0.079 -0.012  0.029 -0.063  0.083
##                                    Plt.Nm e2_bnlE2
## time                                              
## sqrt.conc                                         
## ED_Age                                            
## ED_HighestGradeAdvanced Degree                    
## ED_HighestGradeHigh School or Less                
## ED_HighestGradeBachelor's Degree                  
## BMI                                               
## ED_Pain                                           
## RaceNon-Hispanic Black                            
## RaceNon-Hispanic Other                            
## RaceNon-Hispanic White                            
## Plate.Number                                      
## e2_binlow E2                        0.026         
## e2_binmid E2                        0.034  0.880  
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.20825378 -0.80873595  0.03119517  0.73834813  2.56387776 
## 
## Residual standard error: 2.996335 
## Degrees of freedom: 1163 total; 1148 residual
#recode the factors so the binned pain is the outcome of interest
e2_full_long$bin_pain4 <- recode_factor(e2_full_long$bin_pain3,
                                        `0 pain` = "1",
                                        `1 to 9 pain` = "2",
                                        `10 pain` = "3") #new dimensions 1163 x 17

#multinomial logit regression
e2_model <- multinom(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long) #e2_bin low E2  p = 2.398452e-03 = 0.002398452  
## # weights:  48 (30 variable)
## initial  value 1277.686092 
## iter  10 value 976.255582
## iter  20 value 828.205849
## iter  30 value 760.275081
## iter  40 value 757.915874
## final  value 757.915779 
## converged
#library(kableExtra)
tidy(e2_model, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE)
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 1.0845445 0.8347454 1.2992519 0.1938575 -0.5515263 2.7206154
2 time -0.1986780 0.0441989 -4.4950857 0.0000070 -0.2853063 -0.1120497
2 sqrt.conc -0.0246233 0.0448684 -0.5487893 0.5831500 -0.1125638 0.0633172
2 ED_Age 0.0241006 0.0066603 3.6185254 0.0002963 0.0110466 0.0371546
2 ED_HighestGradeAdvanced Degree 0.0625538 0.3408485 0.1835238 0.8543871 -0.6054970 0.7306046
2 ED_HighestGradeHigh School or Less -0.1319082 0.1793021 -0.7356753 0.4619283 -0.4833339 0.2195176
2 ED_HighestGradeBachelor’s Degree 0.1582817 0.2390016 0.6622620 0.5078034 -0.3101529 0.6267163
2 BMI 0.0116255 0.0097880 1.1877339 0.2349383 -0.0075586 0.0308096
2 ED_Pain 0.0897672 0.0315763 2.8428666 0.0044710 0.0278788 0.1516556
2 RaceNon-Hispanic Black -0.5316580 0.2670261 -1.9910341 0.0464771 -1.0550195 -0.0082965
2 RaceNon-Hispanic Other -0.3982038 0.3928737 -1.0135670 0.3107894 -1.1682220 0.3718145
2 RaceNon-Hispanic White -0.3554165 0.2777867 -1.2794584 0.2007357 -0.8998683 0.1890354
2 Plate.Number -0.0205870 0.0118948 -1.7307527 0.0834959 -0.0439005 0.0027264
2 e2_binlow E2 0.8054025 0.6832337 1.1788096 0.2384740 -0.5337110 2.1445161
2 e2_binmid E2 0.0402581 0.4401452 0.0914656 0.9271226 -0.8224105 0.9029268
3 (Intercept) -4.3091417 1.5604971 -2.7613905 0.0057556 -7.3676598 -1.2506237
3 time -0.2171498 0.0828435 -2.6212044 0.0087620 -0.3795201 -0.0547795
3 sqrt.conc 0.1403640 0.0810177 1.7325095 0.0831829 -0.0184278 0.2991558
3 ED_Age 0.0518864 0.0123929 4.1868010 0.0000283 0.0275969 0.0761759
3 ED_HighestGradeAdvanced Degree -1.5629041 1.0823019 -1.4440556 0.1487233 -3.6841767 0.5583686
3 ED_HighestGradeHigh School or Less -0.1705342 0.3118840 -0.5467874 0.5845248 -0.7818157 0.4407472
3 ED_HighestGradeBachelor’s Degree -0.6793027 0.5122167 -1.3262017 0.1847729 -1.6832291 0.3246236
3 BMI -0.0214102 0.0196563 -1.0892260 0.2760542 -0.0599359 0.0171155
3 ED_Pain 0.3089434 0.0655578 4.7125342 0.0000024 0.1804525 0.4374343
3 RaceNon-Hispanic Black -0.5619284 0.4226214 -1.3296261 0.1836415 -1.3902511 0.2663943
3 RaceNon-Hispanic Other -0.8826778 0.7154517 -1.2337350 0.2173017 -2.2849373 0.5195818
3 RaceNon-Hispanic White -0.6647507 0.4814270 -1.3807924 0.1673428 -1.6083302 0.2788288
3 Plate.Number -0.0440065 0.0225823 -1.9487162 0.0513293 -0.0882670 0.0002540
3 e2_binlow E2 3.3565935 1.1056458 3.0358670 0.0023985 1.1895676 5.5236193
3 e2_binmid E2 0.7428651 0.7929725 0.9368107 0.3488560 -0.8113324 2.2970625
#e2_bin low E2 on category 3 (10 pain)  p = 2.398452e-03 = 0.002398452  
#this means that having a LOW E2 category is significantly associated with having level 10 PAIN rather than Level 0 Pain



# Fit the multinomial logistic regression model for cases where e2_full_long$time == 2 months
model_m2 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, 
                     subset = time == 2, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 421.867119 
## iter  10 value 246.880449
## iter  20 value 217.350612
## iter  30 value 215.581016
## iter  40 value 215.559264
## iter  50 value 215.558537
## iter  50 value 215.558536
## iter  50 value 215.558536
## final  value 215.558536 
## converged
tidy(model_m2, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #low e2 significant vs level 10 pain 
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.0858882 1.7295095 4.966050e-02 0.9603930 -3.3038880 3.4756645
2 sqrt.conc -0.0147206 0.0959880 -1.533589e-01 0.8781152 -0.2028536 0.1734124
2 ED_Age 0.0334912 0.0143580 2.332580e+00 0.0196702 0.0053500 0.0616325
2 ED_HighestGradeAdvanced Degree 0.1225345 0.7225473 1.695868e-01 0.8653351 -1.2936321 1.5387012
2 ED_HighestGradeHigh School or Less -0.4313307 0.3744552 -1.151889e+00 0.2493667 -1.1652494 0.3025879
2 ED_HighestGradeBachelor’s Degree 0.1035662 0.5042208 2.053985e-01 0.8372608 -0.8846884 1.0918208
2 BMI -0.0005951 0.0197932 -3.006750e-02 0.9760132 -0.0393891 0.0381989
2 ED_Pain 0.2279952 0.0642853 3.546617e+00 0.0003902 0.1019984 0.3539920
2 RaceNon-Hispanic Black -0.9484818 0.6599985 -1.437097e+00 0.1506904 -2.2420551 0.3450915
2 RaceNon-Hispanic Other -0.8525189 0.8584694 -9.930685e-01 0.3206766 -2.5350880 0.8300502
2 RaceNon-Hispanic White -0.9190518 0.6705089 -1.370678e+00 0.1704753 -2.2332251 0.3951215
2 Plate.Number 0.0172467 0.0253205 6.811342e-01 0.4957866 -0.0323806 0.0668739
2 e2_binlow E2 0.5784955 1.3879456 4.167998e-01 0.6768248 -2.1418279 3.2988188
2 e2_binmid E2 0.1494685 0.9184424 1.627413e-01 0.8707221 -1.6506455 1.9495825
3 (Intercept) -6.3707373 2.7188448 -2.343178e+00 0.0191203 -11.6995752 -1.0418994
3 sqrt.conc 0.2628004 0.1411389 1.861998e+00 0.0626034 -0.0138268 0.5394275
3 ED_Age 0.0463402 0.0222611 2.081669e+00 0.0373727 0.0027093 0.0899712
3 ED_HighestGradeAdvanced Degree -14.2193859 0.0000018 -8.033872e+06 0.0000000 -14.2193894 -14.2193825
3 ED_HighestGradeHigh School or Less -0.7225669 0.5709831 -1.265478e+00 0.2056998 -1.8416733 0.3965395
3 ED_HighestGradeBachelor’s Degree -0.6155575 0.8259645 -7.452590e-01 0.4561151 -2.2344182 1.0033032
3 BMI -0.0466263 0.0353995 -1.317147e+00 0.1877895 -0.1160081 0.0227554
3 ED_Pain 0.4455983 0.1157997 3.848008e+00 0.0001191 0.2186350 0.6725617
3 RaceNon-Hispanic Black -1.0157518 0.8513202 -1.193149e+00 0.2328111 -2.6843087 0.6528052
3 RaceNon-Hispanic Other -1.9980674 1.4362052 -1.391213e+00 0.1641608 -4.8129778 0.8168429
3 RaceNon-Hispanic White -0.9375516 0.9131053 -1.026773e+00 0.3045275 -2.7272050 0.8521018
3 Plate.Number 0.0154110 0.0394421 3.907258e-01 0.6959999 -0.0618940 0.0927161
3 e2_binlow E2 4.8051741 2.0343863 2.361977e+00 0.0181778 0.8178502 8.7924980
3 e2_binmid E2 2.1336870 1.4507924 1.470705e+00 0.1413710 -0.7098137 4.9771878
#Fit the model for time point 3 months
model_m3 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, 
                     subset = time == 3, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 419.669894 
## iter  10 value 265.651792
## iter  20 value 245.264571
## iter  30 value 243.580449
## final  value 243.578224 
## converged
tidy(model_m3, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #no significance
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.1438903 1.4251253 0.1009668 0.9195768 -2.6493040 2.9370847
2 sqrt.conc -0.0282643 0.0773150 -0.3655737 0.7146832 -0.1797990 0.1232703
2 ED_Age 0.0265784 0.0116658 2.2783275 0.0227071 0.0037140 0.0494429
2 ED_HighestGradeAdvanced Degree 0.1810048 0.6172130 0.2932614 0.7693223 -1.0287105 1.3907201
2 ED_HighestGradeHigh School or Less -0.0572961 0.3072826 -0.1864606 0.8520835 -0.6595590 0.5449667
2 ED_HighestGradeBachelor’s Degree 0.3226113 0.4328355 0.7453439 0.4560638 -0.5257307 1.1709534
2 BMI 0.0202893 0.0174147 1.1650659 0.2439923 -0.0138429 0.0544214
2 ED_Pain 0.0500940 0.0552732 0.9062990 0.3647776 -0.0582394 0.1584275
2 RaceNon-Hispanic Black -0.7732148 0.4909491 -1.5749389 0.1152706 -1.7354572 0.1890277
2 RaceNon-Hispanic Other -0.2607000 0.7272638 -0.3584668 0.7199940 -1.6861108 1.1647109
2 RaceNon-Hispanic White -0.6709575 0.5101474 -1.3152227 0.1884351 -1.6708281 0.3289131
2 Plate.Number -0.0070094 0.0207073 -0.3384981 0.7349879 -0.0475949 0.0335762
2 e2_binlow E2 0.4799754 1.1288069 0.4252059 0.6706866 -1.7324456 2.6923963
2 e2_binmid E2 0.2094152 0.7478034 0.2800404 0.7794465 -1.2562526 1.6750830
3 (Intercept) -5.8817990 2.7799044 -2.1158278 0.0343595 -11.3303114 -0.4332865
3 sqrt.conc 0.1531560 0.1468975 1.0426041 0.2971317 -0.1347579 0.4410698
3 ED_Age 0.0506380 0.0229009 2.2111761 0.0270236 0.0057530 0.0955230
3 ED_HighestGradeAdvanced Degree 0.0678288 1.2654705 0.0535997 0.9572541 -2.4124479 2.5481055
3 ED_HighestGradeHigh School or Less 0.2665381 0.5757837 0.4629136 0.6434263 -0.8619772 1.3950535
3 ED_HighestGradeBachelor’s Degree -0.0078086 0.9238302 -0.0084524 0.9932560 -1.8184825 1.8028652
3 BMI -0.0015620 0.0341004 -0.0458069 0.9634641 -0.0683975 0.0652734
3 ED_Pain 0.3746751 0.1269878 2.9504817 0.0031728 0.1257836 0.6235665
3 RaceNon-Hispanic Black -1.1860836 0.7500276 -1.5813866 0.1137897 -2.6561106 0.2839435
3 RaceNon-Hispanic Other -1.1249035 1.3691195 -0.8216255 0.4112901 -3.8083284 1.5585215
3 RaceNon-Hispanic White -0.7911548 0.8243052 -0.9597838 0.3371640 -2.4067633 0.8244537
3 Plate.Number -0.0840347 0.0453981 -1.8510609 0.0641608 -0.1730133 0.0049440
3 e2_binlow E2 2.9876362 1.8874151 1.5829248 0.1134386 -0.7116295 6.6869018
3 e2_binmid E2 0.5806531 1.3310391 0.4362404 0.6626623 -2.0281356 3.1894418
#Fit the model for time point 6 months
model_m6 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, 
                     subset = time == 6, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 436.149079 
## iter  10 value 306.109642
## iter  20 value 280.123037
## iter  30 value 278.298475
## iter  40 value 278.254523
## iter  50 value 278.253270
## iter  50 value 278.253268
## iter  50 value 278.253268
## final  value 278.253268 
## converged
tidy(model_m6, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #plate number is significant????, no e2 significance
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.5545904 1.2574649 4.410385e-01 0.6591851 -1.9099955 3.0191763
2 sqrt.conc -0.0206597 0.0689491 -2.996363e-01 0.7644546 -0.1557975 0.1144782
2 ED_Age 0.0182652 0.0100995 1.808518e+00 0.0705258 -0.0015295 0.0380599
2 ED_HighestGradeAdvanced Degree -0.0670360 0.5113894 -1.310860e-01 0.8957073 -1.0693408 0.9352688
2 ED_HighestGradeHigh School or Less -0.0116962 0.2802121 -4.174060e-02 0.9667055 -0.5609020 0.5375095
2 ED_HighestGradeBachelor’s Degree 0.0680815 0.3563096 1.910741e-01 0.8484675 -0.6302724 0.7664355
2 BMI 0.0119860 0.0150889 7.943606e-01 0.4269855 -0.0175877 0.0415597
2 ED_Pain 0.0405024 0.0494682 8.187562e-01 0.4129255 -0.0564535 0.1374584
2 RaceNon-Hispanic Black -0.2305422 0.3803868 -6.060730e-01 0.5444663 -0.9760867 0.5150023
2 RaceNon-Hispanic Other -0.3601504 0.5932052 -6.071261e-01 0.5437672 -1.5228113 0.8025105
2 RaceNon-Hispanic White 0.0853640 0.3992413 2.138155e-01 0.8306909 -0.6971346 0.8678625
2 Plate.Number -0.0524656 0.0184486 -2.843882e+00 0.0044568 -0.0886242 -0.0163070
2 e2_binlow E2 1.3410510 1.1340476 1.182535e+00 0.2369935 -0.8816414 3.5637434
2 e2_binmid E2 -0.1077641 0.6895640 -1.562786e-01 0.8758134 -1.4592846 1.2437564
3 (Intercept) -3.0896264 2.8204937 -1.095420e+00 0.2733325 -8.6176924 2.4384396
3 sqrt.conc -0.0463444 0.1645973 -2.815624e-01 0.7782791 -0.3689492 0.2762604
3 ED_Age 0.0722735 0.0220570 3.276674e+00 0.0010504 0.0290426 0.1155043
3 ED_HighestGradeAdvanced Degree -13.9746453 0.0000027 -5.102638e+06 0.0000000 -13.9746507 -13.9746399
3 ED_HighestGradeHigh School or Less -0.0954912 0.5289536 -1.805284e-01 0.8567377 -1.1322212 0.9412389
3 ED_HighestGradeBachelor’s Degree -1.5368456 1.1258387 -1.365067e+00 0.1722319 -3.7434489 0.6697578
3 BMI -0.0224125 0.0352416 -6.359679e-01 0.5247974 -0.0914848 0.0466598
3 ED_Pain 0.1922234 0.1068746 1.798588e+00 0.0720838 -0.0172470 0.4016938
3 RaceNon-Hispanic Black 0.1720303 0.7607170 2.261424e-01 0.8210907 -1.3189476 1.6630083
3 RaceNon-Hispanic Other 0.1933064 1.1271797 1.714956e-01 0.8638341 -2.0159253 2.4025380
3 RaceNon-Hispanic White -0.9022036 0.9595953 -9.401918e-01 0.3471192 -2.7829758 0.9785685
3 Plate.Number -0.0614748 0.0387215 -1.587613e+00 0.1123738 -0.1373677 0.0144180
3 e2_binlow E2 1.6842627 2.0821929 8.088889e-01 0.4185791 -2.3967604 5.7652858
3 e2_binmid E2 -0.9692742 1.5326311 -6.324250e-01 0.5271092 -3.9731760 2.0346275
#m <- glmer(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin + (1 | PID), data = e2_full_long, family = binomial, control = glmerControl(optimizer = "bobyqa"), nAGQ = 10)

miRNA original code for miRNA

E2_women_wide_all<- E2_women_wide %>% 
  inner_join(ID_miRNA,by="PID") 

E2_women_long_all<- E2_women_long %>% 
  inner_join(ID_miRNA,by="PID") 
## Warning in inner_join(., ID_miRNA, by = "PID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 176 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

NEW MARCH 2024 CODE FOR miRNA

none of the PIDs match up between the new estrogen data and the miRNA data

#unique(e2_full_long$PID)
#ID_miRNA$PID
#typeof(e2_full_long$PID)
#typeof(ID_miRNA$PID)

e2_wide_all<- e2_full_wide %>% 
  inner_join(ID_miRNA,by="PID") 

E2_long_all<- e2_full_long %>% 
  inner_join(ID_miRNA,by="PID")