#library(edgeR);#library(sva)
library(data.table);library(ggfortify)
## Loading required package: ggplot2
library(factoextra);library(ClassDiscovery)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Loading required package: cluster
## Loading required package: oompaBase
library(stringr);library(lattice);library(kableExtra)
library(ggplot2);library(MASS);library(memisc)
## 
## Attaching package: 'memisc'
## The following object is masked from 'package:ggplot2':
## 
##     syms
## The following objects are masked from 'package:stats':
## 
##     contr.sum, contr.treatment, contrasts
## The following object is masked from 'package:base':
## 
##     as.array
library(tidyverse);library(naniar);library(gtsummary)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%()             masks memisc::%@%()
## ✖ lubridate::as.interval() masks memisc::as.interval()
## ✖ dplyr::between()         masks data.table::between()
## ✖ dplyr::collect()         masks memisc::collect()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ dplyr::first()           masks data.table::first()
## ✖ dplyr::group_rows()      masks kableExtra::group_rows()
## ✖ lubridate::hour()        masks data.table::hour()
## ✖ lubridate::is.interval() masks memisc::is.interval()
## ✖ lubridate::isoweek()     masks data.table::isoweek()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ dplyr::last()            masks data.table::last()
## ✖ lubridate::mday()        masks data.table::mday()
## ✖ lubridate::minute()      masks data.table::minute()
## ✖ lubridate::month()       masks data.table::month()
## ✖ lubridate::quarter()     masks data.table::quarter()
## ✖ dplyr::recode()          masks memisc::recode()
## ✖ dplyr::rename()          masks memisc::rename()
## ✖ lubridate::second()      masks data.table::second()
## ✖ dplyr::select()          masks MASS::select()
## ✖ dplyr::syms()            masks memisc::syms(), ggplot2::syms()
## ✖ purrr::transpose()       masks data.table::transpose()
## ✖ tibble::view()           masks memisc::view()
## ✖ lubridate::wday()        masks data.table::wday()
## ✖ lubridate::week()        masks data.table::week()
## ✖ lubridate::yday()        masks data.table::yday()
## ✖ lubridate::year()        masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'gtsummary'
## 
## 
## The following object is masked from 'package:MASS':
## 
##     select
require(lme4); require(nnet); require(tidyverse);
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: nnet
library(broom);library(readxl);library(nlme)
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:lme4':
## 
##     lmList
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse

NEW DATA FILES

#aurora data
df0 <- read.csv("full_aurora.csv")

estrogen data

#estrogen levels
e2_conc <- read.csv("E2_concentrated.csv")
#e2_od <- read.csv("E2_OD.csv") #Do I need this data?



#filter out the 0 values, 420 values, and 0 CV values
e2_conc$Mean.Concentration..pg.mL. <- as.numeric(e2_conc$Mean.Concentration..pg.mL.)
## Warning: NAs introduced by coercion
e2_conc$X..CV <- as.numeric(e2_conc$X..CV)
## Warning: NAs introduced by coercion
#filter add bad samples
e2_conc <- e2_conc %>% 
  rename(mean.conc = Mean.Concentration..pg.mL.) %>% 
  filter(mean.conc > 0) %>%
  filter(mean.conc < 420) %>%
  filter(`X..CV` > 0) %>%
  rename(CV = X..CV)

#square root transform concentration
e2_conc <- e2_conc %>%
  mutate(sqrt.conc = sqrt(mean.conc)) %>%
  select(everything(),mean.conc, sqrt.conc)

#remove extra columns
e2_conc <- e2_conc[, -c(4, 5, 7, 9, 10)] #dimensions 860 x 6

keys

#PID to inventory ID key for estrogen
key <- read_excel("e2_pid_key.xlsx")

#sample ID to inventory ID and PID Keys for miRNA
sampleID <- read.csv("ID_to_SampleID_all.csv")
ID_link <- readxl::read_excel("Final Sample List_UNC BSP RNA&Plasma.xlsx")

miRNA data

#I don't think updated miRNA data was sent? I'll use the miRNA data from 2021 from this original code
#this file was generated from "AURORA_new_QC_sRNA_06142021.R", which I do not have, this is from Ying
miRNA_cleaned <- read.csv("AURORA_sRNA_cleaned_596_06.14.2021.csv")
miRNA_cleaned$sample <- rownames(miRNA_cleaned) 
#miRNA_cleaned dimensions 596 x 1552

Link estrogen data with key

e2_keyed <- e2_conc %>% 
  inner_join(key,by=c("Inventory.Code" = "InventoryCode")) 

t0 <- e2_keyed %>% 
  filter(AlternateID == "T0")
t2 <- e2_keyed %>% 
  filter(AlternateID == "T1")
t3 <- e2_keyed %>% 
  filter(AlternateID == "T2")
t4 <- e2_keyed %>% 
  filter(AlternateID == "T3")

length(unique(t0$PID)) # unique PID 294
## [1] 432
#the key has 906 inventory ID/PIDs and the estrogen only has 860 samples
length(unique(e2_keyed[["PID"]])) #474
## [1] 466

Link miRNA with keys

ID2 <- ID_link %>% 
  right_join(sampleID,by=c("INVENTORY CODE" = "Inventory.code", "SAMPLE ID" = "sampleID_sub")) 

ID_miRNA <- miRNA_cleaned %>%
  mutate(sample=as.numeric(sample)) %>% 
  left_join(ID2,by="sample") #596 samples

length(unique(ID_miRNA$PID)) # unique PID 294
## [1] 294

subset to only the timepoint 0/ED visit miRNA values

ID_miRNA_T0 <- ID_miRNA %>% 
  filter(`ALT ID` == "T0") #dimensions 294 x 1557

March 2024 data Create wide df of estrogen and pain data

#aurora data is df0 #4745 rows
#estrogen data is e2_keyed #906 rows

#combine e2 and aurora data
e2_keyed_full <- e2_keyed %>% 
  left_join(df0,by=c("PID" = "PID")) #1217 rows x 353 columns
## Warning in left_join(., df0, by = c(PID = "PID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 15 of `x` matches multiple rows in `y`.
## ℹ Row 946 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
#where is the NRS pain score
grep("NRS", names(e2_keyed_full), value = TRUE)
## [1] "ED_Somatic_NRS_SUM"  "WK2_Somatic_NRS_SUM" "WK8_Somatic_NRS_SUM"
## [4] "M3_Somatic_NRS_SUM"  "M6_Somatic_NRS_SUM"
#This is only the NRS sum count, so a value greater than 10, not the 0-10 NRS value. Will use something else.

#Do I use ED_NowPain_C or ED_Pain_C for ED pain?
#I don't have the "Date.Run_T0" value for this new set of 2024 estrogen values. maybe I don't need it ?
#added plate number to control for batch effects
#create wide df below
e2_full_wide <- e2_keyed_full[,c("PID","ED_Age","ED_HighestGrade","BMI","ED_RaceEthCode","sqrt.conc","ED_NowPain_C","WK8_Pain_C", "M3_Pain_C", "M6_Pain_C","Plate.Number", "AlternateID")] #dim 1217 x 12

e2_full_wide %>%
  inner_join(ID2,by="PID") 
## Warning in inner_join(., ID2, by = "PID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 123 of `x` matches multiple rows in `y`.
## ℹ Row 11 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
##       PID ED_Age                    ED_HighestGrade  BMI     ED_RaceEthCode
## 1  101394     22               High school graduate   NA Non-Hispanic Black
## 2  101394     22               High school graduate   NA Non-Hispanic Black
## 3  101394     22               High school graduate   NA Non-Hispanic Black
## 4  101394     22               High school graduate   NA Non-Hispanic Black
## 5  101394     22               High school graduate   NA Non-Hispanic Black
## 6  101394     22               High school graduate   NA Non-Hispanic Black
## 7  101394     22               High school graduate   NA Non-Hispanic Black
## 8  101394     22               High school graduate   NA Non-Hispanic Black
## 9  110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 10 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 11 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 12 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 13 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 14 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 15 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 16 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 17 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 18 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 19 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 20 110747     60            Some college, no degree 33.7 Non-Hispanic Black
## 21 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 22 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 23 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 24 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 25 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 26 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 27 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 28 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 29 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 30 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 31 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 32 110965     20            Some college, no degree 25.1 Non-Hispanic White
## 33 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 34 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 35 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 36 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 37 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 38 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 39 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 40 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 41 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 42 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 43 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 44 110813     35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 45 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 46 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 47 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 48 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 49 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 50 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 51 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 52 111301     23            Some college, no degree 17.3 Non-Hispanic White
## 53 110468     41               High school graduate 51.1 Non-Hispanic White
## 54 110468     41               High school graduate 51.1 Non-Hispanic White
## 55 110468     41               High school graduate 51.1 Non-Hispanic White
## 56 110468     41               High school graduate 51.1 Non-Hispanic White
## 57 110468     41               High school graduate 51.1 Non-Hispanic White
## 58 110468     41               High school graduate 51.1 Non-Hispanic White
## 59 110468     41               High school graduate 51.1 Non-Hispanic White
## 60 110468     41               High school graduate 51.1 Non-Hispanic White
## 61 111201     21               High school graduate 22.7 Non-Hispanic Black
## 62 111201     21               High school graduate 22.7 Non-Hispanic Black
## 63 111201     21               High school graduate 22.7 Non-Hispanic Black
## 64 111201     21               High school graduate 22.7 Non-Hispanic Black
## 65 111201     21               High school graduate 22.7 Non-Hispanic Black
## 66 111201     21               High school graduate 22.7 Non-Hispanic Black
## 67 111128     29               High school graduate 32.3 Non-Hispanic White
## 68 111128     29               High school graduate 32.3 Non-Hispanic White
## 69 111128     29               High school graduate 32.3 Non-Hispanic White
## 70 111128     29               High school graduate 32.3 Non-Hispanic White
## 71 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 72 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 73 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 74 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 75 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 76 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 77 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 78 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 79 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 80 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 81 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 82 104627     52            Some college, no degree 23.8 Non-Hispanic White
## 83 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 84 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 85 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 86 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 87 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 88 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 89 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 90 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 91 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 92 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 93 108726     43            Some college, no degree 34.0 Non-Hispanic Black
## 94 108726     43            Some college, no degree 34.0 Non-Hispanic Black
##    sqrt.conc ED_NowPain_C WK8_Pain_C M3_Pain_C M6_Pain_C Plate.Number
## 1  14.870289            6          0         0         0            4
## 2  14.870289            6          0         0         0            4
## 3  14.870289            6          0         0         0            4
## 4  14.870289            6          0         0         0            4
## 5  14.870289            6          0         0         0            4
## 6  14.870289            6          0         0         0            4
## 7  14.870289            6          0         0         0            4
## 8  14.870289            6          0         0         0            4
## 9   3.411891            4          4         5         3            9
## 10  3.411891            4          4         5         3            9
## 11  3.411891            4          4         5         3            9
## 12  3.411891            4          4         5         3            9
## 13  3.411891            4          4         5         3            9
## 14  3.411891            4          4         5         3            9
## 15  3.411891            4          4         5         3            9
## 16  3.411891            4          4         5         3            9
## 17  3.411891            4          4         5         3            9
## 18  3.411891            4          4         5         3            9
## 19  3.411891            4          4         5         3            9
## 20  3.411891            4          4         5         3            9
## 21  4.063250            5          0         0         0            9
## 22  4.063250            5          0         0         0            9
## 23  4.063250            5          0         0         0            9
## 24  4.063250            5          0         0         0            9
## 25  4.063250            5          0         0         0            9
## 26  4.063250            5          0         0         0            9
## 27  4.063250            5          0         0         0            9
## 28  4.063250            5          0         0         0            9
## 29  4.063250            5          0         0         0            9
## 30  4.063250            5          0         0         0            9
## 31  4.063250            5          0         0         0            9
## 32  4.063250            5          0         0         0            9
## 33 13.573504            4          8         8         7           10
## 34 13.573504            4          8         8         7           10
## 35 13.573504            4          8         8         7           10
## 36 13.573504            4          8         8         7           10
## 37 13.573504            4          8         8         7           10
## 38 13.573504            4          8         8         7           10
## 39 13.573504            4          8         8         7           10
## 40 13.573504            4          8         8         7           10
## 41 13.573504            4          8         8         7           10
## 42 13.573504            4          8         8         7           10
## 43 13.573504            4          8         8         7           10
## 44 13.573504            4          8         8         7           10
## 45  3.735706            6          4        NA         3           10
## 46  3.735706            6          4        NA         3           10
## 47  3.735706            6          4        NA         3           10
## 48  3.735706            6          4        NA         3           10
## 49  3.735706            6          4        NA         3           10
## 50  3.735706            6          4        NA         3           10
## 51  3.735706            6          4        NA         3           10
## 52  3.735706            6          4        NA         3           10
## 53  4.842365            7         NA         6         5           10
## 54  4.842365            7         NA         6         5           10
## 55  4.842365            7         NA         6         5           10
## 56  4.842365            7         NA         6         5           10
## 57  4.842365            7         NA         6         5           10
## 58  4.842365            7         NA         6         5           10
## 59  4.842365            7         NA         6         5           10
## 60  4.842365            7         NA         6         5           10
## 61  6.199113            7          7         6         6           11
## 62  6.199113            7          7         6         6           11
## 63  6.199113            7          7         6         6           11
## 64  6.199113            7          7         6         6           11
## 65  6.199113            7          7         6         6           11
## 66  6.199113            7          7         6         6           11
## 67  4.701542            8          7         7         6           16
## 68  4.701542            8          7         7         6           16
## 69  4.701542            8          7         7         6           16
## 70  4.701542            8          7         7         6           16
## 71  3.681304            6          7         4         8           17
## 72  3.681304            6          7         4         8           17
## 73  3.681304            6          7         4         8           17
## 74  3.681304            6          7         4         8           17
## 75  3.681304            6          7         4         8           17
## 76  3.681304            6          7         4         8           17
## 77  3.681304            6          7         4         8           17
## 78  3.681304            6          7         4         8           17
## 79  3.681304            6          7         4         8           17
## 80  3.681304            6          7         4         8           17
## 81  3.681304            6          7         4         8           17
## 82  3.681304            6          7         4         8           17
## 83  4.382009            5          6         4         4           17
## 84  4.382009            5          6         4         4           17
## 85  4.382009            5          6         4         4           17
## 86  4.382009            5          6         4         4           17
## 87  4.382009            5          6         4         4           17
## 88  4.382009            5          6         4         4           17
## 89  4.382009            5          6         4         4           17
## 90  4.382009            5          6         4         4           17
## 91  4.382009            5          6         4         4           17
## 92  4.382009            5          6         4         4           17
## 93  4.382009            5          6         4         4           17
## 94  4.382009            5          6         4         4           17
##    AlternateID SAMPLE ID ALT ID INVENTORY CODE Sample Type sample
## 1           T1  824-1024     T3      XA0050274        RNA1    522
## 2           T1  824-1024     T0      XA0045353        RNA1      2
## 3           T1  824-1024     T3      XA0050274        RNA1    522
## 4           T1  824-1024     T0      XA0045353        RNA1      2
## 5           T1  824-1024     T3      XA0050274        RNA1    522
## 6           T1  824-1024     T0      XA0045353        RNA1      2
## 7           T1  824-1024     T3      XA0050274        RNA1    522
## 8           T1  824-1024     T0      XA0045353        RNA1      2
## 9           T3  846-1109     T1      XA0055516        RNA1    336
## 10          T3  846-1109     T0      XA0055259        RNA1    148
## 11          T3  846-1109     T1      XA0055516        RNA1    336
## 12          T3  846-1109     T0      XA0055259        RNA1    148
## 13          T3  846-1109     T1      XA0055516        RNA1    336
## 14          T3  846-1109     T0      XA0055259        RNA1    148
## 15          T3  846-1109     T1      XA0055516        RNA1    336
## 16          T3  846-1109     T0      XA0055259        RNA1    148
## 17          T3  846-1109     T1      XA0055516        RNA1    336
## 18          T3  846-1109     T0      XA0055259        RNA1    148
## 19          T3  846-1109     T1      XA0055516        RNA1    336
## 20          T3  846-1109     T0      XA0055259        RNA1    148
## 21          T3  846-1112     T1      XA0055530        RNA1    341
## 22          T3  846-1112     T0      XA0055329        RNA1    430
## 23          T3  846-1112     T1      XA0055530        RNA1    341
## 24          T3  846-1112     T0      XA0055329        RNA1    430
## 25          T3  846-1112     T1      XA0055530        RNA1    341
## 26          T3  846-1112     T0      XA0055329        RNA1    430
## 27          T3  846-1112     T1      XA0055530        RNA1    341
## 28          T3  846-1112     T0      XA0055329        RNA1    430
## 29          T3  846-1112     T1      XA0055530        RNA1    341
## 30          T3  846-1112     T0      XA0055329        RNA1    430
## 31          T3  846-1112     T1      XA0055530        RNA1    341
## 32          T3  846-1112     T0      XA0055329        RNA1    430
## 33          T3  846-1110     T1      XA0055534        RNA1    343
## 34          T3  846-1110     T0      XA0055331        RNA1    151
## 35          T3  846-1110     T1      XA0055534        RNA1    343
## 36          T3  846-1110     T0      XA0055331        RNA1    151
## 37          T3  846-1110     T1      XA0055534        RNA1    343
## 38          T3  846-1110     T0      XA0055331        RNA1    151
## 39          T3  846-1110     T1      XA0055534        RNA1    343
## 40          T3  846-1110     T0      XA0055331        RNA1    151
## 41          T3  846-1110     T1      XA0055534        RNA1    343
## 42          T3  846-1110     T0      XA0055331        RNA1    151
## 43          T3  846-1110     T1      XA0055534        RNA1    343
## 44          T3  846-1110     T0      XA0055331        RNA1    151
## 45          T3  846-1117     T1      XA0055521        RNA1    338
## 46          T3  846-1117     T0      XA0055300        RNA1    429
## 47          T3  846-1117     T1      XA0055521        RNA1    338
## 48          T3  846-1117     T0      XA0055300        RNA1    429
## 49          T3  846-1117     T1      XA0055521        RNA1    338
## 50          T3  846-1117     T0      XA0055300        RNA1    429
## 51          T3  846-1117     T1      XA0055521        RNA1    338
## 52          T3  846-1117     T0      XA0055300        RNA1    429
## 53          T3  873-1054     T0      XA0054511        RNA1    299
## 54          T3  873-1054     T1      XA0054622        RNA1    305
## 55          T3  873-1054     T0      XA0054511        RNA1    299
## 56          T3  873-1054     T1      XA0054622        RNA1    305
## 57          T3  873-1054     T0      XA0054511        RNA1    299
## 58          T3  873-1054     T1      XA0054622        RNA1    305
## 59          T3  873-1054     T0      XA0054511        RNA1    299
## 60          T3  873-1054     T1      XA0054622        RNA1    305
## 61          T3  825-1110     T0      XA0055257        RNA1    168
## 62          T3  825-1110     T1      XA0055225        RNA1    575
## 63          T3  825-1110     T0      XA0055257        RNA1    168
## 64          T3  825-1110     T1      XA0055225        RNA1    575
## 65          T3  825-1110     T0      XA0055257        RNA1    168
## 66          T3  825-1110     T1      XA0055225        RNA1    575
## 67          T3  828-1216     T0      XA0054998        RNA1    573
## 68          T3  828-1216     T1      XA0054147        RNA1    119
## 69          T3  828-1216     T0      XA0054998        RNA1    573
## 70          T3  828-1216     T1      XA0054147        RNA1    119
## 71          T3  827-1008     T1      XA0047505        RNA1    485
## 72          T3  827-1008     T0      XA0045856        RNA1    450
## 73          T3  827-1008     T1      XA0047505        RNA1    485
## 74          T3  827-1008     T0      XA0045856        RNA1    450
## 75          T3  827-1008     T1      XA0047505        RNA1    485
## 76          T3  827-1008     T0      XA0045856        RNA1    450
## 77          T3  827-1008     T1      XA0047505        RNA1    485
## 78          T3  827-1008     T0      XA0045856        RNA1    450
## 79          T3  827-1008     T1      XA0047505        RNA1    485
## 80          T3  827-1008     T0      XA0045856        RNA1    450
## 81          T3  827-1008     T1      XA0047505        RNA1    485
## 82          T3  827-1008     T0      XA0045856        RNA1    450
## 83          T3  828-1145     T0      XA0053287        RNA1    406
## 84          T3  828-1145     T1      XA0053178        RNA1     86
## 85          T3  828-1145     T0      XA0053287        RNA1    406
## 86          T3  828-1145     T1      XA0053178        RNA1     86
## 87          T3  828-1145     T0      XA0053287        RNA1    406
## 88          T3  828-1145     T1      XA0053178        RNA1     86
## 89          T3  828-1145     T0      XA0053287        RNA1    406
## 90          T3  828-1145     T1      XA0053178        RNA1     86
## 91          T3  828-1145     T0      XA0053287        RNA1    406
## 92          T3  828-1145     T1      XA0053178        RNA1     86
## 93          T3  828-1145     T0      XA0053287        RNA1    406
## 94          T3  828-1145     T1      XA0053178        RNA1     86
#at this above step which includes all time points of E2 for the March 2024 data, there are 94 overlapping PIDs with the 2021 miRNA data

#rename some of the columns
e2_full_wide <- e2_full_wide %>% 
  rename(ED_Pain = ED_NowPain_C) %>% 
  rename(WK8_Pain = WK8_Pain_C) %>% 
  rename(M3_Pain = M3_Pain_C) %>% 
  rename(M6_Pain = M6_Pain_C) %>% 
  rename(Race = ED_RaceEthCode)   

#Remove cases without estrogen measurements
e2_full_wide <- e2_full_wide[complete.cases(e2_full_wide$sqrt.conc), ] #dimensions 1161 x 12

#Keep ONLY estrogen time point T0, E2 at time of ED visit
e2_full_wide <- e2_full_wide %>% 
  filter(AlternateID == "T0") #dimensions 558 x 12

#note: after the above step of keeping only T0 values, there remain no overlapping PIDs between the March 2024 E2 data and the 2021 miRNA data. 

#summarize wide df and see how many unique PIDs
#summary(e2_full_wide)
length(unique(e2_full_wide[["PID"]])) #432 unique PID
## [1] 432

NEW MARCH 2024 CODE fix the education category

unique(e2_full_wide$ED_HighestGrade)
##  [1] "High school graduate"                                            
##  [2] "Some college, no degree"                                         
##  [3] "Associate degree: Occupational, technical, or vocational program"
##  [4] "Bachelor's degree: BA, AB, BS, BBA"                              
##  [5] "GED or equivalent"                                               
##  [6] "Associate degree: Academic program"                              
##  [7] "Doctoral degree: PhD, EdD"                                       
##  [8] "Master's degree: MA, MS, MEng, MEd, MBA"                         
##  [9] "Professional school degree: MD, DDS, DVM, JD"                    
## [10] "8th grade"                                                       
## [11] "11th grade"                                                      
## [12] "12th grade, no diploma"                                          
## [13] "9th grade"                                                       
## [14] "10th grade"                                                      
## [15] "7th grade"
#reduce education categories to 4 levels
level_mapping <- c(
  "Some college, no degree" = "Some College or Associate Degree",
  "Professional school degree: MD, DDS, DVM, JD" = "Advanced Degree",
  "Associate degree: Occupational, technical, or vocational program" = "Some College or Associate Degree",
  "High school graduate" = "High School or Less",
  "Bachelor's degree: BA, AB, BS, BBA" = "Bachelor's Degree",
  "Master's degree: MA, MS, MEng, MEd, MBA" = "Advanced Degree",
  "GED or equivalent" = "High School or Less",
  "8th grade" = "High School or Less",
  "Doctoral degree: PhD, EdD" = "Advanced Degree",
  "Associate degree: Academic program" = "Some College or Associate Degree",
  "12th grade, no diploma" = "High School or Less",
  "11th grade" = "High School or Less",
  "9th grade" = "High School or Less",
  "10th grade" = "High School or Less",
  "7th grade" = "High School or Less"
)

#apply new mapping to education category
e2_full_wide$ED_HighestGrade <- factor(e2_full_wide$ED_HighestGrade, levels = names(level_mapping), labels = level_mapping)
#dim 558 x 12

March 2024 data Create long df of estrogen and pain data changes: updated df used, removed Week2 pain rename

gather(e2_full_wide, key=names, value=pain,WK8_Pain,M3_Pain,M6_Pain) %>%
  mutate(time = ifelse(names=="WK8_Pain",2,ifelse(names=="M3_Pain",3,ifelse(names=="M6_Pain",6,NA)))) %>%
  arrange(PID,time) -> e2_full_long 

e2_full_long <- e2_full_long[complete.cases(e2_full_long),] #dim 1541 x 12
length(unique(e2_full_long[["PID"]])) #397 PID
## [1] 397
#summary(e2_full_long$sqrt.conc)

#there are a few random duplicates that need to be removed
e2_full_long <- e2_full_long[!duplicated(e2_full_long), ] #dim 1163 x 12

length(unique(e2_full_long[["PID"]])) #397 PID
## [1] 397

MARCH 2024 NEW E2 DATA CODE GLS MODEL

#took out highest grade because its not coded correctly 
e2_model <- gls(pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long)

summary(e2_model)
## Generalized least squares fit by REML
##   Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +      Race + Plate.Number 
##   Data: e2_full_long 
##       AIC      BIC    logLik
##   5566.51 5642.223 -2768.255
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##    Phi1 
## 0.73062 
## 
## Coefficients:
##                                         Value Std.Error   t-value p-value
## (Intercept)                         1.9370128 0.8249753  2.347965  0.0190
## time                               -0.2372621 0.0453816 -5.228156  0.0000
## sqrt.conc                           0.0097417 0.0403274  0.241564  0.8092
## ED_Age                              0.0433499 0.0096850  4.475991  0.0000
## ED_HighestGradeAdvanced Degree     -1.1040972 0.5188331 -2.128039  0.0335
## ED_HighestGradeHigh School or Less  0.0062920 0.2810954  0.022384  0.9821
## ED_HighestGradeBachelor's Degree   -0.6553079 0.3530373 -1.856200  0.0637
## BMI                                 0.0268486 0.0147667  1.818177  0.0693
## ED_Pain                             0.2596334 0.0491419  5.283337  0.0000
## RaceNon-Hispanic Black             -0.5629988 0.3746560 -1.502709  0.1332
## RaceNon-Hispanic Other             -0.4014009 0.5972709 -0.672058  0.5017
## RaceNon-Hispanic White             -0.5940566 0.3903109 -1.522009  0.1283
## Plate.Number                       -0.0234922 0.0187327 -1.254078  0.2101
## 
##  Correlation: 
##                                    (Intr) time   sqrt.c ED_Age ED_HGAD ED_SoL
## time                               -0.226                                    
## sqrt.conc                          -0.301  0.001                             
## ED_Age                             -0.409  0.004  0.251                      
## ED_HighestGradeAdvanced Degree     -0.219  0.002 -0.015 -0.098               
## ED_HighestGradeHigh School or Less -0.245  0.000  0.025  0.105  0.237        
## ED_HighestGradeBachelor's Degree   -0.156  0.000  0.008 -0.069  0.244   0.339
## BMI                                -0.465  0.006 -0.101 -0.167  0.069   0.051
## ED_Pain                            -0.444  0.002 -0.060  0.046  0.136  -0.074
## RaceNon-Hispanic Black             -0.310  0.000 -0.050 -0.064  0.221   0.042
## RaceNon-Hispanic Other             -0.258  0.002 -0.048  0.027  0.118  -0.002
## RaceNon-Hispanic White             -0.316 -0.002 -0.055 -0.089  0.131   0.059
## Plate.Number                       -0.285  0.000  0.004 -0.022  0.063   0.066
##                                    ED_HGBD BMI    ED_Pan RcN-HB RcN-HO RcN-HW
## time                                                                         
## sqrt.conc                                                                    
## ED_Age                                                                       
## ED_HighestGradeAdvanced Degree                                               
## ED_HighestGradeHigh School or Less                                           
## ED_HighestGradeBachelor's Degree                                             
## BMI                                 0.032                                    
## ED_Pain                             0.070   0.013                            
## RaceNon-Hispanic Black              0.079  -0.011  0.015                     
## RaceNon-Hispanic Other              0.048   0.022  0.096  0.475              
## RaceNon-Hispanic White             -0.045  -0.008  0.159  0.718  0.456       
## Plate.Number                        0.014   0.062  0.076 -0.038 -0.046 -0.108
## 
## Standardized residuals:
##          Min           Q1          Med           Q3          Max 
## -2.096733348 -0.819667252  0.005922671  0.718887652  2.479985354 
## 
## Residual standard error: 3.005848 
## Degrees of freedom: 1163 total; 1150 residual

idea.. refactor pain to two categories: low to none and moderate to severe

e2_full_long <- e2_full_long %>%
  mutate(bin_pain = cut(pain, breaks = c(-Inf, 4, 10), labels = c("Low to None", "Moderate to Severe"))) #dimensions 1163 x 13

ggplot(e2_full_long, aes(x = sqrt.conc, fill = bin_pain)) +
  geom_histogram(binwidth = .2) +
  facet_grid(time ~ .) +
  labs(x = "Square Root of Estrogen Concentration", y = "Frequency", fill = "Pain Level")

lets look at the e2 values and re-level by QUINTILE

#visualize
summary(e2_full_long$sqrt.conc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.325   3.867   4.906   5.841   6.844  20.137
quantile(e2_full_long$sqrt.conc, probs = seq(0, 1, by = 0.2))
##        0%       20%       40%       60%       80%      100% 
##  1.325142  3.684359  4.494906  5.371406  7.504166 20.136844
q1 <- quantile(e2_full_long$sqrt.conc, 0.2) #first quintile
q5 <- quantile(e2_full_long$sqrt.conc, 0.8) #fifth quintile

ggplot(data = e2_full_long, aes(x = sqrt.conc, y = ..density..)) +
  geom_density(color = "deeppink3", fill = "deeppink3", alpha = .4, size=1.8) +
  geom_histogram(fill = "skyblue", color = "gray20", alpha = 0.9, bins = 50) +
  geom_vline(xintercept = q1, linetype = "dashed", color = "gray30", size = .5) +
  geom_vline(xintercept = q5, linetype = "dashed", color = "gray30", size = .5) +
  annotate("text", x = q1, y = 0.24, label = paste("20% Q =", round(q1, 2)), color = "gray20", hjust = 1.14, size = 4) +
  annotate("text", x = q5, y = 0.24, label = paste("80% Q =", round(q5, 2)), color = "gray20", hjust = -0.1, size = 4) +
  labs(x = "Estrogen Level (sqrt.conc)", y = "Density",
       title = "Density Plot of Estrogen Levels")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#explore cut off points, pick some levels
quantile(e2_full_long$sqrt.conc, probs = seq(0, 1, by = 0.25))
##        0%       25%       50%       75%      100% 
##  1.325142  3.867493  4.905558  6.843826 20.136844
quantile(e2_full_long$sqrt.conc, probs = seq(0, 1, by = 0.2))
##        0%       20%       40%       60%       80%      100% 
##  1.325142  3.684359  4.494906  5.371406  7.504166 20.136844
length(e2_full_long[e2_full_long$sqrt.conc >= q5, "sqrt.conc"]) #234 values greater than or equal to 5th quintile
## [1] 234
length(e2_full_long[e2_full_long$sqrt.conc <= q1, "sqrt.conc"]) #234 values less than or equal to 1st quintile
## [1] 234
length(e2_full_long$sqrt.conc[e2_full_long$sqrt.conc > q1 & e2_full_long$sqrt.conc < q5]) #695 values between 1st and 5th quintile
## [1] 695
234+234+695 #1163
## [1] 1163

make new categories based on this exploration..

e2_full_long <- e2_full_long %>%
  mutate(e2_bin = case_when(
    sqrt.conc <= q1 ~ "low E2 Q1",
    sqrt.conc > q1 & sqrt.conc < q5 ~ "mid E2 Q2-Q4",
    sqrt.conc >= q5 ~ "high E2 Q5"
  ))
#new dimensions 1163 x 14

summary table with new categories

e2_full_long %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Full Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
  #add_overall() %>%
  add_p()%>%
  bold_p()
Full Table: Features by Estrogen Bin, ALL TIME POINTS
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 2341 low E2 Q1, N = 2341 mid E2 Q2-Q4, N = 6951
ED_Age 32 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


<0.001
    Some College or Associate Degree 86 (37%) 88 (38%) 292 (42%)
    Advanced Degree 15 (6.4%) 12 (5.1%) 54 (7.8%)
    High School or Less 95 (41%) 67 (29%) 254 (37%)
    Bachelor's Degree 38 (16%) 67 (29%) 95 (14%)
BMI 31 (9) 28 (6) 30 (8) 0.019
Race


<0.001
    Hispanic 29 (12%) 40 (17%) 98 (14%)
    Non-Hispanic Black 98 (42%) 80 (34%) 336 (48%)
    Non-Hispanic Other 24 (10%) 15 (6.4%) 30 (4.3%)
    Non-Hispanic White 83 (35%) 99 (42%) 231 (33%)
sqrt.conc 10.86 (3.04) 3.00 (0.55) 5.11 (1.03) <0.001
ED_Pain 7 (3) 6 (3) 6 (3) <0.001
bin_pain


0.5
    Low to None 114 (49%) 119 (51%) 368 (53%)
    Moderate to Severe 120 (51%) 115 (49%) 327 (47%)
pain 4 (3) 5 (3) 4 (3) 0.11
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#summary table for MONTH 2
e2_full_long %>%
  filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Non-Hispanic White", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=922] = 1176440, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 771 low E2 Q1, N = 781 mid E2 Q2-Q4, N = 2291
ED_Age 32 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.085
    Some College or Associate Degree 29 (38%) 29 (37%) 97 (42%)
    Advanced Degree 5 (6.5%) 4 (5.1%) 18 (7.9%)
    High School or Less 31 (40%) 22 (28%) 82 (36%)
    Bachelor's Degree 12 (16%) 23 (29%) 32 (14%)
BMI 31 (9) 28 (6) 31 (8) 0.2
Race



    Hispanic 9 (12%) 13 (17%) 33 (14%)
    Non-Hispanic Black 33 (43%) 26 (33%) 111 (48%)
    Non-Hispanic Other 8 (10%) 5 (6.4%) 10 (4.4%)
    Non-Hispanic White 27 (35%) 34 (44%) 75 (33%)
sqrt.conc 10.87 (3.08) 2.99 (0.57) 5.12 (1.03) <0.001
ED_Pain 7 (3) 6 (3) 6 (2) 0.008
bin_pain


0.2
    Low to None 28 (36%) 37 (47%) 109 (48%)
    Moderate to Severe 49 (64%) 41 (53%) 120 (52%)
pain 5 (3) 5 (3) 5 (3) 0.13
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#summary table for MONTH 3
e2_full_long %>%
  filter(time == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()
M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 771 low E2 Q1, N = 761 mid E2 Q2-Q4, N = 2291
ED_Age 33 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.14
    Some College or Associate Degree 28 (36%) 29 (38%) 95 (41%)
    Advanced Degree 5 (6.5%) 4 (5.3%) 18 (7.9%)
    High School or Less 32 (42%) 22 (29%) 85 (37%)
    Bachelor's Degree 12 (16%) 21 (28%) 31 (14%)
BMI 31 (10) 28 (6) 30 (9) 0.3
Race


0.3
    Hispanic 10 (13%) 13 (17%) 32 (14%)
    Non-Hispanic Black 32 (42%) 27 (36%) 110 (48%)
    Non-Hispanic Other 8 (10%) 5 (6.6%) 10 (4.4%)
    Non-Hispanic White 27 (35%) 31 (41%) 77 (34%)
sqrt.conc 10.85 (3.01) 3.01 (0.54) 5.11 (1.03) <0.001
ED_Pain 7 (3) 6 (3) 6 (3) 0.029
bin_pain


0.8
    Low to None 39 (51%) 37 (49%) 120 (52%)
    Moderate to Severe 38 (49%) 39 (51%) 109 (48%)
pain 4 (4) 5 (3) 4 (3) 0.4
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test; Fisher’s exact test
#summary table for MONTH 6
e2_full_long %>%
  filter(time == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Non-Hispanic White", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=638] = 1487357, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 801 low E2 Q1, N = 801 mid E2 Q2-Q4, N = 2371
ED_Age 32 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.092
    Some College or Associate Degree 29 (36%) 30 (38%) 100 (42%)
    Advanced Degree 5 (6.3%) 4 (5.0%) 18 (7.6%)
    High School or Less 32 (40%) 23 (29%) 87 (37%)
    Bachelor's Degree 14 (18%) 23 (29%) 32 (14%)
BMI 31 (9) 28 (6) 30 (8) 0.3
Race



    Hispanic 10 (13%) 14 (18%) 33 (14%)
    Non-Hispanic Black 33 (41%) 27 (34%) 115 (49%)
    Non-Hispanic Other 8 (10%) 5 (6.3%) 10 (4.2%)
    Non-Hispanic White 29 (36%) 34 (43%) 79 (33%)
sqrt.conc 10.84 (3.06) 2.99 (0.56) 5.11 (1.02) <0.001
ED_Pain 7 (3) 6 (3) 6 (3) 0.024
bin_pain


>0.9
    Low to None 47 (59%) 45 (56%) 139 (59%)
    Moderate to Severe 33 (41%) 35 (44%) 98 (41%)
pain 4 (3) 4 (3) 4 (3) 0.4
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

i don’t know. it’s not signficant when stratified by time point. I think this is because it is auto-correlating when all time points are combined because you have three of each case

visuals

ggplot(e2_full_long, aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain Across All Time Points", y = "Density", title = "Pain All Time Points, by Estrogen Bin") +
  scale_fill_manual(values = c("low E2 Q1" = "blue", "mid E2 Q2-Q4" = "green", "high E2 Q5" = "red")) 

#high and mid E2 peak at 0 pain. Everything mostly falls off at pain 10. 

#BELOW: PAIN AT 2 MONTHS. Mostly mid level pain everyone
ggplot(data = e2_full_long %>% filter(time == 2), aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 2 MONTHS", y = "Density", title = "Pain at 2 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2 Q1" = "blue", "mid E2 Q2-Q4" = "green", "high E2 Q5" = "red")) 

#BELOW: PAIN AT 3 MONTHS, mostly mid level pain, high and mid e2 start to bounce back down to zero
ggplot(data = e2_full_long %>% filter(time == 3), aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 3 MONTHS", y = "Density", title = "Pain at 3 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2 Q1" = "blue", "mid E2 Q2-Q4" = "green", "high E2 Q5" = "red")) 

#BELOW: PAIN AT 6 MONTHS. Mid and high e2 have a strong peak at 0 pain, and high e2 has another strong bump at ~6 pain. Low e2 is mildly right skewed towards 10. 
ggplot(data = e2_full_long %>% filter(time == 6), aes(x = pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 6 MONTHS", y = "Density", title = "Pain at 6 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2 Q1" = "blue", "mid E2 Q2-Q4" = "green", "high E2 Q5" = "red")) 

We see that low E2 has fewer 0s and more 10s at 6 months

#high E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "high E2 Q5"), aes(x = pain, fill = as.factor(time))) +
  geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
  labs(title = "High E2 x Pain",
       x = "Pain", y = "Frequency")

#mid E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "mid E2 Q2-Q4"), aes(x = pain, fill = as.factor(time))) +
  geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
  labs(title = "Mid E2 x Pain",
       x = "Pain", y = "Frequency")

#low E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "low E2 Q1"), aes(x = pain, fill = as.factor(time))) +
  geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
  labs(title = "Low E2 x Pain",
       x = "Pain", y = "Frequency")

#High and Low E2 level at month 2 - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 2, e2_bin %in% c("high E2 Q5", "low E2 Q1")), aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2 Q5" = "paleturquoise3", "low E2 Q1" = "lightcoral")) +
  labs(title = "Pain at Month 2 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

#High and Low E2 level at month 3 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 3, e2_bin %in% c("high E2 Q5", "low E2 Q1")), aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2 Q5" = "paleturquoise3", "low E2 Q1" = "lightcoral")) +
  labs(title = "Pain at Month 3 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

#High and Low E2 level at month 6 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 6, e2_bin %in% c("high E2 Q5", "low E2 Q1")), aes(x = pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2 Q5" = "paleturquoise3", "low E2 Q1" = "lightcoral")) +
  labs(title = "Pain at Month 6 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

chi square ?

table(e2_full_long$e2_bin, as.factor(e2_full_long$pain))
##               
##                  0   1   2   3   4   5   6   7   8   9  10
##   high E2 Q5    57  10  11  15  21  19  27  27  21   7  19
##   low E2 Q1     31  22  13  27  26  27  22  17  22   3  24
##   mid E2 Q2-Q4 134  60  45  67  62  93  80  48  44  29  33
table(e2_full_long$e2_bin, e2_full_long$bin_pain)
##               
##                Low to None Moderate to Severe
##   high E2 Q5           114                120
##   low E2 Q1            119                115
##   mid E2 Q2-Q4         368                327
chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$pain))
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and as.factor(e2_full_long$pain)
## X-squared = 44.554, df = 20, p-value = 0.001268
chisq.test(e2_full_long$e2_bin, e2_full_long$bin_pain) #p-value = 0.1896
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and e2_full_long$bin_pain
## X-squared = 1.3346, df = 2, p-value = 0.5131
kruskal.test(pain ~ e2_bin, data = e2_full_long) # p-value = 0.00459
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pain by e2_bin
## Kruskal-Wallis chi-squared = 4.4146, df = 2, p-value = 0.11

make new levels for pain

e2_full_long <- e2_full_long %>%
  mutate(bin_pain2 = case_when(
    pain < 2 ~ "0 or 1 pain",
    pain > 1 & pain < 9 ~ "2 to 8 pain",
    pain > 8 ~ "9 or 10 pain"
  ))
#new dimensions 1163 x 15

e2_full_long <- e2_full_long %>%
  mutate(bin_pain3 = case_when(
    pain < 1 ~ "0 pain",
    pain > 0 & pain < 10 ~ "1 to 9 pain",
    pain > 9 ~ "10 pain"
  ))
#new dimensions 1163 x 16

summary table

#each time point
e2_full_long %>%
  #filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()
Combined Table: Features by Estrogen Bin, ALL TIME POINTS
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 2341 low E2 Q1, N = 2341 mid E2 Q2-Q4, N = 6951
ED_Age 32 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


<0.001
    Some College or Associate Degree 86 (37%) 88 (38%) 292 (42%)
    Advanced Degree 15 (6.4%) 12 (5.1%) 54 (7.8%)
    High School or Less 95 (41%) 67 (29%) 254 (37%)
    Bachelor's Degree 38 (16%) 67 (29%) 95 (14%)
BMI 31 (9) 28 (6) 30 (8) 0.019
Race


<0.001
    Hispanic 29 (12%) 40 (17%) 98 (14%)
    Non-Hispanic Black 98 (42%) 80 (34%) 336 (48%)
    Non-Hispanic Other 24 (10%) 15 (6.4%) 30 (4.3%)
    Non-Hispanic White 83 (35%) 99 (42%) 231 (33%)
sqrt.conc 10.86 (3.04) 3.00 (0.55) 5.11 (1.03) <0.001
ED_Pain 7 (3) 6 (3) 6 (3) <0.001
bin_pain


0.5
    Low to None 114 (49%) 119 (51%) 368 (53%)
    Moderate to Severe 120 (51%) 115 (49%) 327 (47%)
pain 4 (3) 5 (3) 4 (3) 0.11
bin_pain2


0.4
    0 or 1 pain 67 (29%) 53 (23%) 194 (28%)
    2 to 8 pain 141 (60%) 154 (66%) 439 (63%)
    9 or 10 pain 26 (11%) 27 (12%) 62 (8.9%)
bin_pain3


<0.001
    0 pain 57 (24%) 31 (13%) 134 (19%)
    1 to 9 pain 158 (68%) 179 (76%) 528 (76%)
    10 pain 19 (8.1%) 24 (10%) 33 (4.7%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#bin pain 2 and bin pain 3 significant (p<.001)

#month 2
e2_full_long %>%
  filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**WK8 Table: Features by Estrogen Bin, 8 weeks**") %>%
 #add_overall() %>%
  add_p(test.args = c(bin_pain2,bin_pain3) ~ list(workspace=2e9)) %>%
  bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Non-Hispanic White", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=922] = 1176440, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
## Note for variable 'bin_pain2': Argument(s) 'test.args' do not apply and were
## ignored. See `?tests` for details.
## Note for variable 'bin_pain3': Argument(s) 'test.args' do not apply and were
## ignored. See `?tests` for details.
WK8 Table: Features by Estrogen Bin, 8 weeks
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 771 low E2 Q1, N = 781 mid E2 Q2-Q4, N = 2291
ED_Age 32 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.085
    Some College or Associate Degree 29 (38%) 29 (37%) 97 (42%)
    Advanced Degree 5 (6.5%) 4 (5.1%) 18 (7.9%)
    High School or Less 31 (40%) 22 (28%) 82 (36%)
    Bachelor's Degree 12 (16%) 23 (29%) 32 (14%)
BMI 31 (9) 28 (6) 31 (8) 0.2
Race



    Hispanic 9 (12%) 13 (17%) 33 (14%)
    Non-Hispanic Black 33 (43%) 26 (33%) 111 (48%)
    Non-Hispanic Other 8 (10%) 5 (6.4%) 10 (4.4%)
    Non-Hispanic White 27 (35%) 34 (44%) 75 (33%)
sqrt.conc 10.87 (3.08) 2.99 (0.57) 5.12 (1.03) <0.001
ED_Pain 7 (3) 6 (3) 6 (2) 0.008
bin_pain


0.2
    Low to None 28 (36%) 37 (47%) 109 (48%)
    Moderate to Severe 49 (64%) 41 (53%) 120 (52%)
pain 5 (3) 5 (3) 5 (3) 0.13
bin_pain2


0.8
    0 or 1 pain 13 (17%) 12 (15%) 47 (21%)
    2 to 8 pain 54 (70%) 58 (74%) 155 (68%)
    9 or 10 pain 10 (13%) 8 (10%) 27 (12%)
bin_pain3


0.6
    0 pain 11 (14%) 7 (9.0%) 30 (13%)
    1 to 9 pain 58 (75%) 64 (82%) 185 (81%)
    10 pain 8 (10%) 7 (9.0%) 14 (6.1%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#no sig in pain

#month 3
e2_full_long %>%
  filter(time == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, 3 months**") %>%
  #add_overall() %>%
  add_p(test.args = c(bin_pain2,bin_pain3) ~ list(workspace=2e9)) %>%
  bold_p()
## Note for variable 'bin_pain2': Argument(s) 'test.args' do not apply and were
## ignored. See `?tests` for details.
M3 Table: Features by Estrogen Bin, 3 months
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 771 low E2 Q1, N = 761 mid E2 Q2-Q4, N = 2291
ED_Age 33 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.14
    Some College or Associate Degree 28 (36%) 29 (38%) 95 (41%)
    Advanced Degree 5 (6.5%) 4 (5.3%) 18 (7.9%)
    High School or Less 32 (42%) 22 (29%) 85 (37%)
    Bachelor's Degree 12 (16%) 21 (28%) 31 (14%)
BMI 31 (10) 28 (6) 30 (9) 0.3
Race


0.3
    Hispanic 10 (13%) 13 (17%) 32 (14%)
    Non-Hispanic Black 32 (42%) 27 (36%) 110 (48%)
    Non-Hispanic Other 8 (10%) 5 (6.6%) 10 (4.4%)
    Non-Hispanic White 27 (35%) 31 (41%) 77 (34%)
sqrt.conc 10.85 (3.01) 3.01 (0.54) 5.11 (1.03) <0.001
ED_Pain 7 (3) 6 (3) 6 (3) 0.029
bin_pain


0.8
    Low to None 39 (51%) 37 (49%) 120 (52%)
    Moderate to Severe 38 (49%) 39 (51%) 109 (48%)
pain 4 (4) 5 (3) 4 (3) 0.4
bin_pain2


0.3
    0 or 1 pain 25 (32%) 16 (21%) 64 (28%)
    2 to 8 pain 42 (55%) 52 (68%) 147 (64%)
    9 or 10 pain 10 (13%) 8 (11%) 18 (7.9%)
bin_pain3


0.054
    0 pain 20 (26%) 10 (13%) 42 (18%)
    1 to 9 pain 50 (65%) 59 (78%) 178 (78%)
    10 pain 7 (9.1%) 7 (9.2%) 9 (3.9%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test; Fisher’s exact test
#no sig in pain

#month 6
e2_full_long %>%
  filter(time == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, 6 months**") %>%
  #add_overall() %>%
  add_p(test.args = c(bin_pain2,bin_pain3) ~ list(workspace=2e9)) %>%
  bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Non-Hispanic White", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=638] = 1487357, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
## Note for variable 'bin_pain2': Argument(s) 'test.args' do not apply and were
## ignored. See `?tests` for details.
M6 Table: Features by Estrogen Bin, 6 months
Characteristic Relative Estrogen Bin p-value2
high E2 Q5, N = 801 low E2 Q1, N = 801 mid E2 Q2-Q4, N = 2371
ED_Age 32 (10) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.092
    Some College or Associate Degree 29 (36%) 30 (38%) 100 (42%)
    Advanced Degree 5 (6.3%) 4 (5.0%) 18 (7.6%)
    High School or Less 32 (40%) 23 (29%) 87 (37%)
    Bachelor's Degree 14 (18%) 23 (29%) 32 (14%)
BMI 31 (9) 28 (6) 30 (8) 0.3
Race



    Hispanic 10 (13%) 14 (18%) 33 (14%)
    Non-Hispanic Black 33 (41%) 27 (34%) 115 (49%)
    Non-Hispanic Other 8 (10%) 5 (6.3%) 10 (4.2%)
    Non-Hispanic White 29 (36%) 34 (43%) 79 (33%)
sqrt.conc 10.84 (3.06) 2.99 (0.56) 5.11 (1.02) <0.001
ED_Pain 7 (3) 6 (3) 6 (3) 0.024
bin_pain


>0.9
    Low to None 47 (59%) 45 (56%) 139 (59%)
    Moderate to Severe 33 (41%) 35 (44%) 98 (41%)
pain 4 (3) 4 (3) 4 (3) 0.4
bin_pain2


0.5
    0 or 1 pain 29 (36%) 25 (31%) 83 (35%)
    2 to 8 pain 45 (56%) 44 (55%) 137 (58%)
    9 or 10 pain 6 (7.5%) 11 (14%) 17 (7.2%)
bin_pain3


0.036
    0 pain 26 (33%) 14 (18%) 62 (26%)
    1 to 9 pain 50 (63%) 56 (70%) 165 (70%)
    10 pain 4 (5.0%) 10 (13%) 10 (4.2%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test; Fisher’s exact test
#bin pain 3 sig (p=0.036)

stats for new categories

table(e2_full_long$e2_bin, e2_full_long$bin_pain3)
##               
##                0 pain 1 to 9 pain 10 pain
##   high E2 Q5       57         158      19
##   low E2 Q1        31         179      24
##   mid E2 Q2-Q4    134         528      33
round(prop.table(table(e2_full_long$e2_bin, e2_full_long$bin_pain3), margin = 1) * 100,2)
##               
##                0 pain 1 to 9 pain 10 pain
##   high E2 Q5    24.36       67.52    8.12
##   low E2 Q1     13.25       76.50   10.26
##   mid E2 Q2-Q4  19.28       75.97    4.75
chisq.test(e2_full_long[e2_full_long$time == 2, "e2_bin"], e2_full_long[e2_full_long$time == 2, "bin_pain3"]) # p-value = 0.56
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 2, "e2_bin"] and e2_full_long[e2_full_long$time == 2, "bin_pain3"]
## X-squared = 2.9646, df = 4, p-value = 0.5638
chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"], e2_full_long[e2_full_long$time == 3, "bin_pain3"]) #p-value = 0.064
## Warning in chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 3, "e2_bin"] and e2_full_long[e2_full_long$time == 3, "bin_pain3"]
## X-squared = 8.8979, df = 4, p-value = 0.0637
chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"], e2_full_long[e2_full_long$time == 6, "bin_pain3"]) #p-value = 0.02649
## Warning in chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$time == 6, "e2_bin"] and e2_full_long[e2_full_long$time == 6, "bin_pain3"]
## X-squared = 11.007, df = 4, p-value = 0.02649

redo model from above

MARCH 2024 NEW E2 DATA CODE GLS MODEL

e2_model <- gls(pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long)

summary(e2_model)
## Generalized least squares fit by REML
##   Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain +      Race + Plate.Number + e2_bin 
##   Data: e2_full_long 
##       AIC      BIC   logLik
##   5568.24 5654.018 -2767.12
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | PID 
##  Parameter estimate(s):
##     Phi1 
## 0.730651 
## 
## Coefficients:
##                                         Value Std.Error   t-value p-value
## (Intercept)                         2.1746068 1.2202312  1.782127  0.0750
## time                               -0.2371992 0.0453832 -5.226584  0.0000
## sqrt.conc                           0.0018124 0.0761957  0.023786  0.9810
## ED_Age                              0.0412959 0.0098138  4.207956  0.0000
## ED_HighestGradeAdvanced Degree     -1.0719852 0.5218861 -2.054060  0.0402
## ED_HighestGradeHigh School or Less  0.0023889 0.2818121  0.008477  0.9932
## ED_HighestGradeBachelor's Degree   -0.7106412 0.3553445 -1.999865  0.0458
## BMI                                 0.0290101 0.0148775  1.949933  0.0514
## ED_Pain                             0.2548956 0.0496970  5.128991  0.0000
## RaceNon-Hispanic Black             -0.5212492 0.3760168 -1.386239  0.1659
## RaceNon-Hispanic Other             -0.4379841 0.6004024 -0.729484  0.4659
## RaceNon-Hispanic White             -0.5840843 0.3911167 -1.493376  0.1356
## Plate.Number                       -0.0251045 0.0188112 -1.334551  0.1823
## e2_binlow E2 Q1                     0.1232773 0.7186841  0.171532  0.8638
## e2_binmid E2 Q2-Q4                 -0.2817202 0.5423023 -0.519489  0.6035
## 
##  Correlation: 
##                                    (Intr) time   sqrt.c ED_Age ED_HGAD ED_SoL
## time                               -0.156                                    
## sqrt.conc                          -0.729  0.004                             
## ED_Age                             -0.254  0.003  0.096                      
## ED_HighestGradeAdvanced Degree     -0.212  0.003  0.071 -0.109               
## ED_HighestGradeHigh School or Less -0.217  0.001  0.073  0.101  0.241        
## ED_HighestGradeBachelor's Degree   -0.129  0.000  0.023 -0.052  0.237   0.338
## BMI                                -0.341  0.006 -0.010 -0.183  0.079   0.054
## ED_Pain                            -0.402  0.002  0.085  0.048  0.144  -0.063
## RaceNon-Hispanic Black             -0.216  0.000 -0.011 -0.076  0.225   0.043
## RaceNon-Hispanic Other             -0.246  0.002  0.055  0.028  0.123   0.005
## RaceNon-Hispanic White             -0.254 -0.001  0.020 -0.093  0.137   0.063
## Plate.Number                       -0.153 -0.001 -0.049 -0.009  0.053   0.062
## e2_binlow E2 Q1                    -0.700  0.005  0.829 -0.074  0.102   0.068
## e2_binmid E2 Q2-Q4                 -0.723  0.004  0.812  0.005  0.074   0.068
##                                    ED_HGBD BMI    ED_Pan RcN-HB RcN-HO RcN-HW
## time                                                                         
## sqrt.conc                                                                    
## ED_Age                                                                       
## ED_HighestGradeAdvanced Degree                                               
## ED_HighestGradeHigh School or Less                                           
## ED_HighestGradeBachelor's Degree                                             
## BMI                                 0.020                                    
## ED_Pain                             0.078   0.013                            
## RaceNon-Hispanic Black              0.070  -0.001  0.013                     
## RaceNon-Hispanic Other              0.054   0.023  0.110  0.469              
## RaceNon-Hispanic White             -0.046  -0.003  0.164  0.717  0.457       
## Plate.Number                        0.020   0.051  0.070 -0.044 -0.049 -0.113
## e2_binlow E2 Q1                    -0.002   0.073  0.122  0.035  0.085  0.062
## e2_binmid E2 Q2-Q4                  0.053   0.017  0.148 -0.007  0.101  0.049
##                                    Plt.Nm e2_E2Q1
## time                                             
## sqrt.conc                                        
## ED_Age                                           
## ED_HighestGradeAdvanced Degree                   
## ED_HighestGradeHigh School or Less               
## ED_HighestGradeBachelor's Degree                 
## BMI                                              
## ED_Pain                                          
## RaceNon-Hispanic Black                           
## RaceNon-Hispanic Other                           
## RaceNon-Hispanic White                           
## Plate.Number                                     
## e2_binlow E2 Q1                    -0.073        
## e2_binmid E2 Q2-Q4                 -0.038  0.873 
## 
## Standardized residuals:
##          Min           Q1          Med           Q3          Max 
## -2.102632201 -0.800003712  0.006275637  0.709551986  2.525482869 
## 
## Residual standard error: 3.006026 
## Degrees of freedom: 1163 total; 1148 residual
#recode the factors so the binned pain is the outcome of interest
e2_full_long$bin_pain4 <- recode_factor(e2_full_long$bin_pain3,
                                        `0 pain` = "1",
                                        `1 to 9 pain` = "2",
                                        `10 pain` = "3") #new dimensions 1163 x 17
#multinomial logit regression
e2_model <- multinom(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long) #e2_bin low E2  p = 2.398452e-03 = 0.002398452  
## # weights:  48 (30 variable)
## initial  value 1277.686092 
## iter  10 value 977.801520
## iter  20 value 815.371103
## iter  30 value 769.544655
## iter  40 value 766.446223
## final  value 766.445652 
## converged
#library(kableExtra)
tidy(e2_model, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE)
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.1703722 0.7650969 0.2226806 0.8237841 -1.3291901 1.6699346
2 time -0.1979691 0.0442244 -4.4764671 0.0000076 -0.2846473 -0.1112909
2 sqrt.conc 0.0363906 0.0458435 0.7938017 0.4273109 -0.0534609 0.1262422
2 ED_Age 0.0251792 0.0066958 3.7604728 0.0001696 0.0120558 0.0383027
2 ED_HighestGradeAdvanced Degree 0.1104983 0.3435053 0.3216786 0.7476962 -0.5627598 0.7837564
2 ED_HighestGradeHigh School or Less -0.0807259 0.1795967 -0.4494841 0.6530825 -0.4327289 0.2712772
2 ED_HighestGradeBachelor’s Degree 0.2136520 0.2403628 0.8888728 0.3740715 -0.2574505 0.6847544
2 BMI 0.0117216 0.0098434 1.1908138 0.2337267 -0.0075710 0.0310143
2 ED_Pain 0.0990731 0.0319298 3.1028440 0.0019167 0.0364919 0.1616543
2 RaceNon-Hispanic Black -0.5567944 0.2671136 -2.0844852 0.0371161 -1.0803275 -0.0332613
2 RaceNon-Hispanic Other -0.2695170 0.3910681 -0.6891817 0.4907089 -1.0359964 0.4969624
2 RaceNon-Hispanic White -0.3525913 0.2768899 -1.2733990 0.2028765 -0.8952854 0.1901029
2 Plate.Number -0.0221147 0.0119525 -1.8502121 0.0642830 -0.0455412 0.0013118
2 e2_binlow E2 Q1 0.8999094 0.4502753 1.9985760 0.0456542 0.0173861 1.7824327
2 e2_binmid E2 Q2-Q4 0.5857043 0.3309789 1.7696120 0.0767918 -0.0630025 1.2344110
3 (Intercept) -2.8005863 1.5224774 -1.8394928 0.0658427 -5.7845872 0.1834146
3 time -0.2119677 0.0816147 -2.5971748 0.0093994 -0.3719296 -0.0520058
3 sqrt.conc 0.0245124 0.0913903 0.2682167 0.7885325 -0.1546092 0.2036340
3 ED_Age 0.0552166 0.0119522 4.6197746 0.0000038 0.0317907 0.0786425
3 ED_HighestGradeAdvanced Degree -1.6332431 1.0815625 -1.5100774 0.1310237 -3.7530667 0.4865804
3 ED_HighestGradeHigh School or Less -0.0021270 0.3043487 -0.0069887 0.9944238 -0.5986395 0.5943855
3 ED_HighestGradeBachelor’s Degree -0.7036320 0.5114034 -1.3758846 0.1688574 -1.7059642 0.2987002
3 BMI -0.0224145 0.0195472 -1.1466910 0.2515094 -0.0607263 0.0158972
3 ED_Pain 0.2941742 0.0642418 4.5791721 0.0000047 0.1682626 0.4200858
3 RaceNon-Hispanic Black -0.5559079 0.4183799 -1.3287157 0.1839418 -1.3759174 0.2641016
3 RaceNon-Hispanic Other -0.5137372 0.6962793 -0.7378320 0.4606165 -1.8784196 0.8509453
3 RaceNon-Hispanic White -0.7766032 0.4761271 -1.6310840 0.1028726 -1.7097951 0.1565887
3 Plate.Number -0.0439037 0.0221902 -1.9785238 0.0478696 -0.0873956 -0.0004118
3 e2_binlow E2 Q1 0.8055860 0.8141799 0.9894447 0.3224456 -0.7901773 2.4013493
3 e2_binmid E2 Q2-Q4 -0.1328456 0.6102937 -0.2176749 0.8276825 -1.3289994 1.0633082
#e2_bin low E2 on category 3 (10 pain)  p = 2.398452e-03 = 0.002398452  
#this means that having a LOW E2 category is significantly associated with having level 10 PAIN rather than Level 0 Pain



# Fit the multinomial logistic regression model for cases where e2_full_long$time == 2 months
model_m2 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, 
                     subset = time == 2, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 421.867119 
## iter  10 value 247.599606
## iter  20 value 221.928655
## iter  30 value 221.001253
## iter  40 value 220.973076
## final  value 220.972555 
## converged
tidy(model_m2, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #low e2 significant vs level 10 pain 
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.1311589 1.5836329 8.282160e-02 0.9339934 -2.9727044 3.2350223
2 sqrt.conc -0.0163413 0.0943060 -1.732790e-01 0.8624321 -0.2011776 0.1684951
2 ED_Age 0.0340383 0.0143767 2.367602e+00 0.0179038 0.0058605 0.0622162
2 ED_HighestGradeAdvanced Degree 0.1388173 0.7293180 1.903385e-01 0.8490439 -1.2906196 1.5682542
2 ED_HighestGradeHigh School or Less -0.4155563 0.3747485 -1.108894e+00 0.2674759 -1.1500499 0.3189372
2 ED_HighestGradeBachelor’s Degree 0.1282390 0.5034145 2.547383e-01 0.7989252 -0.8584354 1.1149134
2 BMI -0.0002916 0.0199227 -1.463560e-02 0.9883229 -0.0393394 0.0387563
2 ED_Pain 0.2308309 0.0648503 3.559444e+00 0.0003716 0.1037267 0.3579350
2 RaceNon-Hispanic Black -0.9827641 0.6584997 -1.492429e+00 0.1355867 -2.2733998 0.3078716
2 RaceNon-Hispanic Other -0.8215715 0.8566076 -9.590990e-01 0.3375089 -2.5004916 0.8573485
2 RaceNon-Hispanic White -0.9507816 0.6677560 -1.423846e+00 0.1544911 -2.2595592 0.3579961
2 Plate.Number 0.0167237 0.0253585 6.594900e-01 0.5095812 -0.0329781 0.0664255
2 e2_binlow E2 Q1 0.2763326 0.9628684 2.869890e-01 0.7741208 -1.6108547 2.1635199
2 e2_binmid E2 Q2-Q4 0.0820186 0.7059307 1.161850e-01 0.9075059 -1.3015802 1.4656173
3 (Intercept) -2.4242005 2.5482795 -9.513087e-01 0.3414477 -7.4187365 2.5703355
3 sqrt.conc -0.0022809 0.1469001 -1.552690e-02 0.9876118 -0.2901999 0.2856381
3 ED_Age 0.0524826 0.0212763 2.466720e+00 0.0136357 0.0107819 0.0941834
3 ED_HighestGradeAdvanced Degree -14.3731028 0.0000030 -4.750017e+06 0.0000000 -14.3731087 -14.3730969
3 ED_HighestGradeHigh School or Less -0.5352624 0.5506261 -9.720977e-01 0.3310019 -1.6144697 0.5439450
3 ED_HighestGradeBachelor’s Degree -0.5856722 0.8206092 -7.137041e-01 0.4754101 -2.1940366 1.0226922
3 BMI -0.0444724 0.0346465 -1.283603e+00 0.1992808 -0.1123784 0.0234336
3 ED_Pain 0.4269331 0.1148537 3.717191e+00 0.0002015 0.2018240 0.6520422
3 RaceNon-Hispanic Black -1.1111457 0.8408800 -1.321408e+00 0.1863653 -2.7592403 0.5369489
3 RaceNon-Hispanic Other -1.6529745 1.3994927 -1.181124e+00 0.2375534 -4.3959297 1.0899807
3 RaceNon-Hispanic White -1.1583642 0.9015747 -1.284823e+00 0.1988541 -2.9254181 0.6086898
3 Plate.Number 0.0164285 0.0386844 4.246803e-01 0.6710698 -0.0593916 0.0922486
3 e2_binlow E2 Q1 0.0559009 1.4232599 3.927670e-02 0.9686698 -2.7336372 2.8454391
3 e2_binmid E2 Q2-Q4 -0.3858054 1.0600603 -3.639466e-01 0.7158979 -2.4634854 1.6918747
#Fit the model for time point 3 months
model_m3 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, 
                     subset = time == 3, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 419.669894 
## iter  10 value 266.160078
## iter  20 value 245.906643
## iter  30 value 245.107442
## final  value 245.104152 
## converged
tidy(model_m3, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #no significance
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) -0.9701946 1.3159434 -0.7372616 0.4609633 -3.5493962 1.6090071
2 sqrt.conc 0.0535287 0.0807280 0.6630748 0.5072826 -0.1046953 0.2117527
2 ED_Age 0.0269585 0.0117025 2.3036503 0.0212423 0.0040220 0.0498951
2 ED_HighestGradeAdvanced Degree 0.2718833 0.6243135 0.4354915 0.6632057 -0.9517488 1.4955153
2 ED_HighestGradeHigh School or Less -0.0117981 0.3088062 -0.0382054 0.9695239 -0.6170471 0.5934510
2 ED_HighestGradeBachelor’s Degree 0.3832827 0.4352584 0.8805866 0.3785416 -0.4698080 1.2363734
2 BMI 0.0212149 0.0175535 1.2085828 0.2268232 -0.0131894 0.0556191
2 ED_Pain 0.0620877 0.0557494 1.1136930 0.2654109 -0.0471791 0.1713544
2 RaceNon-Hispanic Black -0.8017810 0.4927088 -1.6272919 0.1036751 -1.7674724 0.1639105
2 RaceNon-Hispanic Other -0.1361759 0.7287832 -0.1868538 0.8517753 -1.5645648 1.2922130
2 RaceNon-Hispanic White -0.6849750 0.5097263 -1.3438096 0.1790100 -1.6840202 0.3140701
2 Plate.Number -0.0090328 0.0207976 -0.4343192 0.6640567 -0.0497954 0.0317298
2 e2_binlow E2 Q1 1.1126520 0.7832563 1.4205465 0.1554486 -0.4225021 2.6478060
2 e2_binmid E2 Q2-Q4 0.8586772 0.5744442 1.4947965 0.1349676 -0.2672128 1.9845671
3 (Intercept) -4.5286568 2.7704432 -1.6346326 0.1021261 -9.9586257 0.9013121
3 sqrt.conc 0.0530163 0.1690602 0.3135939 0.7538295 -0.2783357 0.3843682
3 ED_Age 0.0514629 0.0220781 2.3309514 0.0197559 0.0081907 0.0947351
3 ED_HighestGradeAdvanced Degree 0.0570339 1.2663068 0.0450396 0.9640758 -2.4248819 2.5389497
3 ED_HighestGradeHigh School or Less 0.4375417 0.5663424 0.7725744 0.4397743 -0.6724691 1.5475525
3 ED_HighestGradeBachelor’s Degree -0.1149097 0.9232596 -0.1244609 0.9009503 -1.9244652 1.6946458
3 BMI -0.0021299 0.0339560 -0.0627244 0.9499860 -0.0686823 0.0644226
3 ED_Pain 0.3462676 0.1243026 2.7856822 0.0053415 0.1026389 0.5898962
3 RaceNon-Hispanic Black -1.0872894 0.7433741 -1.4626408 0.1435657 -2.5442759 0.3696972
3 RaceNon-Hispanic Other -0.7309113 1.3363611 -0.5469415 0.5844189 -3.3501309 1.8883082
3 RaceNon-Hispanic White -0.8465528 0.8185463 -1.0342150 0.3010357 -2.4508740 0.7577684
3 Plate.Number -0.0820752 0.0446913 -1.8364919 0.0662849 -0.1696686 0.0055181
3 e2_binlow E2 Q1 1.0377023 1.4619024 0.7098301 0.4778095 -1.8275737 3.9029783
3 e2_binmid E2 Q2-Q4 -0.1145481 1.0823714 -0.1058307 0.9157167 -2.2359572 2.0068609
#Fit the model for time point 6 months
model_m6 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, 
                     subset = time == 6, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 436.149079 
## iter  10 value 306.681378
## iter  20 value 282.827630
## iter  30 value 281.153243
## iter  40 value 281.105845
## final  value 281.104843 
## converged
tidy(model_m6, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #plate number is significant????, no e2 significance
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) -0.6797556 1.1453026 -5.935161e-01 0.5528358 -2.9245075 1.5649963
2 sqrt.conc 0.0580868 0.0713283 8.143595e-01 0.4154390 -0.0817140 0.1978876
2 ED_Age 0.0199008 0.0101594 1.958861e+00 0.0501290 -0.0000112 0.0398128
2 ED_HighestGradeAdvanced Degree -0.0242110 0.5138717 -4.711480e-02 0.9624217 -1.0313811 0.9829592
2 ED_HighestGradeHigh School or Less 0.0685121 0.2803377 2.443913e-01 0.8069278 -0.4809397 0.6179639
2 ED_HighestGradeBachelor’s Degree 0.1315121 0.3593481 3.659740e-01 0.7143845 -0.5727973 0.8358214
2 BMI 0.0115781 0.0151579 7.638301e-01 0.4449685 -0.0181309 0.0412870
2 ED_Pain 0.0503091 0.0500570 1.005036e+00 0.3148794 -0.0478008 0.1484190
2 RaceNon-Hispanic Black -0.2536683 0.3806172 -6.664657e-01 0.5051135 -0.9996643 0.4923277
2 RaceNon-Hispanic Other -0.1679640 0.5848960 -2.871690e-01 0.7739830 -1.3143391 0.9784111
2 RaceNon-Hispanic White 0.1132893 0.3982906 2.844389e-01 0.7760741 -0.6673458 0.8939245
2 Plate.Number -0.0546909 0.0185820 -2.943225e+00 0.0032481 -0.0911109 -0.0182709
2 e2_binlow E2 Q1 1.1190994 0.6923761 1.616317e+00 0.1060258 -0.2379329 2.4761317
2 e2_binmid E2 Q2-Q4 0.6650044 0.5114145 1.300324e+00 0.1934900 -0.3373496 1.6673584
3 (Intercept) -4.2024389 2.8039702 -1.498746e+00 0.1339396 -9.6981194 1.2932416
3 sqrt.conc -0.0018952 0.1885286 -1.005250e-02 0.9919794 -0.3714044 0.3676140
3 ED_Age 0.0739300 0.0215254 3.434539e+00 0.0005936 0.0317409 0.1161191
3 ED_HighestGradeAdvanced Degree -13.6569224 0.0000054 -2.509571e+06 0.0000000 -13.6569331 -13.6569118
3 ED_HighestGradeHigh School or Less 0.0792444 0.5192663 1.526085e-01 0.8787070 -0.9384988 1.0969877
3 ED_HighestGradeBachelor’s Degree -1.5352518 1.1238222 -1.366099e+00 0.1719080 -3.7379027 0.6673992
3 BMI -0.0253108 0.0350263 -7.226232e-01 0.4699114 -0.0939611 0.0433395
3 ED_Pain 0.1899685 0.1041265 1.824400e+00 0.0680916 -0.0141158 0.3940528
3 RaceNon-Hispanic Black 0.1784226 0.7516905 2.373618e-01 0.8123761 -1.2948637 1.6517090
3 RaceNon-Hispanic Other 0.5406662 1.0958821 4.933616e-01 0.6217571 -1.6072233 2.6885557
3 RaceNon-Hispanic White -0.9287545 0.9489130 -9.787562e-01 0.3277005 -2.7885899 0.9310809
3 Plate.Number -0.0642080 0.0381791 -1.681761e+00 0.0926153 -0.1390376 0.0106215
3 e2_binlow E2 Q1 1.1705709 1.5664553 7.472737e-01 0.4548984 -1.8996252 4.2407669
3 e2_binmid E2 Q2-Q4 -0.0904802 1.1792451 -7.672720e-02 0.9388406 -2.4017582 2.2207978
#m <- glmer(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin + (1 | PID), data = e2_full_long, family = binomial, control = glmerControl(optimizer = "bobyqa"), nAGQ = 10)

miRNA original code for miRNA

#E2_women_wide_all<- E2_women_wide %>% 
#  inner_join(ID_miRNA,by="PID") 

#E2_women_long_all<- E2_women_long %>% 
#  inner_join(ID_miRNA,by="PID") 

NEW MARCH 2024 CODE FOR miRNA

none of the PIDs match up between the new estrogen data and the miRNA data

#unique(e2_full_long$PID)
#ID_miRNA$PID
#typeof(e2_full_long$PID)
#typeof(ID_miRNA$PID)

e2_wide_all<- e2_full_wide %>% 
  inner_join(ID_miRNA,by="PID") 

E2_long_all<- e2_full_long %>% 
  inner_join(ID_miRNA,by="PID")