#library(edgeR);#library(sva)
library(data.table);library(ggfortify)
## Loading required package: ggplot2
library(factoextra);library(ClassDiscovery)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Loading required package: cluster
## Loading required package: oompaBase
library(stringr);library(lattice);library(kableExtra)
library(ggplot2);library(MASS);library(memisc)
##
## Attaching package: 'memisc'
## The following object is masked from 'package:ggplot2':
##
## syms
## The following objects are masked from 'package:stats':
##
## contr.sum, contr.treatment, contrasts
## The following object is masked from 'package:base':
##
## as.array
library(tidyverse);library(naniar);library(gtsummary)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%() masks memisc::%@%()
## ✖ lubridate::as.interval() masks memisc::as.interval()
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::collect() masks memisc::collect()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::is.interval() masks memisc::is.interval()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ dplyr::recode() masks memisc::recode()
## ✖ dplyr::rename() masks memisc::rename()
## ✖ lubridate::second() masks data.table::second()
## ✖ dplyr::select() masks MASS::select()
## ✖ dplyr::syms() masks memisc::syms(), ggplot2::syms()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ tibble::view() masks memisc::view()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'gtsummary'
##
##
## The following object is masked from 'package:MASS':
##
## select
require(lme4); require(nnet); require(tidyverse);
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: nnet
library(broom);library(readxl);library(nlme)
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:lme4':
##
## lmList
##
## The following object is masked from 'package:dplyr':
##
## collapse
NEW DATA FILES
#aurora data
df0 <- read.csv("full_aurora.csv")
estrogen data
#estrogen levels
e2_conc <- read.csv("E2_concentrated.csv")
#e2_od <- read.csv("E2_OD.csv") #Do I need this data?
#filter out the 0 values, 420 values, and 0 CV values
e2_conc$Mean.Concentration..pg.mL. <- as.numeric(e2_conc$Mean.Concentration..pg.mL.)
## Warning: NAs introduced by coercion
e2_conc$X..CV <- as.numeric(e2_conc$X..CV)
## Warning: NAs introduced by coercion
#filter add bad samples
e2_conc <- e2_conc %>%
rename(mean.conc = Mean.Concentration..pg.mL.) %>%
filter(mean.conc > 0) %>%
filter(mean.conc < 420) %>%
filter(`X..CV` > 0) %>%
rename(CV = X..CV)
#square root transform concentration
e2_conc <- e2_conc %>%
mutate(sqrt.conc = sqrt(mean.conc)) %>%
select(everything(),mean.conc, sqrt.conc)
#remove extra columns
e2_conc <- e2_conc[, -c(4, 5, 7, 9, 10)] #dimensions 860 x 6
keys
#PID to inventory ID key for estrogen
key <- read_excel("e2_pid_key.xlsx")
#sample ID to inventory ID and PID Keys for miRNA
sampleID <- read.csv("ID_to_SampleID_all.csv")
ID_link <- readxl::read_excel("Final Sample List_UNC BSP RNA&Plasma.xlsx")
miRNA data
#I don't think updated miRNA data was sent? I'll use the miRNA data from 2021 from this original code
#this file was generated from "AURORA_new_QC_sRNA_06142021.R", which I do not have, this is from Ying
miRNA_cleaned <- read.csv("AURORA_sRNA_cleaned_596_06.14.2021.csv")
miRNA_cleaned$sample <- rownames(miRNA_cleaned)
#miRNA_cleaned dimensions 596 x 1552
Link estrogen data with key
e2_keyed <- e2_conc %>%
right_join(key,by=c("Inventory.Code" = "InventoryCode"))
#the key has 906 inventory ID/PIDs and the estrogen only has 860 samples
Link miRNA with keys
ID2 <- ID_link %>%
right_join(sampleID,by=c("INVENTORY CODE" = "Inventory.code", "SAMPLE ID" = "sampleID_sub"))
ID_miRNA <- miRNA_cleaned %>%
mutate(sample=as.numeric(sample)) %>%
left_join(ID2,by="sample") #596 samples
length(unique(ID_miRNA$PID)) # unique PID 294
## [1] 294
subset to only the timepoint 0/ED visit miRNA values
ID_miRNA_T0 <- ID_miRNA %>%
filter(`ALT ID` == "T0") #dimensions 294 x 1557
ORIGNAL aurora data, estrogen
#Use the data from Jarred directly, not read in the data saved, because it will change some variable names format
df <- read.csv("AURORA_estrogen_excl_outliers_excl_BMI.csv") # dimensions 283 x 3035
E2_women_excl_outliers_excl_BMI <- df[df$ED_GenderBirthCert == 2,] #dim 197 x 3035
#E2_men_excl_outliers_excl_BMI <- df[df$ED_GenderBirthCert == 1,] #86, 3035 #not using men rn
###E2_women (excluding outliers but added BMI) #table 2
E2_women_wide <- E2_women_excl_outliers_excl_BMI[,c("PID","quartile","tertile","median","ED_Age","HighestGrade","BMI","WK8_Pain","M3_Pain","M6_Pain","E2.Concentration..pg.ml._T0_sqrt","ED_NowPain.x","Race","Date.Run_T0")] #dimensions 197 x 14
summary(E2_women_wide)
## PID quartile tertile median
## Min. :100183 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:102989 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :106076 Median :2.000 Median :2.000 Median :1.000
## Mean :105821 Mean :2.492 Mean :1.995 Mean :1.497
## 3rd Qu.:108692 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :111301 Max. :4.000 Max. :3.000 Max. :2.000
##
## ED_Age HighestGrade BMI WK8_Pain
## Min. :18.00 Min. :1.000 Min. :17.30 Min. : 0.000
## 1st Qu.:27.00 1st Qu.:2.000 1st Qu.:25.43 1st Qu.: 3.000
## Median :37.00 Median :3.000 Median :30.33 Median : 5.000
## Mean :39.37 Mean :2.883 Mean :31.43 Mean : 5.211
## 3rd Qu.:51.00 3rd Qu.:4.000 3rd Qu.:36.02 3rd Qu.: 8.000
## Max. :73.00 Max. :4.000 Max. :57.50 Max. :10.000
## NA's :1 NA's :27 NA's :12
## M3_Pain M6_Pain E2.Concentration..pg.ml._T0_sqrt
## Min. : 0.000 Min. : 0.000 Min. : 0.8124
## 1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.: 4.1975
## Median : 5.000 Median : 4.000 Median : 7.1516
## Mean : 4.716 Mean : 4.085 Mean : 8.6909
## 3rd Qu.: 7.000 3rd Qu.: 7.000 3rd Qu.:13.0289
## Max. :10.000 Max. :10.000 Max. :21.7290
## NA's :14 NA's :21
## ED_NowPain.x Race Date.Run_T0
## Min. : 0.000 Min. :1.000 Min. :44032
## 1st Qu.: 5.000 1st Qu.:2.000 1st Qu.:44039
## Median : 7.000 Median :3.000 Median :44063
## Mean : 6.655 Mean :2.587 Mean :44073
## 3rd Qu.: 8.000 3rd Qu.:3.000 3rd Qu.:44082
## Max. :10.000 Max. :3.000 Max. :44157
## NA's :1
length(unique(E2_women_wide[["PID"]])) #197 unique PID
## [1] 197
March 2024 data Create wide df of estrogen and pain data
#aurora data is df0 #4745 rows
#estrogen data is e2_keyed #906 rows
#combine e2 and aurora data
e2_keyed_full <- e2_keyed %>%
left_join(df0,by=c("PID" = "PID")) #1217 rows x 353 columns
## Warning in left_join(., df0, by = c(PID = "PID")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 15 of `x` matches multiple rows in `y`.
## ℹ Row 946 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
#where is the NRS pain score
grep("NRS", names(e2_keyed_full), value = TRUE)
## [1] "ED_Somatic_NRS_SUM" "WK2_Somatic_NRS_SUM" "WK8_Somatic_NRS_SUM"
## [4] "M3_Somatic_NRS_SUM" "M6_Somatic_NRS_SUM"
#This is only the NRS sum count, so a value greater than 10, not the 0-10 NRS value. Will use something else.
#Do I use ED_NowPain_C or ED_Pain_C for ED pain?
#I don't have the "Date.Run_T0" value for this new set of 2024 estrogen values. maybe I don't need it ?
#added plate number to control for batch effects
#create wide df below
e2_full_wide <- e2_keyed_full[,c("PID","ED_Age","ED_HighestGrade","BMI","ED_RaceEthCode","sqrt.conc","ED_NowPain_C","WK8_Pain_C", "M3_Pain_C", "M6_Pain_C","Plate.Number", "AlternateID")] #dim 1217 x 12
e2_full_wide %>%
inner_join(ID2,by="PID")
## Warning in inner_join(., ID2, by = "PID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 123 of `x` matches multiple rows in `y`.
## ℹ Row 11 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## PID ED_Age ED_HighestGrade BMI ED_RaceEthCode
## 1 101394 22 High school graduate NA Non-Hispanic Black
## 2 101394 22 High school graduate NA Non-Hispanic Black
## 3 101394 22 High school graduate NA Non-Hispanic Black
## 4 101394 22 High school graduate NA Non-Hispanic Black
## 5 101394 22 High school graduate NA Non-Hispanic Black
## 6 101394 22 High school graduate NA Non-Hispanic Black
## 7 101394 22 High school graduate NA Non-Hispanic Black
## 8 101394 22 High school graduate NA Non-Hispanic Black
## 9 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 10 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 11 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 12 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 13 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 14 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 15 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 16 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 17 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 18 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 19 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 20 110747 60 Some college, no degree 33.7 Non-Hispanic Black
## 21 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 22 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 23 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 24 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 25 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 26 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 27 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 28 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 29 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 30 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 31 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 32 110965 20 Some college, no degree 25.1 Non-Hispanic White
## 33 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 34 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 35 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 36 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 37 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 38 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 39 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 40 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 41 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 42 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 43 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 44 110813 35 Bachelor's degree: BA, AB, BS, BBA 23.1 Non-Hispanic Black
## 45 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 46 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 47 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 48 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 49 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 50 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 51 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 52 111301 23 Some college, no degree 17.3 Non-Hispanic White
## 53 110468 41 High school graduate 51.1 Non-Hispanic White
## 54 110468 41 High school graduate 51.1 Non-Hispanic White
## 55 110468 41 High school graduate 51.1 Non-Hispanic White
## 56 110468 41 High school graduate 51.1 Non-Hispanic White
## 57 110468 41 High school graduate 51.1 Non-Hispanic White
## 58 110468 41 High school graduate 51.1 Non-Hispanic White
## 59 110468 41 High school graduate 51.1 Non-Hispanic White
## 60 110468 41 High school graduate 51.1 Non-Hispanic White
## 61 111201 21 High school graduate 22.7 Non-Hispanic Black
## 62 111201 21 High school graduate 22.7 Non-Hispanic Black
## 63 111201 21 High school graduate 22.7 Non-Hispanic Black
## 64 111201 21 High school graduate 22.7 Non-Hispanic Black
## 65 111201 21 High school graduate 22.7 Non-Hispanic Black
## 66 111201 21 High school graduate 22.7 Non-Hispanic Black
## 67 111128 29 High school graduate 32.3 Non-Hispanic White
## 68 111128 29 High school graduate 32.3 Non-Hispanic White
## 69 111128 29 High school graduate 32.3 Non-Hispanic White
## 70 111128 29 High school graduate 32.3 Non-Hispanic White
## 71 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 72 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 73 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 74 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 75 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 76 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 77 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 78 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 79 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 80 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 81 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 82 104627 52 Some college, no degree 23.8 Non-Hispanic White
## 83 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 84 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 85 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 86 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 87 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 88 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 89 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 90 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 91 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 92 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 93 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## 94 108726 43 Some college, no degree 34.0 Non-Hispanic Black
## sqrt.conc ED_NowPain_C WK8_Pain_C M3_Pain_C M6_Pain_C Plate.Number
## 1 14.870289 6 0 0 0 4
## 2 14.870289 6 0 0 0 4
## 3 14.870289 6 0 0 0 4
## 4 14.870289 6 0 0 0 4
## 5 14.870289 6 0 0 0 4
## 6 14.870289 6 0 0 0 4
## 7 14.870289 6 0 0 0 4
## 8 14.870289 6 0 0 0 4
## 9 3.411891 4 4 5 3 9
## 10 3.411891 4 4 5 3 9
## 11 3.411891 4 4 5 3 9
## 12 3.411891 4 4 5 3 9
## 13 3.411891 4 4 5 3 9
## 14 3.411891 4 4 5 3 9
## 15 3.411891 4 4 5 3 9
## 16 3.411891 4 4 5 3 9
## 17 3.411891 4 4 5 3 9
## 18 3.411891 4 4 5 3 9
## 19 3.411891 4 4 5 3 9
## 20 3.411891 4 4 5 3 9
## 21 4.063250 5 0 0 0 9
## 22 4.063250 5 0 0 0 9
## 23 4.063250 5 0 0 0 9
## 24 4.063250 5 0 0 0 9
## 25 4.063250 5 0 0 0 9
## 26 4.063250 5 0 0 0 9
## 27 4.063250 5 0 0 0 9
## 28 4.063250 5 0 0 0 9
## 29 4.063250 5 0 0 0 9
## 30 4.063250 5 0 0 0 9
## 31 4.063250 5 0 0 0 9
## 32 4.063250 5 0 0 0 9
## 33 13.573504 4 8 8 7 10
## 34 13.573504 4 8 8 7 10
## 35 13.573504 4 8 8 7 10
## 36 13.573504 4 8 8 7 10
## 37 13.573504 4 8 8 7 10
## 38 13.573504 4 8 8 7 10
## 39 13.573504 4 8 8 7 10
## 40 13.573504 4 8 8 7 10
## 41 13.573504 4 8 8 7 10
## 42 13.573504 4 8 8 7 10
## 43 13.573504 4 8 8 7 10
## 44 13.573504 4 8 8 7 10
## 45 3.735706 6 4 NA 3 10
## 46 3.735706 6 4 NA 3 10
## 47 3.735706 6 4 NA 3 10
## 48 3.735706 6 4 NA 3 10
## 49 3.735706 6 4 NA 3 10
## 50 3.735706 6 4 NA 3 10
## 51 3.735706 6 4 NA 3 10
## 52 3.735706 6 4 NA 3 10
## 53 4.842365 7 NA 6 5 10
## 54 4.842365 7 NA 6 5 10
## 55 4.842365 7 NA 6 5 10
## 56 4.842365 7 NA 6 5 10
## 57 4.842365 7 NA 6 5 10
## 58 4.842365 7 NA 6 5 10
## 59 4.842365 7 NA 6 5 10
## 60 4.842365 7 NA 6 5 10
## 61 6.199113 7 7 6 6 11
## 62 6.199113 7 7 6 6 11
## 63 6.199113 7 7 6 6 11
## 64 6.199113 7 7 6 6 11
## 65 6.199113 7 7 6 6 11
## 66 6.199113 7 7 6 6 11
## 67 4.701542 8 7 7 6 16
## 68 4.701542 8 7 7 6 16
## 69 4.701542 8 7 7 6 16
## 70 4.701542 8 7 7 6 16
## 71 3.681304 6 7 4 8 17
## 72 3.681304 6 7 4 8 17
## 73 3.681304 6 7 4 8 17
## 74 3.681304 6 7 4 8 17
## 75 3.681304 6 7 4 8 17
## 76 3.681304 6 7 4 8 17
## 77 3.681304 6 7 4 8 17
## 78 3.681304 6 7 4 8 17
## 79 3.681304 6 7 4 8 17
## 80 3.681304 6 7 4 8 17
## 81 3.681304 6 7 4 8 17
## 82 3.681304 6 7 4 8 17
## 83 4.382009 5 6 4 4 17
## 84 4.382009 5 6 4 4 17
## 85 4.382009 5 6 4 4 17
## 86 4.382009 5 6 4 4 17
## 87 4.382009 5 6 4 4 17
## 88 4.382009 5 6 4 4 17
## 89 4.382009 5 6 4 4 17
## 90 4.382009 5 6 4 4 17
## 91 4.382009 5 6 4 4 17
## 92 4.382009 5 6 4 4 17
## 93 4.382009 5 6 4 4 17
## 94 4.382009 5 6 4 4 17
## AlternateID SAMPLE ID ALT ID INVENTORY CODE Sample Type sample
## 1 T1 824-1024 T3 XA0050274 RNA1 522
## 2 T1 824-1024 T0 XA0045353 RNA1 2
## 3 T1 824-1024 T3 XA0050274 RNA1 522
## 4 T1 824-1024 T0 XA0045353 RNA1 2
## 5 T1 824-1024 T3 XA0050274 RNA1 522
## 6 T1 824-1024 T0 XA0045353 RNA1 2
## 7 T1 824-1024 T3 XA0050274 RNA1 522
## 8 T1 824-1024 T0 XA0045353 RNA1 2
## 9 T3 846-1109 T1 XA0055516 RNA1 336
## 10 T3 846-1109 T0 XA0055259 RNA1 148
## 11 T3 846-1109 T1 XA0055516 RNA1 336
## 12 T3 846-1109 T0 XA0055259 RNA1 148
## 13 T3 846-1109 T1 XA0055516 RNA1 336
## 14 T3 846-1109 T0 XA0055259 RNA1 148
## 15 T3 846-1109 T1 XA0055516 RNA1 336
## 16 T3 846-1109 T0 XA0055259 RNA1 148
## 17 T3 846-1109 T1 XA0055516 RNA1 336
## 18 T3 846-1109 T0 XA0055259 RNA1 148
## 19 T3 846-1109 T1 XA0055516 RNA1 336
## 20 T3 846-1109 T0 XA0055259 RNA1 148
## 21 T3 846-1112 T1 XA0055530 RNA1 341
## 22 T3 846-1112 T0 XA0055329 RNA1 430
## 23 T3 846-1112 T1 XA0055530 RNA1 341
## 24 T3 846-1112 T0 XA0055329 RNA1 430
## 25 T3 846-1112 T1 XA0055530 RNA1 341
## 26 T3 846-1112 T0 XA0055329 RNA1 430
## 27 T3 846-1112 T1 XA0055530 RNA1 341
## 28 T3 846-1112 T0 XA0055329 RNA1 430
## 29 T3 846-1112 T1 XA0055530 RNA1 341
## 30 T3 846-1112 T0 XA0055329 RNA1 430
## 31 T3 846-1112 T1 XA0055530 RNA1 341
## 32 T3 846-1112 T0 XA0055329 RNA1 430
## 33 T3 846-1110 T1 XA0055534 RNA1 343
## 34 T3 846-1110 T0 XA0055331 RNA1 151
## 35 T3 846-1110 T1 XA0055534 RNA1 343
## 36 T3 846-1110 T0 XA0055331 RNA1 151
## 37 T3 846-1110 T1 XA0055534 RNA1 343
## 38 T3 846-1110 T0 XA0055331 RNA1 151
## 39 T3 846-1110 T1 XA0055534 RNA1 343
## 40 T3 846-1110 T0 XA0055331 RNA1 151
## 41 T3 846-1110 T1 XA0055534 RNA1 343
## 42 T3 846-1110 T0 XA0055331 RNA1 151
## 43 T3 846-1110 T1 XA0055534 RNA1 343
## 44 T3 846-1110 T0 XA0055331 RNA1 151
## 45 T3 846-1117 T1 XA0055521 RNA1 338
## 46 T3 846-1117 T0 XA0055300 RNA1 429
## 47 T3 846-1117 T1 XA0055521 RNA1 338
## 48 T3 846-1117 T0 XA0055300 RNA1 429
## 49 T3 846-1117 T1 XA0055521 RNA1 338
## 50 T3 846-1117 T0 XA0055300 RNA1 429
## 51 T3 846-1117 T1 XA0055521 RNA1 338
## 52 T3 846-1117 T0 XA0055300 RNA1 429
## 53 T3 873-1054 T0 XA0054511 RNA1 299
## 54 T3 873-1054 T1 XA0054622 RNA1 305
## 55 T3 873-1054 T0 XA0054511 RNA1 299
## 56 T3 873-1054 T1 XA0054622 RNA1 305
## 57 T3 873-1054 T0 XA0054511 RNA1 299
## 58 T3 873-1054 T1 XA0054622 RNA1 305
## 59 T3 873-1054 T0 XA0054511 RNA1 299
## 60 T3 873-1054 T1 XA0054622 RNA1 305
## 61 T3 825-1110 T0 XA0055257 RNA1 168
## 62 T3 825-1110 T1 XA0055225 RNA1 575
## 63 T3 825-1110 T0 XA0055257 RNA1 168
## 64 T3 825-1110 T1 XA0055225 RNA1 575
## 65 T3 825-1110 T0 XA0055257 RNA1 168
## 66 T3 825-1110 T1 XA0055225 RNA1 575
## 67 T3 828-1216 T0 XA0054998 RNA1 573
## 68 T3 828-1216 T1 XA0054147 RNA1 119
## 69 T3 828-1216 T0 XA0054998 RNA1 573
## 70 T3 828-1216 T1 XA0054147 RNA1 119
## 71 T3 827-1008 T1 XA0047505 RNA1 485
## 72 T3 827-1008 T0 XA0045856 RNA1 450
## 73 T3 827-1008 T1 XA0047505 RNA1 485
## 74 T3 827-1008 T0 XA0045856 RNA1 450
## 75 T3 827-1008 T1 XA0047505 RNA1 485
## 76 T3 827-1008 T0 XA0045856 RNA1 450
## 77 T3 827-1008 T1 XA0047505 RNA1 485
## 78 T3 827-1008 T0 XA0045856 RNA1 450
## 79 T3 827-1008 T1 XA0047505 RNA1 485
## 80 T3 827-1008 T0 XA0045856 RNA1 450
## 81 T3 827-1008 T1 XA0047505 RNA1 485
## 82 T3 827-1008 T0 XA0045856 RNA1 450
## 83 T3 828-1145 T0 XA0053287 RNA1 406
## 84 T3 828-1145 T1 XA0053178 RNA1 86
## 85 T3 828-1145 T0 XA0053287 RNA1 406
## 86 T3 828-1145 T1 XA0053178 RNA1 86
## 87 T3 828-1145 T0 XA0053287 RNA1 406
## 88 T3 828-1145 T1 XA0053178 RNA1 86
## 89 T3 828-1145 T0 XA0053287 RNA1 406
## 90 T3 828-1145 T1 XA0053178 RNA1 86
## 91 T3 828-1145 T0 XA0053287 RNA1 406
## 92 T3 828-1145 T1 XA0053178 RNA1 86
## 93 T3 828-1145 T0 XA0053287 RNA1 406
## 94 T3 828-1145 T1 XA0053178 RNA1 86
#at this above step which includes all time points of E2 for the March 2024 data, there are 94 overlapping PIDs with the 2021 miRNA data
#rename some of the columns
e2_full_wide <- e2_full_wide %>%
rename(ED_Pain = ED_NowPain_C) %>%
rename(WK8_Pain = WK8_Pain_C) %>%
rename(M3_Pain = M3_Pain_C) %>%
rename(M6_Pain = M6_Pain_C) %>%
rename(Race = ED_RaceEthCode)
#Remove cases without estrogen measurements
e2_full_wide <- e2_full_wide[complete.cases(e2_full_wide$sqrt.conc), ] #dimensions 1161 x 12
#Keep ONLY estrogen time point T0, E2 at time of ED visit
e2_full_wide <- e2_full_wide %>%
filter(AlternateID == "T0") #dimensions 558 x 12
#note: after the above step of keeping only T0 values, there remain no overlapping PIDs between the March 2024 E2 data and the 2021 miRNA data.
#summarize wide df and see how many unique PIDs
#summary(e2_full_wide)
length(unique(e2_full_wide[["PID"]])) #432 unique PID
## [1] 432
NEW MARCH 2024 CODE fix the education category
unique(e2_full_wide$ED_HighestGrade)
## [1] "High school graduate"
## [2] "Some college, no degree"
## [3] "Associate degree: Occupational, technical, or vocational program"
## [4] "Bachelor's degree: BA, AB, BS, BBA"
## [5] "GED or equivalent"
## [6] "Associate degree: Academic program"
## [7] "Doctoral degree: PhD, EdD"
## [8] "Master's degree: MA, MS, MEng, MEd, MBA"
## [9] "Professional school degree: MD, DDS, DVM, JD"
## [10] "8th grade"
## [11] "11th grade"
## [12] "12th grade, no diploma"
## [13] "9th grade"
## [14] "10th grade"
## [15] "7th grade"
#reduce education categories to 4 levels
level_mapping <- c(
"Some college, no degree" = "Some College or Associate Degree",
"Professional school degree: MD, DDS, DVM, JD" = "Advanced Degree",
"Associate degree: Occupational, technical, or vocational program" = "Some College or Associate Degree",
"High school graduate" = "High School or Less",
"Bachelor's degree: BA, AB, BS, BBA" = "Bachelor's Degree",
"Master's degree: MA, MS, MEng, MEd, MBA" = "Advanced Degree",
"GED or equivalent" = "High School or Less",
"8th grade" = "High School or Less",
"Doctoral degree: PhD, EdD" = "Advanced Degree",
"Associate degree: Academic program" = "Some College or Associate Degree",
"12th grade, no diploma" = "High School or Less",
"11th grade" = "High School or Less",
"9th grade" = "High School or Less",
"10th grade" = "High School or Less",
"7th grade" = "High School or Less"
)
#apply new mapping to education category
e2_full_wide$ED_HighestGrade <- factor(e2_full_wide$ED_HighestGrade, levels = names(level_mapping), labels = level_mapping)
#dim 558 x 12
Original code
gather(E2_women_wide, key=names, value=pain,WK8_Pain,M3_Pain,M6_Pain) %>%
mutate(time = ifelse(names=="WK2_Pain",0.5,ifelse(names=="WK8_Pain",2,ifelse(names=="M3_Pain",3,ifelse(names=="M6_Pain",6,NA))))) %>%
arrange(PID,time) -> E2_women_long
E2_women_long <- E2_women_long[complete.cases(E2_women_long),]
length(unique(E2_women_long[["PID"]])) #164 PID
## [1] 164
#summary(E2_women_long$E2.Concentration..pg.ml._T0_sqrt)
March 2024 data Create long df of estrogen and pain data changes: updated df used, removed Week2 pain rename
gather(e2_full_wide, key=names, value=pain,WK8_Pain,M3_Pain,M6_Pain) %>%
mutate(time = ifelse(names=="WK8_Pain",2,ifelse(names=="M3_Pain",3,ifelse(names=="M6_Pain",6,NA)))) %>%
arrange(PID,time) -> e2_full_long
e2_full_long <- e2_full_long[complete.cases(e2_full_long),] #dim 1541 x 12
length(unique(e2_full_long[["PID"]])) #397 PID
## [1] 397
#summary(e2_full_long$sqrt.conc)
#there are a few random duplicates that need to be removed
e2_full_long <- e2_full_long[!duplicated(e2_full_long), ] #dim 1163 x 12
ORIGINAL GLS MODEL CODE
#model (table2a, left side)
E2_women_long$E2_Concentration_T0_sqrt = E2_women_long$E2.Concentration..pg.ml._T0_sqrt
E2_women_long$Date_Run_T0 = E2_women_long$Date.Run_T0
E2_women_model <- gls(pain ~ time + E2_Concentration_T0_sqrt + ED_Age + HighestGrade + BMI + ED_NowPain.x + Race + Date_Run_T0, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=E2_women_long)
summary(E2_women_model)
## Generalized least squares fit by REML
## Model: pain ~ time + E2_Concentration_T0_sqrt + ED_Age + HighestGrade + BMI + ED_NowPain.x + Race + Date_Run_T0
## Data: E2_women_long
## AIC BIC logLik
## 2257.843 2303.215 -1117.922
##
## Correlation Structure: ARMA(1,0)
## Formula: ~time | PID
## Parameter estimate(s):
## Phi1
## 0.7745422
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) -210.08763 215.99869 -0.972634 0.3312
## time -0.26382 0.07157 -3.686142 0.0003
## E2_Concentration_T0_sqrt -0.04854 0.04138 -1.173080 0.2414
## ED_Age 0.03183 0.01658 1.920209 0.0555
## HighestGrade -0.16961 0.24209 -0.700584 0.4839
## BMI 0.02026 0.02838 0.713853 0.4757
## ED_NowPain.x 0.37291 0.08619 4.326428 0.0000
## Race 0.37034 0.38705 0.956813 0.3392
## Date_Run_T0 0.00480 0.00490 0.978950 0.3281
##
## Correlation:
## (Intr) time E2_C_T ED_Age HghstG BMI ED_NP.
## time -0.004
## E2_Concentration_T0_sqrt 0.193 0.012
## ED_Age -0.036 -0.008 0.269
## HighestGrade 0.186 -0.006 -0.018 -0.300
## BMI -0.065 0.003 0.007 -0.312 0.080
## ED_NowPain.x -0.056 -0.004 -0.092 -0.048 0.052 -0.228
## Race -0.159 -0.020 -0.082 0.018 0.159 -0.064 -0.104
## Date_Run_T0 -1.000 0.002 -0.194 0.035 -0.190 0.062 0.055
## Race
## time
## E2_Concentration_T0_sqrt
## ED_Age
## HighestGrade
## BMI
## ED_NowPain.x
## Race
## Date_Run_T0 0.154
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.11992096 -0.78910606 -0.05550526 0.79878542 2.21845145
##
## Residual standard error: 3.130053
## Degrees of freedom: 466 total; 457 residual
MARCH 2024 NEW E2 DATA CODE GLS MODEL
#took out highest grade because its not coded correctly
e2_model <- gls(pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long)
summary(e2_model)
## Generalized least squares fit by REML
## Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number
## Data: e2_full_long
## AIC BIC logLik
## 5566.51 5642.223 -2768.255
##
## Correlation Structure: ARMA(1,0)
## Formula: ~time | PID
## Parameter estimate(s):
## Phi1
## 0.73062
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.9370128 0.8249753 2.347965 0.0190
## time -0.2372621 0.0453816 -5.228156 0.0000
## sqrt.conc 0.0097417 0.0403274 0.241564 0.8092
## ED_Age 0.0433499 0.0096850 4.475991 0.0000
## ED_HighestGradeAdvanced Degree -1.1040972 0.5188331 -2.128039 0.0335
## ED_HighestGradeHigh School or Less 0.0062920 0.2810954 0.022384 0.9821
## ED_HighestGradeBachelor's Degree -0.6553079 0.3530373 -1.856200 0.0637
## BMI 0.0268486 0.0147667 1.818177 0.0693
## ED_Pain 0.2596334 0.0491419 5.283337 0.0000
## RaceNon-Hispanic Black -0.5629988 0.3746560 -1.502709 0.1332
## RaceNon-Hispanic Other -0.4014009 0.5972709 -0.672058 0.5017
## RaceNon-Hispanic White -0.5940566 0.3903109 -1.522009 0.1283
## Plate.Number -0.0234922 0.0187327 -1.254078 0.2101
##
## Correlation:
## (Intr) time sqrt.c ED_Age ED_HGAD ED_SoL
## time -0.226
## sqrt.conc -0.301 0.001
## ED_Age -0.409 0.004 0.251
## ED_HighestGradeAdvanced Degree -0.219 0.002 -0.015 -0.098
## ED_HighestGradeHigh School or Less -0.245 0.000 0.025 0.105 0.237
## ED_HighestGradeBachelor's Degree -0.156 0.000 0.008 -0.069 0.244 0.339
## BMI -0.465 0.006 -0.101 -0.167 0.069 0.051
## ED_Pain -0.444 0.002 -0.060 0.046 0.136 -0.074
## RaceNon-Hispanic Black -0.310 0.000 -0.050 -0.064 0.221 0.042
## RaceNon-Hispanic Other -0.258 0.002 -0.048 0.027 0.118 -0.002
## RaceNon-Hispanic White -0.316 -0.002 -0.055 -0.089 0.131 0.059
## Plate.Number -0.285 0.000 0.004 -0.022 0.063 0.066
## ED_HGBD BMI ED_Pan RcN-HB RcN-HO RcN-HW
## time
## sqrt.conc
## ED_Age
## ED_HighestGradeAdvanced Degree
## ED_HighestGradeHigh School or Less
## ED_HighestGradeBachelor's Degree
## BMI 0.032
## ED_Pain 0.070 0.013
## RaceNon-Hispanic Black 0.079 -0.011 0.015
## RaceNon-Hispanic Other 0.048 0.022 0.096 0.475
## RaceNon-Hispanic White -0.045 -0.008 0.159 0.718 0.456
## Plate.Number 0.014 0.062 0.076 -0.038 -0.046 -0.108
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.096733348 -0.819667252 0.005922671 0.718887652 2.479985354
##
## Residual standard error: 3.005848
## Degrees of freedom: 1163 total; 1150 residual
ok well thats not exactly stellar. let’s see what we’re working with
#visualize pain over time
ggplot(e2_full_long, aes(x = as.factor(time), y = pain, group = PID, color = PID)) +
geom_line() +
geom_point() +
labs(x = "Time Point", y = "Pain Score") +
ggtitle("Pain Scores Over Time by PID") +
theme_minimal()
# try something else bc thats awful
summary_df <- e2_full_long %>%
group_by(time) %>%
summarise(mean_pain = mean(pain), median_pain = median(pain))
# Plot mean or median pain scores at each time point
ggplot(summary_df, aes(x = time)) +
geom_line(aes(y = mean_pain), color = "blue", size = 1.5) + # Mean pain scores
geom_point(aes(y = mean_pain), color = "blue", size = 3) +
geom_line(aes(y = median_pain), color = "red", size = 1.5, linetype = "dashed") + # Median pain scores
geom_point(aes(y = median_pain), color = "red", size = 3) +
geom_text(aes(x = 3.5, y = 4.2, label = "Mean"), color = "blue", size = 4, hjust = -0.2) + # Text for mean
geom_text(aes(x = 5, y = 4.1, label = "Median"), color = "red", size = 4, hjust = -0.2) + # Text for median
labs(x = "Time Point", y = "Pain Score", title = "Summary of Pain Scores Over Time") +
scale_x_continuous(breaks = c(2, 3, 6))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_text(aes(x = 3.5, y = 4.2, label = "Mean"), color = "blue", : All aesthetics have length 1, but the data has 3 rows.
## ℹ Did you mean to use `annotate()`?
## Warning in geom_text(aes(x = 5, y = 4.1, label = "Median"), color = "red", : All aesthetics have length 1, but the data has 3 rows.
## ℹ Did you mean to use `annotate()`?
visualize estrogen versus pain over time
ggplot(e2_full_long, aes(x = sqrt.conc, y = pain, color = factor(time), shape = factor(time))) +
geom_point(size = 3) +
labs(x = "Estrogen Concentration (sqrt)", y = "Pain Level", color = "Time", shape = "Time") +
scale_color_discrete(labels = c("2" = "Time 2", "3" = "Time 3", "6" = "Time 6")) +
scale_shape_manual(values = c(16, 17, 18), labels = c("2" = "Time 2", "3" = "Time 3", "6" = "Time 6"))
ggplot(e2_full_long, aes(x = sqrt.conc, fill = factor(time))) +
geom_density(alpha = 0.5) +
labs(x = "Estrogen Concentration (sqrt)", y = "Density", fill = "Time") +
facet_wrap(~ time, ncol = 1) +
theme_minimal()
summary stats
e2_full_long %>%
group_by(time, pain) %>%
summarise(
Median_E2 = median(sqrt.conc, na.rm = TRUE),
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
## # A tibble: 33 × 3
## # Groups: time [3]
## time pain Median_E2
## <dbl> <int> <dbl>
## 1 2 0 5.21
## 2 2 1 4.89
## 3 2 2 4.26
## 4 2 3 4.82
## 5 2 4 4.58
## 6 2 5 4.67
## 7 2 6 5.19
## 8 2 7 5.58
## 9 2 8 4.95
## 10 2 9 5.05
## # ℹ 23 more rows
ggplot(e2_full_long, aes(x = sqrt.conc, y = pain)) +
geom_point(size=4, alpha=.1,color="firebrick4", fill="violetred",shape=21)
cor(e2_full_long$sqrt,e2_full_long$pain)
## [1] -0.00841645
idea.. refactor pain to two categories: low to none and moderate to severe
e2_full_long <- e2_full_long %>%
mutate(bin_pain = cut(pain, breaks = c(-Inf, 4, 10), labels = c("Low to None", "Moderate to Severe"))) #dimensions 1163 x 13
#review summary stats
e2_full_long %>%
group_by(time, bin_pain) %>%
summarise(
Mean_E2 = mean(sqrt.conc, na.rm = TRUE),
Median_E2 = median(sqrt.conc, na.rm = TRUE),
)
## `summarise()` has grouped output by 'time'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 4
## # Groups: time [3]
## time bin_pain Mean_E2 Median_E2
## <dbl> <fct> <dbl> <dbl>
## 1 2 Low to None 5.58 4.80
## 2 2 Moderate to Severe 6.05 5.08
## 3 3 Low to None 5.84 4.86
## 4 3 Moderate to Severe 5.86 4.98
## 5 6 Low to None 5.87 4.86
## 6 6 Moderate to Severe 5.79 4.98
ggplot(e2_full_long, aes(x = sqrt.conc, fill = bin_pain)) +
geom_histogram(binwidth = .2) +
facet_grid(time ~ .) +
labs(x = "Square Root of Estrogen Concentration", y = "Frequency", fill = "Pain Level")
ggplot(e2_full_long, aes(x = time, y = sqrt.conc, color = bin_pain)) +
geom_point() +
labs(x = "Time", y = "Square Root of Estrogen Concentration", color = "Pain Level")
lets look at the e2 values and re-level
#visualize
summary(e2_full_long$sqrt.conc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.325 3.867 4.906 5.841 6.844 20.137
ggplot(data = e2_full_long, aes(x = sqrt.conc, y = ..density..)) +
geom_density(color = "deeppink3", fill = "deeppink3", alpha = .4, size=1.8) +
labs(x = "Estrogen Level (sqrt.conc)", y = "Density",
title = "Density Plot of Estrogen Levels") +
geom_histogram(data = e2_full_long, aes(x = sqrt.conc, y = ..density..),
fill = "skyblue", color = "gray20", alpha = 0.9, bins = 50)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#visualize ED estrogen levels versus pain at month 6
df_m6 <- e2_full_long %>%
filter(time == 6) #dimensions 397 x 13
ggplot(df_m6, aes(x = bin_pain, y = sqrt.conc)) +
geom_boxplot() +
labs(title = "Box Plot of sqrt.conc by bin_pain at month 6")
ggplot(df_m6, aes(x = sqrt.conc, y = pain)) +
geom_point() +
labs(title = "Scatter Plot of sqrt.conc versus pain at month 6",
x = "Sqrt.Conc",
y = "Pain")
summary table
df_m6 %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain) %>%
tbl_summary(by = bin_pain,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
add_p()
| Characteristic | Low to None, N = 2311 | Moderate to Severe, N = 1661 | p-value2 |
|---|---|---|---|
| ED_Age | 36 (14) | 39 (13) | 0.012 |
| ED_HighestGrade | 0.2 | ||
| Some College or Associate Degree | 91 (39%) | 68 (41%) | |
| Advanced Degree | 19 (8.2%) | 8 (4.8%) | |
| High School or Less | 76 (33%) | 66 (40%) | |
| Bachelor's Degree | 45 (19%) | 24 (14%) | |
| BMI | 29 (8) | 31 (9) | 0.008 |
| Race | 0.3 | ||
| Hispanic | 29 (13%) | 28 (17%) | |
| Non-Hispanic Black | 98 (42%) | 77 (46%) | |
| Non-Hispanic Other | 16 (6.9%) | 7 (4.2%) | |
| Non-Hispanic White | 88 (38%) | 54 (33%) | |
| sqrt.conc | 5.87 (3.07) | 5.79 (3.13) | 0.7 |
| ED_Pain | 6 (3) | 7 (2) | <0.001 |
| pain | 1 (2) | 7 (2) | <0.001 |
| 1 Mean (SD); n (%) | |||
| 2 Wilcoxon rank sum test; Pearson’s Chi-squared test | |||
#explore cut off points, pick some levels
summary(e2_full_long$sqrt.conc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.325 3.867 4.906 5.841 6.844 20.137
length(e2_full_long[e2_full_long$sqrt.conc >= 10, "sqrt.conc"]) #109 values greater than or equal to 10
## [1] 109
length(e2_full_long[e2_full_long$sqrt.conc <= 3, "sqrt.conc"]) #101 values less than or equal to 3
## [1] 101
length(e2_full_long[e2_full_long$sqrt.conc >= 6.844, "sqrt.conc"]) #291 values greater than or equal to 3rd quartile
## [1] 291
length(e2_full_long[e2_full_long$sqrt.conc <= 3.867, "sqrt.conc"]) #289 values less than or equal to 1st quartile
## [1] 289
length(e2_full_long$sqrt.conc[e2_full_long$sqrt.conc >= 3.867 & e2_full_long$sqrt.conc <= 6.844]) #583 values between 1st and 3rd quartile
## [1] 583
#583+289+291 = 1153
make new categories based on this exploration..
e2_full_long <- e2_full_long %>%
mutate(e2_bin = case_when(
sqrt.conc <= 3 ~ "low E2",
sqrt.conc > 3 & sqrt.conc < 10 ~ "mid E2",
sqrt.conc >= 10 ~ "high E2"
))
#new dimensions 1163 x 14
summary table with new categories
e2_full_long %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**Full Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
add_overall() %>%
add_p()%>%
bold_p()
| Characteristic | Overall, N = 1,1631 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 1091 | low E2, N = 1011 | mid E2, N = 9531 | |||
| ED_Age | 37 (13) | 31 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.024 | ||||
| Some College or Associate Degree | 466 (40%) | 45 (41%) | 28 (28%) | 393 (41%) | |
| Advanced Degree | 81 (7.0%) | 6 (5.5%) | 3 (3.0%) | 72 (7.6%) | |
| High School or Less | 416 (36%) | 42 (39%) | 44 (44%) | 330 (35%) | |
| Bachelor's Degree | 200 (17%) | 16 (15%) | 26 (26%) | 158 (17%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.2 |
| Race | <0.001 | ||||
| Hispanic | 167 (14%) | 9 (8.3%) | 17 (17%) | 141 (15%) | |
| Non-Hispanic Black | 514 (44%) | 47 (43%) | 35 (35%) | 432 (45%) | |
| Non-Hispanic Other | 69 (5.9%) | 6 (5.5%) | 15 (15%) | 48 (5.0%) | |
| Non-Hispanic White | 413 (36%) | 47 (43%) | 34 (34%) | 332 (35%) | |
| sqrt.conc | 5.84 (3.09) | 13.44 (2.61) | 2.46 (0.39) | 5.33 (1.67) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.5 |
| bin_pain | 0.2 | ||||
| Low to None | 601 (52%) | 54 (50%) | 44 (44%) | 503 (53%) | |
| Moderate to Severe | 562 (48%) | 55 (50%) | 57 (56%) | 450 (47%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 4 (3) | 0.005 |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||
#above, this shows the category pain is significant (p=.005). this is dependent on time point, which is not in this summary table. low E2 has a mean pain of 5 while mid and high E2 have a mean pain of 4, across all time points.
#when all time points are combined, pain is significant across the different E2 categories
#but below, when i stratify by time point, it is no longer significant
#summary table for MONTH 2
e2_full_long %>%
filter(time == 2) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**") %>%
add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Overall, N = 3841 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 361 | low E2, N = 341 | mid E2, N = 3141 | |||
| ED_Age | 37 (13) | 31 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.5 | ||||
| Some College or Associate Degree | 155 (40%) | 15 (42%) | 9 (26%) | 131 (42%) | |
| Advanced Degree | 27 (7.0%) | 2 (5.6%) | 1 (2.9%) | 24 (7.6%) | |
| High School or Less | 135 (35%) | 14 (39%) | 15 (44%) | 106 (34%) | |
| Bachelor's Degree | 67 (17%) | 5 (14%) | 9 (26%) | 53 (17%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.6 |
| Race | 0.3 | ||||
| Hispanic | 55 (14%) | 3 (8.3%) | 6 (18%) | 46 (15%) | |
| Non-Hispanic Black | 170 (44%) | 16 (44%) | 11 (32%) | 143 (46%) | |
| Non-Hispanic Other | 23 (6.0%) | 2 (5.6%) | 5 (15%) | 16 (5.1%) | |
| Non-Hispanic White | 136 (35%) | 15 (42%) | 12 (35%) | 109 (35%) | |
| sqrt.conc | 5.84 (3.11) | 13.47 (2.66) | 2.45 (0.40) | 5.33 (1.67) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.7 |
| bin_pain | 0.3 | ||||
| Low to None | 174 (45%) | 13 (36%) | 13 (38%) | 148 (47%) | |
| Moderate to Severe | 210 (55%) | 23 (64%) | 21 (62%) | 166 (53%) | |
| pain | 5 (3) | 5 (3) | 6 (3) | 5 (3) | 0.10 |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test | |||||
#summary table for MONTH 3
e2_full_long %>%
filter(time == 3) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**") %>%
add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Overall, N = 3821 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 361 | low E2, N = 321 | mid E2, N = 3141 | |||
| ED_Age | 37 (13) | 32 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.7 | ||||
| Some College or Associate Degree | 152 (40%) | 15 (42%) | 9 (28%) | 128 (41%) | |
| Advanced Degree | 27 (7.1%) | 2 (5.6%) | 1 (3.1%) | 24 (7.6%) | |
| High School or Less | 139 (36%) | 14 (39%) | 14 (44%) | 111 (35%) | |
| Bachelor's Degree | 64 (17%) | 5 (14%) | 8 (25%) | 51 (16%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.6 |
| Race | 0.3 | ||||
| Hispanic | 55 (14%) | 3 (8.3%) | 5 (16%) | 47 (15%) | |
| Non-Hispanic Black | 169 (44%) | 15 (42%) | 12 (38%) | 142 (45%) | |
| Non-Hispanic Other | 23 (6.0%) | 2 (5.6%) | 5 (16%) | 16 (5.1%) | |
| Non-Hispanic White | 135 (35%) | 16 (44%) | 10 (31%) | 109 (35%) | |
| sqrt.conc | 5.85 (3.08) | 13.38 (2.61) | 2.47 (0.38) | 5.33 (1.68) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.9 |
| bin_pain | 0.7 | ||||
| Low to None | 196 (51%) | 19 (53%) | 14 (44%) | 163 (52%) | |
| Moderate to Severe | 186 (49%) | 17 (47%) | 18 (56%) | 151 (48%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 4 (3) | 0.2 |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test | |||||
#summary table for MONTH 6
e2_full_long %>%
filter(time == 6) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**") %>%
add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Overall, N = 3971 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 371 | low E2, N = 351 | mid E2, N = 3251 | |||
| ED_Age | 37 (13) | 31 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.7 | ||||
| Some College or Associate Degree | 159 (40%) | 15 (41%) | 10 (29%) | 134 (41%) | |
| Advanced Degree | 27 (6.8%) | 2 (5.4%) | 1 (2.9%) | 24 (7.4%) | |
| High School or Less | 142 (36%) | 14 (38%) | 15 (43%) | 113 (35%) | |
| Bachelor's Degree | 69 (17%) | 6 (16%) | 9 (26%) | 54 (17%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.6 |
| Race | 0.3 | ||||
| Hispanic | 57 (14%) | 3 (8.1%) | 6 (17%) | 48 (15%) | |
| Non-Hispanic Black | 175 (44%) | 16 (43%) | 12 (34%) | 147 (45%) | |
| Non-Hispanic Other | 23 (5.8%) | 2 (5.4%) | 5 (14%) | 16 (4.9%) | |
| Non-Hispanic White | 142 (36%) | 16 (43%) | 12 (34%) | 114 (35%) | |
| sqrt.conc | 5.84 (3.09) | 13.46 (2.63) | 2.46 (0.40) | 5.33 (1.67) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.8 |
| bin_pain | 0.5 | ||||
| Low to None | 231 (58%) | 22 (59%) | 17 (49%) | 192 (59%) | |
| Moderate to Severe | 166 (42%) | 15 (41%) | 18 (51%) | 133 (41%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 4 (3) | 0.086 |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test | |||||
what if I only look at low and high e2 levels?
e2_full_long %>%
filter(e2_bin %in% c("high E2", "low E2")) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
add_p() %>% bold_p() %>% add_overall()
| Characteristic | Overall, N = 2101 | high E2, N = 1091 | low E2, N = 1011 | p-value2 |
|---|---|---|---|---|
| ED_Age | 38 (13) | 31 (9) | 46 (12) | <0.001 |
| ED_HighestGrade | 0.070 | |||
| Some College or Associate Degree | 73 (35%) | 45 (41%) | 28 (28%) | |
| Advanced Degree | 9 (4.3%) | 6 (5.5%) | 3 (3.0%) | |
| High School or Less | 86 (41%) | 42 (39%) | 44 (44%) | |
| Bachelor's Degree | 42 (20%) | 16 (15%) | 26 (26%) | |
| BMI | 29 (8) | 30 (9) | 28 (6) | 0.3 |
| Race | 0.020 | |||
| Hispanic | 26 (12%) | 9 (8.3%) | 17 (17%) | |
| Non-Hispanic Black | 82 (39%) | 47 (43%) | 35 (35%) | |
| Non-Hispanic Other | 21 (10%) | 6 (5.5%) | 15 (15%) | |
| Non-Hispanic White | 81 (39%) | 47 (43%) | 34 (34%) | |
| sqrt.conc | 8.2 (5.8) | 13.4 (2.6) | 2.5 (0.4) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 0.2 |
| bin_pain | 0.4 | |||
| Low to None | 98 (47%) | 54 (50%) | 44 (44%) | |
| Moderate to Severe | 112 (53%) | 55 (50%) | 57 (56%) | |
| pain | 5 (3) | 4 (3) | 5 (3) | 0.014 |
| 1 Mean (SD); n (%) | ||||
| 2 Wilcoxon rank sum test; Fisher’s exact test; Pearson’s Chi-squared test | ||||
#this shows pain across all time points is sig (p=.014)
repeat this summary table for each time point
e2_full_long %>%
filter(time == 2) %>% filter(e2_bin %in% c("high E2", "low E2")) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin, statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
add_p() %>% bold_p() %>% add_overall() #2 month, p =.3 for pain
## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
| Characteristic | Overall, N = 701 | high E2, N = 361 | low E2, N = 341 | p-value2 |
|---|---|---|---|---|
| ED_Age | 38 (13) | 31 (9) | 46 (12) | <0.001 |
| ED_HighestGrade | 0.4 | |||
| Some College or Associate Degree | 24 (34%) | 15 (42%) | 9 (26%) | |
| Advanced Degree | 3 (4.3%) | 2 (5.6%) | 1 (2.9%) | |
| High School or Less | 29 (41%) | 14 (39%) | 15 (44%) | |
| Bachelor's Degree | 14 (20%) | 5 (14%) | 9 (26%) | |
| BMI | 29 (8) | 30 (9) | 28 (6) | 0.5 |
| Race | 0.3 | |||
| Hispanic | 9 (13%) | 3 (8.3%) | 6 (18%) | |
| Non-Hispanic Black | 27 (39%) | 16 (44%) | 11 (32%) | |
| Non-Hispanic Other | 7 (10%) | 2 (5.6%) | 5 (15%) | |
| Non-Hispanic White | 27 (39%) | 15 (42%) | 12 (35%) | |
| sqrt.conc | 8.1 (5.9) | 13.5 (2.7) | 2.4 (0.4) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 0.4 |
| bin_pain | 0.9 | |||
| Low to None | 26 (37%) | 13 (36%) | 13 (38%) | |
| Moderate to Severe | 44 (63%) | 23 (64%) | 21 (62%) | |
| pain | 5 (3) | 5 (3) | 6 (3) | 0.3 |
| 1 Mean (SD); n (%) | ||||
| 2 Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test | ||||
e2_full_long %>%
filter(time == 3) %>% filter(e2_bin %in% c("high E2", "low E2")) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin, statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
add_p() %>% bold_p() %>% add_overall() #3 month, p =.12 for pain
## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
| Characteristic | Overall, N = 681 | high E2, N = 361 | low E2, N = 321 | p-value2 |
|---|---|---|---|---|
| ED_Age | 38 (12) | 32 (9) | 46 (12) | <0.001 |
| ED_HighestGrade | 0.5 | |||
| Some College or Associate Degree | 24 (35%) | 15 (42%) | 9 (28%) | |
| Advanced Degree | 3 (4.4%) | 2 (5.6%) | 1 (3.1%) | |
| High School or Less | 28 (41%) | 14 (39%) | 14 (44%) | |
| Bachelor's Degree | 13 (19%) | 5 (14%) | 8 (25%) | |
| BMI | 29 (8) | 30 (9) | 28 (6) | 0.5 |
| Race | 0.4 | |||
| Hispanic | 8 (12%) | 3 (8.3%) | 5 (16%) | |
| Non-Hispanic Black | 27 (40%) | 15 (42%) | 12 (38%) | |
| Non-Hispanic Other | 7 (10%) | 2 (5.6%) | 5 (16%) | |
| Non-Hispanic White | 26 (38%) | 16 (44%) | 10 (31%) | |
| sqrt.conc | 8.2 (5.8) | 13.4 (2.6) | 2.5 (0.4) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 0.6 |
| bin_pain | 0.5 | |||
| Low to None | 33 (49%) | 19 (53%) | 14 (44%) | |
| Moderate to Severe | 35 (51%) | 17 (47%) | 18 (56%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 0.12 |
| 1 Mean (SD); n (%) | ||||
| 2 Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test | ||||
e2_full_long %>%
filter(time == 6) %>% filter(e2_bin %in% c("high E2", "low E2")) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin) %>%
tbl_summary(by = e2_bin, statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
add_p() %>% bold_p() %>% add_overall() #6 month, p =.081 for pain
## Warning for variable 'ED_Age':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'BMI':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'ED_Pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
## Warning for variable 'pain':
## simpleWarning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot compute exact p-value with ties
| Characteristic | Overall, N = 721 | high E2, N = 371 | low E2, N = 351 | p-value2 |
|---|---|---|---|---|
| ED_Age | 38 (13) | 31 (9) | 46 (12) | <0.001 |
| ED_HighestGrade | 0.6 | |||
| Some College or Associate Degree | 25 (35%) | 15 (41%) | 10 (29%) | |
| Advanced Degree | 3 (4.2%) | 2 (5.4%) | 1 (2.9%) | |
| High School or Less | 29 (40%) | 14 (38%) | 15 (43%) | |
| Bachelor's Degree | 15 (21%) | 6 (16%) | 9 (26%) | |
| BMI | 29 (8) | 30 (9) | 28 (6) | 0.6 |
| Race | 0.4 | |||
| Hispanic | 9 (13%) | 3 (8.1%) | 6 (17%) | |
| Non-Hispanic Black | 28 (39%) | 16 (43%) | 12 (34%) | |
| Non-Hispanic Other | 7 (9.7%) | 2 (5.4%) | 5 (14%) | |
| Non-Hispanic White | 28 (39%) | 16 (43%) | 12 (34%) | |
| sqrt.conc | 8.1 (5.9) | 13.5 (2.6) | 2.5 (0.4) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 0.5 |
| bin_pain | 0.4 | |||
| Low to None | 39 (54%) | 22 (59%) | 17 (49%) | |
| Moderate to Severe | 33 (46%) | 15 (41%) | 18 (51%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 0.081 |
| 1 Mean (SD); n (%) | ||||
| 2 Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test; Pearson’s Chi-squared test | ||||
damn !! i don’t know. it’s not signficant when stratified by time point. I think this is because it is auto-correlating when all time points are combined because you have three of each case
visuals
ggplot(e2_full_long, aes(x = pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain Across All Time Points", y = "Density", title = "Pain All Time Points, by Estrogen Bin") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
#from this we see that mid and high estrogen had more 0s. low E2 had more 10s
#let's repeat this by time point to see we aren't getting auto-correlation
#BELOW: PAIN AT 2 MONTHS
ggplot(data = e2_full_long %>% filter(time == 2), aes(x = pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain at 2 MONTHS", y = "Density", title = "Pain at 2 MONTHS, by Estrogen Category") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
#BELOW: PAIN AT 3 MONTHS
ggplot(data = e2_full_long %>% filter(time == 3), aes(x = pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain at 3 MONTHS", y = "Density", title = "Pain at 3 MONTHS, by Estrogen Category") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
#BELOW: PAIN AT 6 MONTHS
ggplot(data = e2_full_long %>% filter(time == 6), aes(x = pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain at 6 MONTHS", y = "Density", title = "Pain at 6 MONTHS, by Estrogen Category") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
#boxplot of e2_bins and pain at month 2
ggplot(e2_full_long %>% filter(time == 2), aes(x = pain, y = e2_bin)) +
geom_boxplot() +
labs(title = "E2_bin X Pain at month 2")
#boxplot of e2_bins and pain at month 3
ggplot(e2_full_long %>% filter(time == 3), aes(x = pain, y = e2_bin)) +
geom_boxplot() +
labs(title = "E2_bin X Pain at month 3")
#boxplot of e2_bins and pain at month 6
ggplot(e2_full_long %>% filter(time == 6), aes(x = pain, y = e2_bin)) +
geom_boxplot() +
labs(title = "E2_bin X Pain at month 6")
Ok this is very useful!! We see that low E2 has fewer 0s and more 10s at each time point. This makes me think a different pain stratification should take place that focuses on the extremes of the pain NRS scale.
#high E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "high E2"), aes(x = pain, fill = as.factor(time))) +
geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
labs(title = "High E2 x Pain",
x = "Pain", y = "Frequency")
#mid E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "mid E2"), aes(x = pain, fill = as.factor(time))) +
geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
labs(title = "Mid E2 x Pain",
x = "Pain", y = "Frequency")
#low E2 across each time point
ggplot(e2_full_long %>% filter(e2_bin == "low E2"), aes(x = pain, fill = as.factor(time))) +
geom_histogram(binwidth = 1, position = "dodge", alpha = 0.7, color="black") +
labs(title = "Low E2 x Pain",
x = "Pain", y = "Frequency")
#High and Low E2 level at month 2 - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 2, e2_bin %in% c("high E2", "low E2")), aes(x = pain, fill = e2_bin)) +
geom_histogram(binwidth = .6, position = "dodge", color="black") +
scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
labs(title = "Pain at Month 2 by High and Low E2",
x = "Pain Score",
y = "Frequency")
#High and Low E2 level at month 3 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 3, e2_bin %in% c("high E2", "low E2")), aes(x = pain, fill = e2_bin)) +
geom_histogram(binwidth = .6, position = "dodge", color="black") +
scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
labs(title = "Pain at Month 3 by High and Low E2",
x = "Pain Score",
y = "Frequency")
#High and Low E2 level at month 6 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(time == 6, e2_bin %in% c("high E2", "low E2")), aes(x = pain, fill = e2_bin)) +
geom_histogram(binwidth = .6, position = "dodge", color="black") +
scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
labs(title = "Pain at Month 6 by High and Low E2",
x = "Pain Score",
y = "Frequency")
chi square ?
table(e2_full_long$e2_bin, as.factor(e2_full_long$pain))
##
## 0 1 2 3 4 5 6 7 8 9 10
## high E2 27 6 9 6 6 15 9 12 10 1 8
## low E2 7 11 5 10 11 13 9 5 6 3 21
## mid E2 188 75 55 93 92 111 111 75 71 35 47
table(e2_full_long$e2_bin, e2_full_long$bin_pain)
##
## Low to None Moderate to Severe
## high E2 54 55
## low E2 44 57
## mid E2 503 450
chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$pain))
## Warning in chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$pain)):
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: e2_full_long$e2_bin and as.factor(e2_full_long$pain)
## X-squared = 59.641, df = 20, p-value = 8.093e-06
chisq.test(e2_full_long$e2_bin, e2_full_long$bin_pain) #p-value = 0.1896
##
## Pearson's Chi-squared test
##
## data: e2_full_long$e2_bin and e2_full_long$bin_pain
## X-squared = 3.3259, df = 2, p-value = 0.1896
kruskal.test(pain ~ e2_bin, data = e2_full_long) # p-value = 0.00459
##
## Kruskal-Wallis rank sum test
##
## data: pain by e2_bin
## Kruskal-Wallis chi-squared = 10.768, df = 2, p-value = 0.00459
make new levels for pain
e2_full_long <- e2_full_long %>%
mutate(bin_pain2 = case_when(
pain < 2 ~ "0 or 1 pain",
pain > 1 & pain < 9 ~ "2 to 8 pain",
pain > 8 ~ "9 or 10 pain"
))
#new dimensions 1163 x 15
e2_full_long <- e2_full_long %>%
mutate(bin_pain3 = case_when(
pain < 1 ~ "0 pain",
pain > 0 & pain < 10 ~ "1 to 9 pain",
pain > 9 ~ "10 pain"
))
#new dimensions 1163 x 16
summary table
#each time point
e2_full_long %>%
#filter(time == 2) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Overall, N = 1,1631 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 1091 | low E2, N = 1011 | mid E2, N = 9531 | |||
| ED_Age | 37 (13) | 31 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.024 | ||||
| Some College or Associate Degree | 466 (40%) | 45 (41%) | 28 (28%) | 393 (41%) | |
| Advanced Degree | 81 (7.0%) | 6 (5.5%) | 3 (3.0%) | 72 (7.6%) | |
| High School or Less | 416 (36%) | 42 (39%) | 44 (44%) | 330 (35%) | |
| Bachelor's Degree | 200 (17%) | 16 (15%) | 26 (26%) | 158 (17%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.2 |
| Race | <0.001 | ||||
| Hispanic | 167 (14%) | 9 (8.3%) | 17 (17%) | 141 (15%) | |
| Non-Hispanic Black | 514 (44%) | 47 (43%) | 35 (35%) | 432 (45%) | |
| Non-Hispanic Other | 69 (5.9%) | 6 (5.5%) | 15 (15%) | 48 (5.0%) | |
| Non-Hispanic White | 413 (36%) | 47 (43%) | 34 (34%) | 332 (35%) | |
| sqrt.conc | 5.84 (3.09) | 13.44 (2.61) | 2.46 (0.39) | 5.33 (1.67) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.5 |
| bin_pain | 0.2 | ||||
| Low to None | 601 (52%) | 54 (50%) | 44 (44%) | 503 (53%) | |
| Moderate to Severe | 562 (48%) | 55 (50%) | 57 (56%) | 450 (47%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 4 (3) | 0.005 |
| bin_pain2 | <0.001 | ||||
| 0 or 1 pain | 314 (27%) | 33 (30%) | 18 (18%) | 263 (28%) | |
| 2 to 8 pain | 734 (63%) | 67 (61%) | 59 (58%) | 608 (64%) | |
| 9 or 10 pain | 115 (9.9%) | 9 (8.3%) | 24 (24%) | 82 (8.6%) | |
| bin_pain3 | <0.001 | ||||
| 0 pain | 222 (19%) | 27 (25%) | 7 (6.9%) | 188 (20%) | |
| 1 to 9 pain | 865 (74%) | 74 (68%) | 73 (72%) | 718 (75%) | |
| 10 pain | 76 (6.5%) | 8 (7.3%) | 21 (21%) | 47 (4.9%) | |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||
#bin pain 2 and bin pain 3 significant (p<.001)
#month 2
e2_full_long %>%
filter(time == 2) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**WK8 Table: Features by Estrogen Bin, 8 weeks**") %>%
add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Overall, N = 3841 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 361 | low E2, N = 341 | mid E2, N = 3141 | |||
| ED_Age | 37 (13) | 31 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.5 | ||||
| Some College or Associate Degree | 155 (40%) | 15 (42%) | 9 (26%) | 131 (42%) | |
| Advanced Degree | 27 (7.0%) | 2 (5.6%) | 1 (2.9%) | 24 (7.6%) | |
| High School or Less | 135 (35%) | 14 (39%) | 15 (44%) | 106 (34%) | |
| Bachelor's Degree | 67 (17%) | 5 (14%) | 9 (26%) | 53 (17%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.6 |
| Race | 0.3 | ||||
| Hispanic | 55 (14%) | 3 (8.3%) | 6 (18%) | 46 (15%) | |
| Non-Hispanic Black | 170 (44%) | 16 (44%) | 11 (32%) | 143 (46%) | |
| Non-Hispanic Other | 23 (6.0%) | 2 (5.6%) | 5 (15%) | 16 (5.1%) | |
| Non-Hispanic White | 136 (35%) | 15 (42%) | 12 (35%) | 109 (35%) | |
| sqrt.conc | 5.84 (3.11) | 13.47 (2.66) | 2.45 (0.40) | 5.33 (1.67) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.7 |
| bin_pain | 0.3 | ||||
| Low to None | 174 (45%) | 13 (36%) | 13 (38%) | 148 (47%) | |
| Moderate to Severe | 210 (55%) | 23 (64%) | 21 (62%) | 166 (53%) | |
| pain | 5 (3) | 5 (3) | 6 (3) | 5 (3) | 0.10 |
| bin_pain2 | 0.3 | ||||
| 0 or 1 pain | 72 (19%) | 7 (19%) | 4 (12%) | 61 (19%) | |
| 2 to 8 pain | 267 (70%) | 26 (72%) | 22 (65%) | 219 (70%) | |
| 9 or 10 pain | 45 (12%) | 3 (8.3%) | 8 (24%) | 34 (11%) | |
| bin_pain3 | 0.047 | ||||
| 0 pain | 48 (13%) | 6 (17%) | 2 (5.9%) | 40 (13%) | |
| 1 to 9 pain | 307 (80%) | 27 (75%) | 25 (74%) | 255 (81%) | |
| 10 pain | 29 (7.6%) | 3 (8.3%) | 7 (21%) | 19 (6.1%) | |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test | |||||
#bin pain 3 significant (p=0.047) but not bin_pain2
#month 3
e2_full_long %>%
filter(time == 3) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M3 Table: Features by Estrogen Bin, 3 months**") %>%
add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Overall, N = 3821 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 361 | low E2, N = 321 | mid E2, N = 3141 | |||
| ED_Age | 37 (13) | 32 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.7 | ||||
| Some College or Associate Degree | 152 (40%) | 15 (42%) | 9 (28%) | 128 (41%) | |
| Advanced Degree | 27 (7.1%) | 2 (5.6%) | 1 (3.1%) | 24 (7.6%) | |
| High School or Less | 139 (36%) | 14 (39%) | 14 (44%) | 111 (35%) | |
| Bachelor's Degree | 64 (17%) | 5 (14%) | 8 (25%) | 51 (16%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.6 |
| Race | 0.3 | ||||
| Hispanic | 55 (14%) | 3 (8.3%) | 5 (16%) | 47 (15%) | |
| Non-Hispanic Black | 169 (44%) | 15 (42%) | 12 (38%) | 142 (45%) | |
| Non-Hispanic Other | 23 (6.0%) | 2 (5.6%) | 5 (16%) | 16 (5.1%) | |
| Non-Hispanic White | 135 (35%) | 16 (44%) | 10 (31%) | 109 (35%) | |
| sqrt.conc | 5.85 (3.08) | 13.38 (2.61) | 2.47 (0.38) | 5.33 (1.68) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.9 |
| bin_pain | 0.7 | ||||
| Low to None | 196 (51%) | 19 (53%) | 14 (44%) | 163 (52%) | |
| Moderate to Severe | 186 (49%) | 17 (47%) | 18 (56%) | 151 (48%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 4 (3) | 0.2 |
| bin_pain2 | 0.086 | ||||
| 0 or 1 pain | 105 (27%) | 13 (36%) | 5 (16%) | 87 (28%) | |
| 2 to 8 pain | 241 (63%) | 20 (56%) | 20 (63%) | 201 (64%) | |
| 9 or 10 pain | 36 (9.4%) | 3 (8.3%) | 7 (22%) | 26 (8.3%) | |
| bin_pain3 | 0.012 | ||||
| 0 pain | 72 (19%) | 10 (28%) | 3 (9.4%) | 59 (19%) | |
| 1 to 9 pain | 287 (75%) | 23 (64%) | 23 (72%) | 241 (77%) | |
| 10 pain | 23 (6.0%) | 3 (8.3%) | 6 (19%) | 14 (4.5%) | |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test | |||||
#bin_pain3 is sig (p=0.012) but not bin pain 2
#month 6
e2_full_long %>%
filter(time == 6) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, pain, e2_bin, bin_pain2, bin_pain3) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M6 Table: Features by Estrogen Bin, 6 months**") %>%
add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Overall, N = 3971 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 371 | low E2, N = 351 | mid E2, N = 3251 | |||
| ED_Age | 37 (13) | 31 (9) | 46 (12) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.7 | ||||
| Some College or Associate Degree | 159 (40%) | 15 (41%) | 10 (29%) | 134 (41%) | |
| Advanced Degree | 27 (6.8%) | 2 (5.4%) | 1 (2.9%) | 24 (7.4%) | |
| High School or Less | 142 (36%) | 14 (38%) | 15 (43%) | 113 (35%) | |
| Bachelor's Degree | 69 (17%) | 6 (16%) | 9 (26%) | 54 (17%) | |
| BMI | 30 (8) | 30 (9) | 28 (6) | 30 (8) | 0.6 |
| Race | 0.3 | ||||
| Hispanic | 57 (14%) | 3 (8.1%) | 6 (17%) | 48 (15%) | |
| Non-Hispanic Black | 175 (44%) | 16 (43%) | 12 (34%) | 147 (45%) | |
| Non-Hispanic Other | 23 (5.8%) | 2 (5.4%) | 5 (14%) | 16 (4.9%) | |
| Non-Hispanic White | 142 (36%) | 16 (43%) | 12 (34%) | 114 (35%) | |
| sqrt.conc | 5.84 (3.09) | 13.46 (2.63) | 2.46 (0.40) | 5.33 (1.67) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.8 |
| bin_pain | 0.5 | ||||
| Low to None | 231 (58%) | 22 (59%) | 17 (49%) | 192 (59%) | |
| Moderate to Severe | 166 (42%) | 15 (41%) | 18 (51%) | 133 (41%) | |
| pain | 4 (3) | 4 (3) | 5 (3) | 4 (3) | 0.086 |
| bin_pain2 | 0.021 | ||||
| 0 or 1 pain | 137 (35%) | 13 (35%) | 9 (26%) | 115 (35%) | |
| 2 to 8 pain | 226 (57%) | 21 (57%) | 17 (49%) | 188 (58%) | |
| 9 or 10 pain | 34 (8.6%) | 3 (8.1%) | 9 (26%) | 22 (6.8%) | |
| bin_pain3 | <0.001 | ||||
| 0 pain | 102 (26%) | 11 (30%) | 2 (5.7%) | 89 (27%) | |
| 1 to 9 pain | 271 (68%) | 24 (65%) | 25 (71%) | 222 (68%) | |
| 10 pain | 24 (6.0%) | 2 (5.4%) | 8 (23%) | 14 (4.3%) | |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Fisher’s exact test; Pearson’s Chi-squared test | |||||
#bin pain 2 sig (p=0.021) and bin pain 3 sig (p<.001)
hell yeah
stats for new categories
table(e2_full_long$e2_bin, e2_full_long$bin_pain3)
##
## 0 pain 1 to 9 pain 10 pain
## high E2 27 74 8
## low E2 7 73 21
## mid E2 188 718 47
round(prop.table(table(e2_full_long$e2_bin, e2_full_long$bin_pain3), margin = 1) * 100,2)
##
## 0 pain 1 to 9 pain 10 pain
## high E2 24.77 67.89 7.34
## low E2 6.93 72.28 20.79
## mid E2 19.73 75.34 4.93
chisq.test(e2_full_long[e2_full_long$time == 2, "e2_bin"], e2_full_long[e2_full_long$time == 2, "bin_pain3"]) # p-value = 0.03053
## Warning in chisq.test(e2_full_long[e2_full_long$time == 2, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: e2_full_long[e2_full_long$time == 2, "e2_bin"] and e2_full_long[e2_full_long$time == 2, "bin_pain3"]
## X-squared = 10.67, df = 4, p-value = 0.03053
chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"], e2_full_long[e2_full_long$time == 3, "bin_pain3"]) #p-value = 0.007257
## Warning in chisq.test(e2_full_long[e2_full_long$time == 3, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: e2_full_long[e2_full_long$time == 3, "e2_bin"] and e2_full_long[e2_full_long$time == 3, "bin_pain3"]
## X-squared = 14.012, df = 4, p-value = 0.007257
chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"], e2_full_long[e2_full_long$time == 6, "bin_pain3"]) #p-value = 7.419e-05
## Warning in chisq.test(e2_full_long[e2_full_long$time == 6, "e2_bin"],
## e2_full_long[e2_full_long$time == : Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: e2_full_long[e2_full_long$time == 6, "e2_bin"] and e2_full_long[e2_full_long$time == 6, "bin_pain3"]
## X-squared = 24.16, df = 4, p-value = 7.419e-05
redo model from above
MARCH 2024 NEW E2 DATA CODE GLS MODEL
e2_model <- gls(pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long)
summary(e2_model)
## Generalized least squares fit by REML
## Model: pain ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin
## Data: e2_full_long
## AIC BIC logLik
## 5563.071 5648.849 -2764.535
##
## Correlation Structure: ARMA(1,0)
## Formula: ~time | PID
## Parameter estimate(s):
## Phi1
## 0.7287282
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.1045118 1.2952495 0.852741 0.3940
## time -0.2372138 0.0453306 -5.232973 0.0000
## sqrt.conc 0.0769038 0.0720024 1.068072 0.2857
## ED_Age 0.0397639 0.0097896 4.061841 0.0001
## ED_HighestGradeAdvanced Degree -1.0276386 0.5174215 -1.986076 0.0473
## ED_HighestGradeHigh School or Less -0.0479274 0.2807562 -0.170708 0.8645
## ED_HighestGradeBachelor's Degree -0.7130133 0.3522310 -2.024277 0.0432
## BMI 0.0287845 0.0147894 1.946294 0.0519
## ED_Pain 0.2582287 0.0489249 5.278060 0.0000
## RaceNon-Hispanic Black -0.5138723 0.3735496 -1.375647 0.1692
## RaceNon-Hispanic Other -0.5722177 0.5989953 -0.955296 0.3396
## RaceNon-Hispanic White -0.5373559 0.3901958 -1.377144 0.1687
## Plate.Number -0.0233885 0.0186592 -1.253453 0.2103
## e2_binlow E2 1.6358354 0.9715319 1.683769 0.0925
## e2_binmid E2 0.4536050 0.7128541 0.636322 0.5247
##
## Correlation:
## (Intr) time sqrt.c ED_Age ED_HGAD ED_SoL
## time -0.143
## sqrt.conc -0.743 -0.001
## ED_Age -0.280 0.004 0.146
## ED_HighestGradeAdvanced Degree -0.153 0.002 0.013 -0.106
## ED_HighestGradeHigh School or Less -0.149 0.000 -0.001 0.117 0.231
## ED_HighestGradeBachelor's Degree -0.083 0.000 -0.019 -0.058 0.239 0.343
## BMI -0.237 0.006 -0.107 -0.180 0.072 0.044
## ED_Pain -0.272 0.002 -0.045 0.046 0.135 -0.073
## RaceNon-Hispanic Black -0.222 0.000 0.004 -0.069 0.224 0.038
## RaceNon-Hispanic Other -0.107 0.002 -0.096 0.040 0.110 0.007
## RaceNon-Hispanic White -0.266 -0.002 0.044 -0.090 0.134 0.055
## Plate.Number -0.206 0.000 0.029 -0.019 0.062 0.067
## e2_binlow E2 -0.709 -0.001 0.798 -0.039 0.040 -0.039
## e2_binmid E2 -0.771 -0.002 0.811 0.046 0.014 0.000
## ED_HGBD BMI ED_Pan RcN-HB RcN-HO RcN-HW
## time
## sqrt.conc
## ED_Age
## ED_HighestGradeAdvanced Degree
## ED_HighestGradeHigh School or Less
## ED_HighestGradeBachelor's Degree
## BMI 0.028
## ED_Pain 0.070 0.013
## RaceNon-Hispanic Black 0.075 -0.010 0.014
## RaceNon-Hispanic Other 0.056 0.019 0.097 0.464
## RaceNon-Hispanic White -0.048 -0.012 0.157 0.718 0.441
## Plate.Number 0.014 0.059 0.075 -0.037 -0.047 -0.105
## e2_binlow E2 -0.044 -0.033 -0.014 0.048 -0.105 0.092
## e2_binmid E2 -0.014 -0.079 -0.012 0.029 -0.063 0.083
## Plt.Nm e2_bnlE2
## time
## sqrt.conc
## ED_Age
## ED_HighestGradeAdvanced Degree
## ED_HighestGradeHigh School or Less
## ED_HighestGradeBachelor's Degree
## BMI
## ED_Pain
## RaceNon-Hispanic Black
## RaceNon-Hispanic Other
## RaceNon-Hispanic White
## Plate.Number
## e2_binlow E2 0.026
## e2_binmid E2 0.034 0.880
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.20825378 -0.80873595 0.03119517 0.73834813 2.56387776
##
## Residual standard error: 2.996335
## Degrees of freedom: 1163 total; 1148 residual
#recode the factors so the binned pain is the outcome of interest
e2_full_long$bin_pain4 <- recode_factor(e2_full_long$bin_pain3,
`0 pain` = "1",
`1 to 9 pain` = "2",
`10 pain` = "3") #new dimensions 1163 x 17
#multinomial logit regression
e2_model <- multinom(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long) #e2_bin low E2 p = 2.398452e-03 = 0.002398452
## # weights: 48 (30 variable)
## initial value 1277.686092
## iter 10 value 976.255582
## iter 20 value 828.205849
## iter 30 value 760.275081
## iter 40 value 757.915874
## final value 757.915779
## converged
#library(kableExtra)
tidy(e2_model, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE)
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 1.0845445 | 0.8347454 | 1.2992519 | 0.1938575 | -0.5515263 | 2.7206154 |
| 2 | time | -0.1986780 | 0.0441989 | -4.4950857 | 0.0000070 | -0.2853063 | -0.1120497 |
| 2 | sqrt.conc | -0.0246233 | 0.0448684 | -0.5487893 | 0.5831500 | -0.1125638 | 0.0633172 |
| 2 | ED_Age | 0.0241006 | 0.0066603 | 3.6185254 | 0.0002963 | 0.0110466 | 0.0371546 |
| 2 | ED_HighestGradeAdvanced Degree | 0.0625538 | 0.3408485 | 0.1835238 | 0.8543871 | -0.6054970 | 0.7306046 |
| 2 | ED_HighestGradeHigh School or Less | -0.1319082 | 0.1793021 | -0.7356753 | 0.4619283 | -0.4833339 | 0.2195176 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.1582817 | 0.2390016 | 0.6622620 | 0.5078034 | -0.3101529 | 0.6267163 |
| 2 | BMI | 0.0116255 | 0.0097880 | 1.1877339 | 0.2349383 | -0.0075586 | 0.0308096 |
| 2 | ED_Pain | 0.0897672 | 0.0315763 | 2.8428666 | 0.0044710 | 0.0278788 | 0.1516556 |
| 2 | RaceNon-Hispanic Black | -0.5316580 | 0.2670261 | -1.9910341 | 0.0464771 | -1.0550195 | -0.0082965 |
| 2 | RaceNon-Hispanic Other | -0.3982038 | 0.3928737 | -1.0135670 | 0.3107894 | -1.1682220 | 0.3718145 |
| 2 | RaceNon-Hispanic White | -0.3554165 | 0.2777867 | -1.2794584 | 0.2007357 | -0.8998683 | 0.1890354 |
| 2 | Plate.Number | -0.0205870 | 0.0118948 | -1.7307527 | 0.0834959 | -0.0439005 | 0.0027264 |
| 2 | e2_binlow E2 | 0.8054025 | 0.6832337 | 1.1788096 | 0.2384740 | -0.5337110 | 2.1445161 |
| 2 | e2_binmid E2 | 0.0402581 | 0.4401452 | 0.0914656 | 0.9271226 | -0.8224105 | 0.9029268 |
| 3 | (Intercept) | -4.3091417 | 1.5604971 | -2.7613905 | 0.0057556 | -7.3676598 | -1.2506237 |
| 3 | time | -0.2171498 | 0.0828435 | -2.6212044 | 0.0087620 | -0.3795201 | -0.0547795 |
| 3 | sqrt.conc | 0.1403640 | 0.0810177 | 1.7325095 | 0.0831829 | -0.0184278 | 0.2991558 |
| 3 | ED_Age | 0.0518864 | 0.0123929 | 4.1868010 | 0.0000283 | 0.0275969 | 0.0761759 |
| 3 | ED_HighestGradeAdvanced Degree | -1.5629041 | 1.0823019 | -1.4440556 | 0.1487233 | -3.6841767 | 0.5583686 |
| 3 | ED_HighestGradeHigh School or Less | -0.1705342 | 0.3118840 | -0.5467874 | 0.5845248 | -0.7818157 | 0.4407472 |
| 3 | ED_HighestGradeBachelor’s Degree | -0.6793027 | 0.5122167 | -1.3262017 | 0.1847729 | -1.6832291 | 0.3246236 |
| 3 | BMI | -0.0214102 | 0.0196563 | -1.0892260 | 0.2760542 | -0.0599359 | 0.0171155 |
| 3 | ED_Pain | 0.3089434 | 0.0655578 | 4.7125342 | 0.0000024 | 0.1804525 | 0.4374343 |
| 3 | RaceNon-Hispanic Black | -0.5619284 | 0.4226214 | -1.3296261 | 0.1836415 | -1.3902511 | 0.2663943 |
| 3 | RaceNon-Hispanic Other | -0.8826778 | 0.7154517 | -1.2337350 | 0.2173017 | -2.2849373 | 0.5195818 |
| 3 | RaceNon-Hispanic White | -0.6647507 | 0.4814270 | -1.3807924 | 0.1673428 | -1.6083302 | 0.2788288 |
| 3 | Plate.Number | -0.0440065 | 0.0225823 | -1.9487162 | 0.0513293 | -0.0882670 | 0.0002540 |
| 3 | e2_binlow E2 | 3.3565935 | 1.1056458 | 3.0358670 | 0.0023985 | 1.1895676 | 5.5236193 |
| 3 | e2_binmid E2 | 0.7428651 | 0.7929725 | 0.9368107 | 0.3488560 | -0.8113324 | 2.2970625 |
#e2_bin low E2 on category 3 (10 pain) p = 2.398452e-03 = 0.002398452
#this means that having a LOW E2 category is significantly associated with having level 10 PAIN rather than Level 0 Pain
# Fit the multinomial logistic regression model for cases where e2_full_long$time == 2 months
model_m2 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin,
subset = time == 2, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights: 45 (28 variable)
## initial value 421.867119
## iter 10 value 246.880449
## iter 20 value 217.350612
## iter 30 value 215.581016
## iter 40 value 215.559264
## iter 50 value 215.558537
## iter 50 value 215.558536
## iter 50 value 215.558536
## final value 215.558536
## converged
tidy(model_m2, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE) #low e2 significant vs level 10 pain
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 0.0858882 | 1.7295095 | 4.966050e-02 | 0.9603930 | -3.3038880 | 3.4756645 |
| 2 | sqrt.conc | -0.0147206 | 0.0959880 | -1.533589e-01 | 0.8781152 | -0.2028536 | 0.1734124 |
| 2 | ED_Age | 0.0334912 | 0.0143580 | 2.332580e+00 | 0.0196702 | 0.0053500 | 0.0616325 |
| 2 | ED_HighestGradeAdvanced Degree | 0.1225345 | 0.7225473 | 1.695868e-01 | 0.8653351 | -1.2936321 | 1.5387012 |
| 2 | ED_HighestGradeHigh School or Less | -0.4313307 | 0.3744552 | -1.151889e+00 | 0.2493667 | -1.1652494 | 0.3025879 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.1035662 | 0.5042208 | 2.053985e-01 | 0.8372608 | -0.8846884 | 1.0918208 |
| 2 | BMI | -0.0005951 | 0.0197932 | -3.006750e-02 | 0.9760132 | -0.0393891 | 0.0381989 |
| 2 | ED_Pain | 0.2279952 | 0.0642853 | 3.546617e+00 | 0.0003902 | 0.1019984 | 0.3539920 |
| 2 | RaceNon-Hispanic Black | -0.9484818 | 0.6599985 | -1.437097e+00 | 0.1506904 | -2.2420551 | 0.3450915 |
| 2 | RaceNon-Hispanic Other | -0.8525189 | 0.8584694 | -9.930685e-01 | 0.3206766 | -2.5350880 | 0.8300502 |
| 2 | RaceNon-Hispanic White | -0.9190518 | 0.6705089 | -1.370678e+00 | 0.1704753 | -2.2332251 | 0.3951215 |
| 2 | Plate.Number | 0.0172467 | 0.0253205 | 6.811342e-01 | 0.4957866 | -0.0323806 | 0.0668739 |
| 2 | e2_binlow E2 | 0.5784955 | 1.3879456 | 4.167998e-01 | 0.6768248 | -2.1418279 | 3.2988188 |
| 2 | e2_binmid E2 | 0.1494685 | 0.9184424 | 1.627413e-01 | 0.8707221 | -1.6506455 | 1.9495825 |
| 3 | (Intercept) | -6.3707373 | 2.7188448 | -2.343178e+00 | 0.0191203 | -11.6995752 | -1.0418994 |
| 3 | sqrt.conc | 0.2628004 | 0.1411389 | 1.861998e+00 | 0.0626034 | -0.0138268 | 0.5394275 |
| 3 | ED_Age | 0.0463402 | 0.0222611 | 2.081669e+00 | 0.0373727 | 0.0027093 | 0.0899712 |
| 3 | ED_HighestGradeAdvanced Degree | -14.2193859 | 0.0000018 | -8.033872e+06 | 0.0000000 | -14.2193894 | -14.2193825 |
| 3 | ED_HighestGradeHigh School or Less | -0.7225669 | 0.5709831 | -1.265478e+00 | 0.2056998 | -1.8416733 | 0.3965395 |
| 3 | ED_HighestGradeBachelor’s Degree | -0.6155575 | 0.8259645 | -7.452590e-01 | 0.4561151 | -2.2344182 | 1.0033032 |
| 3 | BMI | -0.0466263 | 0.0353995 | -1.317147e+00 | 0.1877895 | -0.1160081 | 0.0227554 |
| 3 | ED_Pain | 0.4455983 | 0.1157997 | 3.848008e+00 | 0.0001191 | 0.2186350 | 0.6725617 |
| 3 | RaceNon-Hispanic Black | -1.0157518 | 0.8513202 | -1.193149e+00 | 0.2328111 | -2.6843087 | 0.6528052 |
| 3 | RaceNon-Hispanic Other | -1.9980674 | 1.4362052 | -1.391213e+00 | 0.1641608 | -4.8129778 | 0.8168429 |
| 3 | RaceNon-Hispanic White | -0.9375516 | 0.9131053 | -1.026773e+00 | 0.3045275 | -2.7272050 | 0.8521018 |
| 3 | Plate.Number | 0.0154110 | 0.0394421 | 3.907258e-01 | 0.6959999 | -0.0618940 | 0.0927161 |
| 3 | e2_binlow E2 | 4.8051741 | 2.0343863 | 2.361977e+00 | 0.0181778 | 0.8178502 | 8.7924980 |
| 3 | e2_binmid E2 | 2.1336870 | 1.4507924 | 1.470705e+00 | 0.1413710 | -0.7098137 | 4.9771878 |
#Fit the model for time point 3 months
model_m3 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin,
subset = time == 3, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights: 45 (28 variable)
## initial value 419.669894
## iter 10 value 265.651792
## iter 20 value 245.264571
## iter 30 value 243.580449
## final value 243.578224
## converged
tidy(model_m3, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE) #no significance
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 0.1438903 | 1.4251253 | 0.1009668 | 0.9195768 | -2.6493040 | 2.9370847 |
| 2 | sqrt.conc | -0.0282643 | 0.0773150 | -0.3655737 | 0.7146832 | -0.1797990 | 0.1232703 |
| 2 | ED_Age | 0.0265784 | 0.0116658 | 2.2783275 | 0.0227071 | 0.0037140 | 0.0494429 |
| 2 | ED_HighestGradeAdvanced Degree | 0.1810048 | 0.6172130 | 0.2932614 | 0.7693223 | -1.0287105 | 1.3907201 |
| 2 | ED_HighestGradeHigh School or Less | -0.0572961 | 0.3072826 | -0.1864606 | 0.8520835 | -0.6595590 | 0.5449667 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.3226113 | 0.4328355 | 0.7453439 | 0.4560638 | -0.5257307 | 1.1709534 |
| 2 | BMI | 0.0202893 | 0.0174147 | 1.1650659 | 0.2439923 | -0.0138429 | 0.0544214 |
| 2 | ED_Pain | 0.0500940 | 0.0552732 | 0.9062990 | 0.3647776 | -0.0582394 | 0.1584275 |
| 2 | RaceNon-Hispanic Black | -0.7732148 | 0.4909491 | -1.5749389 | 0.1152706 | -1.7354572 | 0.1890277 |
| 2 | RaceNon-Hispanic Other | -0.2607000 | 0.7272638 | -0.3584668 | 0.7199940 | -1.6861108 | 1.1647109 |
| 2 | RaceNon-Hispanic White | -0.6709575 | 0.5101474 | -1.3152227 | 0.1884351 | -1.6708281 | 0.3289131 |
| 2 | Plate.Number | -0.0070094 | 0.0207073 | -0.3384981 | 0.7349879 | -0.0475949 | 0.0335762 |
| 2 | e2_binlow E2 | 0.4799754 | 1.1288069 | 0.4252059 | 0.6706866 | -1.7324456 | 2.6923963 |
| 2 | e2_binmid E2 | 0.2094152 | 0.7478034 | 0.2800404 | 0.7794465 | -1.2562526 | 1.6750830 |
| 3 | (Intercept) | -5.8817990 | 2.7799044 | -2.1158278 | 0.0343595 | -11.3303114 | -0.4332865 |
| 3 | sqrt.conc | 0.1531560 | 0.1468975 | 1.0426041 | 0.2971317 | -0.1347579 | 0.4410698 |
| 3 | ED_Age | 0.0506380 | 0.0229009 | 2.2111761 | 0.0270236 | 0.0057530 | 0.0955230 |
| 3 | ED_HighestGradeAdvanced Degree | 0.0678288 | 1.2654705 | 0.0535997 | 0.9572541 | -2.4124479 | 2.5481055 |
| 3 | ED_HighestGradeHigh School or Less | 0.2665381 | 0.5757837 | 0.4629136 | 0.6434263 | -0.8619772 | 1.3950535 |
| 3 | ED_HighestGradeBachelor’s Degree | -0.0078086 | 0.9238302 | -0.0084524 | 0.9932560 | -1.8184825 | 1.8028652 |
| 3 | BMI | -0.0015620 | 0.0341004 | -0.0458069 | 0.9634641 | -0.0683975 | 0.0652734 |
| 3 | ED_Pain | 0.3746751 | 0.1269878 | 2.9504817 | 0.0031728 | 0.1257836 | 0.6235665 |
| 3 | RaceNon-Hispanic Black | -1.1860836 | 0.7500276 | -1.5813866 | 0.1137897 | -2.6561106 | 0.2839435 |
| 3 | RaceNon-Hispanic Other | -1.1249035 | 1.3691195 | -0.8216255 | 0.4112901 | -3.8083284 | 1.5585215 |
| 3 | RaceNon-Hispanic White | -0.7911548 | 0.8243052 | -0.9597838 | 0.3371640 | -2.4067633 | 0.8244537 |
| 3 | Plate.Number | -0.0840347 | 0.0453981 | -1.8510609 | 0.0641608 | -0.1730133 | 0.0049440 |
| 3 | e2_binlow E2 | 2.9876362 | 1.8874151 | 1.5829248 | 0.1134386 | -0.7116295 | 6.6869018 |
| 3 | e2_binmid E2 | 0.5806531 | 1.3310391 | 0.4362404 | 0.6626623 | -2.0281356 | 3.1894418 |
#Fit the model for time point 6 months
model_m6 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin,
subset = time == 6, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights: 45 (28 variable)
## initial value 436.149079
## iter 10 value 306.109642
## iter 20 value 280.123037
## iter 30 value 278.298475
## iter 40 value 278.254523
## iter 50 value 278.253270
## iter 50 value 278.253268
## iter 50 value 278.253268
## final value 278.253268
## converged
tidy(model_m6, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE) #plate number is significant????, no e2 significance
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 0.5545904 | 1.2574649 | 4.410385e-01 | 0.6591851 | -1.9099955 | 3.0191763 |
| 2 | sqrt.conc | -0.0206597 | 0.0689491 | -2.996363e-01 | 0.7644546 | -0.1557975 | 0.1144782 |
| 2 | ED_Age | 0.0182652 | 0.0100995 | 1.808518e+00 | 0.0705258 | -0.0015295 | 0.0380599 |
| 2 | ED_HighestGradeAdvanced Degree | -0.0670360 | 0.5113894 | -1.310860e-01 | 0.8957073 | -1.0693408 | 0.9352688 |
| 2 | ED_HighestGradeHigh School or Less | -0.0116962 | 0.2802121 | -4.174060e-02 | 0.9667055 | -0.5609020 | 0.5375095 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.0680815 | 0.3563096 | 1.910741e-01 | 0.8484675 | -0.6302724 | 0.7664355 |
| 2 | BMI | 0.0119860 | 0.0150889 | 7.943606e-01 | 0.4269855 | -0.0175877 | 0.0415597 |
| 2 | ED_Pain | 0.0405024 | 0.0494682 | 8.187562e-01 | 0.4129255 | -0.0564535 | 0.1374584 |
| 2 | RaceNon-Hispanic Black | -0.2305422 | 0.3803868 | -6.060730e-01 | 0.5444663 | -0.9760867 | 0.5150023 |
| 2 | RaceNon-Hispanic Other | -0.3601504 | 0.5932052 | -6.071261e-01 | 0.5437672 | -1.5228113 | 0.8025105 |
| 2 | RaceNon-Hispanic White | 0.0853640 | 0.3992413 | 2.138155e-01 | 0.8306909 | -0.6971346 | 0.8678625 |
| 2 | Plate.Number | -0.0524656 | 0.0184486 | -2.843882e+00 | 0.0044568 | -0.0886242 | -0.0163070 |
| 2 | e2_binlow E2 | 1.3410510 | 1.1340476 | 1.182535e+00 | 0.2369935 | -0.8816414 | 3.5637434 |
| 2 | e2_binmid E2 | -0.1077641 | 0.6895640 | -1.562786e-01 | 0.8758134 | -1.4592846 | 1.2437564 |
| 3 | (Intercept) | -3.0896264 | 2.8204937 | -1.095420e+00 | 0.2733325 | -8.6176924 | 2.4384396 |
| 3 | sqrt.conc | -0.0463444 | 0.1645973 | -2.815624e-01 | 0.7782791 | -0.3689492 | 0.2762604 |
| 3 | ED_Age | 0.0722735 | 0.0220570 | 3.276674e+00 | 0.0010504 | 0.0290426 | 0.1155043 |
| 3 | ED_HighestGradeAdvanced Degree | -13.9746453 | 0.0000027 | -5.102638e+06 | 0.0000000 | -13.9746507 | -13.9746399 |
| 3 | ED_HighestGradeHigh School or Less | -0.0954912 | 0.5289536 | -1.805284e-01 | 0.8567377 | -1.1322212 | 0.9412389 |
| 3 | ED_HighestGradeBachelor’s Degree | -1.5368456 | 1.1258387 | -1.365067e+00 | 0.1722319 | -3.7434489 | 0.6697578 |
| 3 | BMI | -0.0224125 | 0.0352416 | -6.359679e-01 | 0.5247974 | -0.0914848 | 0.0466598 |
| 3 | ED_Pain | 0.1922234 | 0.1068746 | 1.798588e+00 | 0.0720838 | -0.0172470 | 0.4016938 |
| 3 | RaceNon-Hispanic Black | 0.1720303 | 0.7607170 | 2.261424e-01 | 0.8210907 | -1.3189476 | 1.6630083 |
| 3 | RaceNon-Hispanic Other | 0.1933064 | 1.1271797 | 1.714956e-01 | 0.8638341 | -2.0159253 | 2.4025380 |
| 3 | RaceNon-Hispanic White | -0.9022036 | 0.9595953 | -9.401918e-01 | 0.3471192 | -2.7829758 | 0.9785685 |
| 3 | Plate.Number | -0.0614748 | 0.0387215 | -1.587613e+00 | 0.1123738 | -0.1373677 | 0.0144180 |
| 3 | e2_binlow E2 | 1.6842627 | 2.0821929 | 8.088889e-01 | 0.4185791 | -2.3967604 | 5.7652858 |
| 3 | e2_binmid E2 | -0.9692742 | 1.5326311 | -6.324250e-01 | 0.5271092 | -3.9731760 | 2.0346275 |
#m <- glmer(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin + (1 | PID), data = e2_full_long, family = binomial, control = glmerControl(optimizer = "bobyqa"), nAGQ = 10)
miRNA original code for miRNA
E2_women_wide_all<- E2_women_wide %>%
inner_join(ID_miRNA,by="PID")
E2_women_long_all<- E2_women_long %>%
inner_join(ID_miRNA,by="PID")
## Warning in inner_join(., ID_miRNA, by = "PID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 176 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
NEW MARCH 2024 CODE FOR miRNA
none of the PIDs match up between the new estrogen data and the miRNA data
#unique(e2_full_long$PID)
#ID_miRNA$PID
#typeof(e2_full_long$PID)
#typeof(ID_miRNA$PID)
e2_wide_all<- e2_full_wide %>%
inner_join(ID_miRNA,by="PID")
E2_long_all<- e2_full_long %>%
inner_join(ID_miRNA,by="PID")