library(data.table);library(ggfortify)
library(factoextra);library(ClassDiscovery)
library(stringr);library(lattice);library(kableExtra)
library(ggplot2);library(MASS);library(memisc)
library(tidyverse);library(naniar);library(gtsummary)
require(lme4); require(nnet);
library(broom);library(readxl);library(nlme);library(sjPlot)
library(flexmix);library(poLCA)

read in and subset data

e2_full_long <- as.data.frame(read.csv("cleaned_E2_and_Aurora_Data.csv"))

#subset to estrogen collected at timepoint 0 == e2 collected at ED visit. convert pain to numeric
e2_full_long <- e2_full_long %>% 
  filter(AlternateID == "T0") %>%
  mutate(NRS_pain = as.numeric(NRS_pain))
#1296 x 27

length(unique(e2_full_long[["PID"]])) #432 PID
## [1] 432

March 2024 Batch 2 GLS model

#need complete cases to run this model. 
e2_full_long_gls_complete <- e2_full_long[complete.cases(e2_full_long),] #708 
e2_full_long_gls <- e2_full_long[complete.cases(e2_full_long$NRS_pain), ] #1254


#e2_model <- gls(NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)

#e2_model <- gls(NRS_pain ~ 1, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)

#tab_model(e2_model)

Use flexmix package for mixture modeling

df <- e2_full_long_gls #1254 
df6 <- e2_full_long_gls %>% filter(timepoint == 6) #432 obs
df6_complete <- e2_full_long_gls_complete %>% filter(timepoint == 6) #236 obs

#attempt #1
m <- flexmix(NRS_pain ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, k = 2, data=df6_complete)
summary(m)
## 
## Call:
## flexmix(formula = NRS_pain ~ sqrt.conc + ED_Age + ED_HighestGrade + 
##     BMI + ED_Pain + Race + Plate.Number, data = df6_complete, 
##     k = 2)
## 
##        prior size post>0 ratio
## Comp.1 0.199   42    126 0.333
## Comp.2 0.801  194    236 0.822
## 
## 'log Lik.' -521.7948 (df=27)
## AIC: 1097.59   BIC: 1191.113
parameters(m, component = 1)
##                                                           Comp.1
## coef.(Intercept)                                      6.52428059
## coef.sqrt.conc                                       -0.11282087
## coef.ED_Age                                          -0.03751555
## coef.ED_HighestGradeBachelor's Degree                -2.03571560
## coef.ED_HighestGradeHigh School or Less              -1.35848215
## coef.ED_HighestGradeSome College or Associate Degree -1.26509036
## coef.BMI                                              0.16228141
## coef.ED_Pain                                         -0.64069568
## coef.RaceNon-Hispanic Black                          -0.89860325
## coef.RaceNon-Hispanic Other                          -5.94522871
## coef.RaceNon-Hispanic White                          -2.43011311
## coef.Plate.Number                                     0.20282749
## sigma                                                 0.60344288

attempt #2

ggplot(df6, aes(x = NRS_pain)) + 
  geom_histogram(aes(NRS_pain, ..density..), binwidth = 1, colour = "black", fill = "white")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#k parameters 2 instead of 3
mo1 <- FLXMRglm(family = "gaussian")
mo2 <- FLXMRglm(family = "gaussian")
#mo3 <- FLXMRglm(family = "gaussian")
m2 <- flexmix(NRS_pain ~ 1, data = df6, k = 2, model = list(mo1, mo2))
#flexfit <- flexmix(NRS_pain ~ 1, data = df6, k = 3, model = list(mo1, mo2, mo3))


c1 <- parameters(m2, component=1)[[1]]
c2 <- parameters(m2, component=2)[[1]]
#c3 <- parameters(m2, component=3)[[1]]

plot_mix_comps <- function(x, mu, sigma, lam) {
  lam * dnorm(x, mu, sigma)
}

lam <- table(clusters(m2))
  
ggplot(df6) +
geom_histogram(aes(NRS_pain, ..density..), binwidth = 1, colour = "black", fill = "white") +
stat_function(geom = "line", fun = plot_mix_comps,
                args = list(c1[1], c1[2], lam[1]/sum(lam)),
                colour = "red", lwd = 1.5) +
stat_function(geom = "line", fun = plot_mix_comps,
                args = list(c2[1], c2[2], lam[2]/sum(lam)),
                colour = "blue", lwd = 1.5) +
#stat_function(geom = "line", fun = plot_mix_comps,args = list(c3[1], c3[2], lam[3]/sum(lam)),colour = "green", lwd = 1.5) +
ylab("Density")

#maybe

mixture model attempt #3

seed=69

df6_simple <- df6[,c("PID","NRS_pain","sqrt.conc")]

ggplot(df, aes(x = NRS_pain)) + 
  geom_density(color = "black", fill = "white") +
  labs(x = "Pain", y = "Density", title = "Density Plot of Pain all times")

ggplot(df6_simple, aes(x = NRS_pain)) + 
  geom_density(color = "black", fill = "white") +
  labs(x = "Pain", y = "Density", title = "Density Plot of Pain MONTH 6")

#m3 = flexmix(NRS_pain ~ sqrt.conc, data=df6_simple, k=2)
#summary(m3)
#parameters(m3)

latent class analysis

#recode e2_bin so it can be used in the model
df6_lca <- df6_complete %>%
  mutate(e2_bin = recode(e2_bin, "low E2" = 1, "mid E2" = 2, "high E2" = 3))

#everything numeric start at 1, so increment NRS pain
df6_lca$NRS_pain <- df6_lca$NRS_pain + 1

#increment postmenopause
df6_lca$postmenopause <- df6_lca$postmenopause + 1

#increment axial
df6_lca$Axial_Pain <- df6_lca$Axial_Pain + 1

#increment widespread
df6_lca$Widespread_pain_2016 <- df6_lca$Widespread_pain_2016 + 1

#round conc
df6_lca$sqrt.conc <- round(df6_lca$sqrt.conc,digits=0)

#recode bmi
breaks <- c(-Inf, 18.5, 24.9, 29.9, Inf)
labels <- c("Underweight", "Healthy Weight", "Overweight", "Obesity")
df6_lca <- df6_lca %>%
  mutate(BMI_cat = cut(BMI, breaks = breaks, labels = labels, include.lowest = TRUE))

#bin pains
df6_lca$bin_pain2 <- as.factor(df6_lca$bin_pain2)

levels(df6_lca$bin_pain2)
## [1] "1 to 9 pain" "10 pain"
f1 <- cbind(NRS_pain, sqrt.conc, Widespread_pain_2016,Axial_Pain, postmenopause,e2_bin)~1
f2 <- cbind(NRS_pain, sqrt.conc, Widespread_pain_2016,Axial_Pain, postmenopause,e2_bin, e2_confidence)~1
f3 <- cbind(Widespread_pain_2016,Axial_Pain, postmenopause, e2_confidence, BMI_cat,bin_pain2)~1


LCA2 <- poLCA(f1, data=df6_lca, nclass=2) #AIC(2): 3371.711 BIC(2): 3610.715
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $NRS_pain
##           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:      0 0.1937 0.1285 0.2729 0.1610 0.1184 0.0786 0.0226 0.0103 0.0000
## class 2:      0 0.0088 0.0090 0.0203 0.0675 0.1266 0.1834 0.2227 0.1551 0.0624
##           Pr(11)
## class 1:  0.0139
## class 2:  0.1443
## 
## $sqrt.conc
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:  0.0000 0.0587 0.1492 0.2392 0.2078 0.1514 0.0612 0.0215 0.0293 0.0169
## class 2:  0.0078 0.0598 0.1163 0.1967 0.2387 0.0288 0.1123 0.0677 0.0378 0.0403
##           Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1:  0.0091 0.0196 0.0174 0.0000 0.0093 0.0000      0      0      0 0.0093
## class 2:  0.0157 0.0147 0.0087 0.0312 0.0000 0.0234      0      0      0 0.0000
## 
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.6045 0.3955
## class 2:  0.2875 0.7125
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.1581 0.8419
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.7357 0.2643
## class 2:  0.7386 0.2614
## 
## $e2_bin
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.2560 0.5504 0.1936
## class 2:  0.2138 0.4343 0.3519
## 
## Estimated class population shares 
##  0.4564 0.5436 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.4788 0.5212 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 69 
## residual degrees of freedom: 167 
## maximum log-likelihood: -1694.527 
##  
## AIC(2): 3527.055
## BIC(2): 3766.059
## G^2(2): 982.0811 (Likelihood ratio/deviance statistic) 
## X^2(2): 3672.267 (Chi-square goodness of fit) 
## 
LCA3 <- poLCA(f2, data=df6_lca, nclass=2) #AIC(2): 3957.45 BIC(2): 4217.238
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $NRS_pain
##           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:      0 0.0758 0.0606 0.1061 0.1212 0.0758 0.1515 0.1970 0.0909 0.0455
## class 2:      0 0.1000 0.0647 0.1471 0.1059 0.1412 0.1294 0.1059 0.0882 0.0294
##           Pr(11)
## class 1:  0.0758
## class 2:  0.0882
## 
## $sqrt.conc
##            Pr(1)  Pr(2)  Pr(3) Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:  0.0000 0.0000 0.0000   0.0 0.0000 0.0000 0.3182 0.1667 0.1212 0.1061
## class 2:  0.0059 0.0824 0.1824   0.3 0.3118 0.1176 0.0000 0.0000 0.0000 0.0000
##           Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1:  0.0455 0.0606 0.0455 0.0606 0.0152 0.0455      0      0      0 0.0152
## class 2:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000      0      0      0 0.0000
## 
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.4091 0.5909
## class 2:  0.4412 0.5588
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.4848 0.5152
## class 2:  0.5647 0.4353
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.9242 0.0758
## class 2:  0.6647 0.3353
## 
## $e2_bin
##            Pr(1)  Pr(2) Pr(3)
## class 1:  0.0000 0.0000     1
## class 2:  0.3235 0.6765     0
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.6667 0.1818 0.0758 0.0758
## class 2:  0.4059 0.2588 0.1412 0.1941
## 
## Estimated class population shares 
##  0.2797 0.7203 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.2797 0.7203 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 75 
## residual degrees of freedom: 161 
## maximum log-likelihood: -1903.725 
##  
## AIC(2): 3957.45
## BIC(2): 4217.238
## G^2(2): 1296.11 (Likelihood ratio/deviance statistic) 
## X^2(2): 8004.288 (Chi-square goodness of fit) 
## 
LCA4 <- poLCA(f2, data=df6_lca, nclass=3) #AIC(3): 3977.52 BIC(3): 4368.933
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $NRS_pain
##           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:      0 0.0957 0.0783 0.1391 0.1130 0.1304 0.1565 0.1217 0.0783 0.0261
## class 2:      0 0.1091 0.0364 0.1636 0.0909 0.1636 0.0727 0.0727 0.1091 0.0364
## class 3:      0 0.0758 0.0606 0.1061 0.1212 0.0758 0.1515 0.1970 0.0909 0.0455
##           Pr(11)
## class 1:  0.0609
## class 2:  0.1455
## class 3:  0.0758
## 
## $sqrt.conc
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:  0.0000 0.0000 0.0000 0.3652 0.4609 0.1739 0.0000 0.0000 0.0000 0.0000
## class 2:  0.0182 0.2545 0.5636 0.1636 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## class 3:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.3182 0.1667 0.1212 0.1061
##           Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000      0      0      0 0.0000
## class 2:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000      0      0      0 0.0000
## class 3:  0.0455 0.0606 0.0455 0.0606 0.0152 0.0455      0      0      0 0.0152
## 
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.4696 0.5304
## class 2:  0.3818 0.6182
## class 3:  0.4091 0.5909
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.5652 0.4348
## class 2:  0.5636 0.4364
## class 3:  0.4848 0.5152
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.7391 0.2609
## class 2:  0.5091 0.4909
## class 3:  0.9242 0.0758
## 
## $e2_bin
##           Pr(1) Pr(2) Pr(3)
## class 1:      0     1     0
## class 2:      1     0     0
## class 3:      0     0     1
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.4957 0.1913 0.1217 0.1913
## class 2:  0.2182 0.4000 0.1818 0.2000
## class 3:  0.6667 0.1818 0.0758 0.0758
## 
## Estimated class population shares 
##  0.4873 0.2331 0.2797 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.4873 0.2331 0.2797 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 113 
## residual degrees of freedom: 123 
## maximum log-likelihood: -1804.119 
##  
## AIC(3): 3834.237
## BIC(3): 4225.65
## G^2(3): 1096.897 (Likelihood ratio/deviance statistic) 
## X^2(3): 4775.175 (Chi-square goodness of fit) 
## 
LCA5 <- poLCA(f3, data=df6_lca, nclass=2) #AIC(2): 1979.188 BIC(2): 2038.073 #this and LCA6 are best
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.1844 0.8156
## class 2:  0.5640 0.4360
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.0070 0.9930
## class 2:  0.8272 0.1728
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.6979 0.3021
## class 2:  0.7582 0.2418
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.4403 0.1990 0.1536 0.2071
## class 2:  0.4993 0.2577 0.1065 0.1365
## 
## $BMI_cat
##           Underweight Healthy Weight Overweight Obesity
## class 1:       0.0322         0.2209     0.2437  0.5033
## class 2:       0.0088         0.2330     0.3702  0.3879
## 
## $bin_pain2
##           1 to 9 pain 10 pain
## class 1:       0.8002  0.1998
## class 2:       0.9764  0.0236
## 
## Estimated class population shares 
##  0.3473 0.6527 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3729 0.6271 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 21 
## residual degrees of freedom: 215 
## maximum log-likelihood: -1069.426 
##  
## AIC(2): 2180.851
## BIC(2): 2253.592
## G^2(2): 176.3656 (Likelihood ratio/deviance statistic) 
## X^2(2): 303.8513 (Chi-square goodness of fit) 
##  
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND 
## 
LCA6 <- poLCA(f3, data=df6_lca, nclass=3) #AIC(3): 1967.918 BIC(3): 2057.978 #this and LCA5 are best
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.7860 0.2140
## class 2:  0.4941 0.5059
## class 3:  0.2173 0.7827
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.8657 0.1343
## class 2:  0.8835 0.1165
## class 3:  0.0000 1.0000
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.8897 0.1103
## class 2:  0.7119 0.2881
## class 3:  0.7081 0.2919
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.1135 0.3591 0.4261 0.1014
## class 2:  0.6198 0.2297 0.0000 0.1505
## class 3:  0.4516 0.1984 0.1526 0.1973
## 
## $BMI_cat
##           Underweight Healthy Weight Overweight Obesity
## class 1:       0.0300         0.0000     0.5419  0.4281
## class 2:       0.0000         0.3044     0.3207  0.3749
## class 3:       0.0324         0.2272     0.2480  0.4924
## 
## $bin_pain2
##           1 to 9 pain 10 pain
## class 1:       1.0000  0.0000
## class 2:       0.9688  0.0312
## class 3:       0.8167  0.1833
## 
## Estimated class population shares 
##  0.1512 0.4658 0.3831 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.1229 0.4364 0.4407 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 32 
## residual degrees of freedom: 204 
## maximum log-likelihood: -1060.64 
##  
## AIC(3): 2185.28
## BIC(3): 2296.122
## G^2(3): 158.7938 (Likelihood ratio/deviance statistic) 
## X^2(3): 261.4029 (Chi-square goodness of fit) 
## 
# Calculate entropy (3-class mode)l- values closer to 1.0 indicate greater separation of the classes.
entropy<-function (p) sum(-p*log(p))
error_prior <- entropy(LCA6$P) # Class proportions
error_post <- mean(apply(LCA6$posterior, 1, entropy))
(error_prior - error_post) / error_prior
## [1] NaN
#predicted class membership is in:
LCA6$predclass[1:30]
##  [1] 3 3 3 1 3 3 2 2 1 3 2 1 3 3 3 1 3 2 1 1 3 2 3 2 2 1 3 3 3 2
#add variable to data set with all variables so it can be used as predictor variable:
df6_lca$class <- LCA6$predclass
ggplot(df6_lca, aes(x = NRS_pain, fill = as.factor(class))) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain Across m6", y = "Density", title = "Pain m6 by latent class")

ggplot(df6_lca, aes(x = sqrt.conc, fill = as.factor(class))) +
  geom_density(alpha = 0.5) +
  labs(x = "e2 Across m6", y = "Density", title = "e2 m6 by latent class")

ggplot(df6_lca, aes(x = BMI, fill = as.factor(class))) +
  geom_density(alpha = 0.5) +
  labs(x = "bmi Across m6", y = "Density", title = "bmi m6 by latent class")

#adding the E2_confidence feature changes the E2 concentration binning by class
plot(LCA5)

plot(LCA6)

plotting affinity classes

f2a <- cbind(Widespread_pain_2016,Axial_Pain, postmenopause, e2_confidence, BMI_cat,bin_pain2)~e2_bin

nes2a <- poLCA(f2a,df6_lca,nclass=3,nrep=5) 
## Model 1: llik = -1040.359 ... best llik = -1040.359
## Model 2: llik = -1067.829 ... best llik = -1040.359
## Model 3: llik = -1041.097 ... best llik = -1040.359
## Model 4: llik = -1041.097 ... best llik = -1040.359
## Model 5: llik = -1040.359 ... best llik = -1040.359
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.1806 0.8194
## class 2:  0.9964 0.0036
## class 3:  0.4113 0.5887
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.3937 0.6063
## class 2:  1.0000 0.0000
## class 3:  0.4817 0.5183
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.4293 0.5707
## class 2:  0.7538 0.2462
## class 3:  0.9742 0.0258
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.2686 0.2932 0.1085 0.3296
## class 2:  0.3384 0.3507 0.1987 0.1122
## class 3:  0.6996 0.1489 0.1047 0.0468
## 
## $BMI_cat
##           Underweight Healthy Weight Overweight Obesity
## class 1:       0.0142         0.2075     0.4054  0.3729
## class 2:       0.0222         0.0821     0.4658  0.4299
## class 3:       0.0171         0.3028     0.2094  0.4707
## 
## $bin_pain2
##           1 to 9 pain 10 pain
## class 1:       0.8429  0.1571
## class 2:       1.0000  0.0000
## class 3:       0.9394  0.0606
## 
## Estimated class population shares 
##  0.3626 0.1787 0.4588 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3263 0.1992 0.4746 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)    -1.51919     0.81303   -1.869     0.063
## e2_bin          0.48322     0.44740    1.080     0.281
## ========================================================= 
## 3 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)    -5.18142     1.44477   -3.586         0
## e2_bin          2.62987     0.66141    3.976         0
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 34 
## residual degrees of freedom: 202 
## maximum log-likelihood: -1040.359 
##  
## AIC(3): 2148.718
## BIC(3): 2266.488
## X^2(3): 256.2171 (Chi-square goodness of fit) 
##  
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND 
##  
## ALERT: estimation algorithm automatically restarted with new initial values 
## 
pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)

matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
main="e2 as a predictor of pain affinity class",
xlab="estrogen category ",
ylab="Probability of latent class membership",lwd=2,col=1)
text(5.9,0.35,"Other")

summary table

length(unique(e2_full_long[["PID"]])) #432 PID
## [1] 432
e2_full_long %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Full Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
  #add_overall() %>%
  add_p()%>%
  bold_p()
Full Table: Features by Estrogen Bin, ALL TIME POINTS
Characteristic Relative Estrogen Bin p-value2
high E2, N = 3571 low E2, N = 2731 mid E2, N = 6661
ED_Age 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.001
    Advanced Degree 18 (5.0%) 18 (6.6%) 51 (7.7%)
    Bachelor's Degree 60 (17%) 69 (25%) 90 (14%)
    High School or Less 138 (39%) 93 (34%) 246 (37%)
    Some College or Associate Degree 141 (39%) 93 (34%) 279 (42%)
BMI 31 (10) 28 (6) 30 (8) 0.005
    Unknown 18 21 57
Widespread_pain_2016 152 (44%) 120 (46%) 232 (36%) 0.009
    Unknown 9 11 25
Axial_Pain 169 (62%) 127 (58%) 289 (56%) 0.3
    Unknown 86 54 152
ED_Pain 7 (2) 6 (3) 6 (3) 0.005
NRS_pain 4.5 (3.3) 4.4 (3.2) 4.0 (3.0) 0.028
    Unknown 9 9 24
bin_pain


0.2
    0 pain 77 (22%) 42 (16%) 123 (19%)
    Any pain 271 (78%) 222 (84%) 519 (81%)
    Unknown 9 9 24
bin_pain2


0.020
    0 pain 77 (22%) 42 (16%) 123 (19%)
    1 to 9 pain 247 (71%) 197 (75%) 490 (76%)
    10 pain 24 (6.9%) 25 (9.5%) 29 (4.5%)
    Unknown 9 9 24
postmenopause 27 (7.6%) 114 (42%) 153 (23%) <0.001
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
# NRS pain is significant by estrogen bin? 
#when all time points are combined, pain is significant across the different E2 categories
#but this has limited interpretability because pain above is across all time points

#summary table for MONTH 2 
#this is the only one sig for some reason
e2_full_long %>%
  filter(timepoint == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()
8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT
Characteristic Relative Estrogen Bin p-value2
high E2, N = 1191 low E2, N = 911 mid E2, N = 2221
ED_Age 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.3
    Advanced Degree 6 (5.0%) 6 (6.6%) 17 (7.7%)
    Bachelor's Degree 20 (17%) 23 (25%) 30 (14%)
    High School or Less 46 (39%) 31 (34%) 82 (37%)
    Some College or Associate Degree 47 (39%) 31 (34%) 93 (42%)
BMI 31 (10) 28 (6) 30 (8) 0.2
    Unknown 6 7 19
Widespread_pain_2016 57 (50%) 40 (47%) 77 (36%) 0.043
    Unknown 5 5 11
Axial_Pain 77 (79%) 68 (87%) 156 (85%) 0.3
    Unknown 21 13 38
ED_Pain 7 (2) 6 (3) 6 (3) 0.2
NRS_pain 5.1 (3.1) 4.7 (3.0) 4.4 (2.9) 0.095
    Unknown 5 4 11
bin_pain


0.7
    0 pain 16 (14%) 9 (10%) 27 (13%)
    Any pain 98 (86%) 78 (90%) 184 (87%)
    Unknown 5 4 11
bin_pain2


0.5
    0 pain 16 (14%) 9 (10%) 27 (13%)
    1 to 9 pain 87 (76%) 71 (82%) 173 (82%)
    10 pain 11 (9.6%) 7 (8.0%) 11 (5.2%)
    Unknown 5 4 11
postmenopause 9 (7.6%) 38 (42%) 51 (23%) <0.001
Plate.Number 11 (6) 12 (6) 10 (7) 0.2
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#summary table for MONTH 3
e2_full_long %>%
  filter(timepoint == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()
M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT
Characteristic Relative Estrogen Bin p-value2
high E2, N = 1191 low E2, N = 911 mid E2, N = 2221
ED_Age 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.3
    Advanced Degree 6 (5.0%) 6 (6.6%) 17 (7.7%)
    Bachelor's Degree 20 (17%) 23 (25%) 30 (14%)
    High School or Less 46 (39%) 31 (34%) 82 (37%)
    Some College or Associate Degree 47 (39%) 31 (34%) 93 (42%)
BMI 31 (10) 28 (6) 30 (8) 0.2
    Unknown 6 7 19
Widespread_pain_2016 47 (41%) 39 (45%) 77 (37%) 0.4
    Unknown 4 5 13
Axial_Pain 48 (53%) 31 (44%) 67 (40%) 0.12
    Unknown 29 20 55
ED_Pain 7 (2) 6 (3) 6 (3) 0.2
NRS_pain 4.4 (3.3) 4.5 (3.2) 3.9 (2.9) 0.3
    Unknown 4 5 13
bin_pain


0.6
    0 pain 25 (22%) 14 (16%) 38 (18%)
    Any pain 90 (78%) 72 (84%) 171 (82%)
    Unknown 4 5 13
bin_pain2


0.3
    0 pain 25 (22%) 14 (16%) 38 (18%)
    1 to 9 pain 82 (71%) 64 (74%) 163 (78%)
    10 pain 8 (7.0%) 8 (9.3%) 8 (3.8%)
    Unknown 4 5 13
postmenopause 9 (7.6%) 38 (42%) 51 (23%) <0.001
Plate.Number 11 (6) 12 (6) 10 (7) 0.2
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#summary table for MONTH 6
e2_full_long %>%
  filter(timepoint == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()
M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT
Characteristic Relative Estrogen Bin p-value2
high E2, N = 1191 low E2, N = 911 mid E2, N = 2221
ED_Age 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.3
    Advanced Degree 6 (5.0%) 6 (6.6%) 17 (7.7%)
    Bachelor's Degree 20 (17%) 23 (25%) 30 (14%)
    High School or Less 46 (39%) 31 (34%) 82 (37%)
    Some College or Associate Degree 47 (39%) 31 (34%) 93 (42%)
BMI 31 (10) 28 (6) 30 (8) 0.2
    Unknown 6 7 19
Widespread_pain_2016 48 (40%) 41 (46%) 78 (35%) 0.2
    Unknown 0 1 1
Axial_Pain 44 (53%) 28 (40%) 66 (40%) 0.14
    Unknown 36 21 59
ED_Pain 7 (2) 6 (3) 6 (3) 0.2
NRS_pain 3.9 (3.3) 4.1 (3.4) 3.5 (3.0) 0.4
bin_pain


0.3
    0 pain 36 (30%) 19 (21%) 58 (26%)
    Any pain 83 (70%) 72 (79%) 164 (74%)
bin_pain2


0.12
    0 pain 36 (30%) 19 (21%) 58 (26%)
    1 to 9 pain 78 (66%) 62 (68%) 154 (69%)
    10 pain 5 (4.2%) 10 (11%) 10 (4.5%)
postmenopause 9 (7.6%) 38 (42%) 51 (23%) <0.001
Plate.Number 11 (6) 12 (6) 10 (7) 0.2
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

what if I only look at low and high e2 levels? doesn’t work

e2_full_long %>%
  filter(e2_bin %in% c("high E2", "low E2")) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
              statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  add_p() %>% bold_p() #%>% add_overall()
Characteristic high E2, N = 3571 low E2, N = 2731 p-value2
ED_Age 33 (11) 44 (13) <0.001
ED_HighestGrade

0.042
    Advanced Degree 18 (5.0%) 18 (6.6%)
    Bachelor's Degree 60 (17%) 69 (25%)
    High School or Less 138 (39%) 93 (34%)
    Some College or Associate Degree 141 (39%) 93 (34%)
BMI 31 (10) 28 (6) 0.002
    Unknown 18 21
Widespread_pain_2016 152 (44%) 120 (46%) 0.6
    Unknown 9 11
Axial_Pain 169 (62%) 127 (58%) 0.3
    Unknown 86 54
ED_Pain 7 (2) 6 (3) 0.003
NRS_pain 4.5 (3.3) 4.4 (3.2) 0.9
    Unknown 9 9
bin_pain

0.054
    0 pain 77 (22%) 42 (16%)
    Any pain 271 (78%) 222 (84%)
    Unknown 9 9
bin_pain2

0.11
    0 pain 77 (22%) 42 (16%)
    1 to 9 pain 247 (71%) 197 (75%)
    10 pain 24 (6.9%) 25 (9.5%)
    Unknown 9 9
postmenopause 27 (7.6%) 114 (42%) <0.001
Plate.Number 11 (6) 12 (6) 0.088
1 Mean (SD); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test
#nothing. also repeated for each timepoint and nothing sig

i don’t know. it’s not signficant when stratified by time point. I think this is because it is auto-correlating when all time points are combined because you have three of each case

visuals

ggplot(e2_full_long, aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain Across All Time Points", y = "Density", title = "Pain All Time Points, by Estrogen Bin") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 
## Warning: Removed 42 rows containing non-finite outside the scale range
## (`stat_density()`).

#high and mid E2 peak at 0 pain. Everything mostly falls off at pain 10.
#BELOW: PAIN AT 2 MONTHS. Mostly mid level pain everyone
ggplot(data = e2_full_long %>% filter(timepoint == 2), aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 2 MONTHS", y = "Density", title = "Pain at 2 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_density()`).

#BELOW: PAIN AT 3 MONTHS, mostly mid level pain, high and mid e2 start to bounce back down to zero
ggplot(data = e2_full_long %>% filter(timepoint == 3), aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 3 MONTHS", y = "Density", title = "Pain at 3 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_density()`).

#BELOW: PAIN AT 6 MONTHS. Mid and high e2 have a strong peak at 0 pain, and high e2 has another strong bump at ~6 pain. Low e2 is mildly right skewed towards 10. 
ggplot(data = e2_full_long %>% filter(timepoint == 6), aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 6 MONTHS", y = "Density", title = "Pain at 6 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red")) 

Ok this is very useful!! We see that low E2 has fewer 0s and more 10s at each time point. This makes me think a different pain stratification should take place that focuses on the extremes of the pain NRS scale.

#High and Low E2 level at month 2 - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 2, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 2 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")
## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_bin()`).

#High and Low E2 level at month 3 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 3, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 3 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")
## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_bin()`).

#High and Low E2 level at month 6 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 6, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 6 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

chi square ?

table(e2_full_long$e2_bin, as.factor(e2_full_long$NRS_pain))
##          
##             0   1   2   3   4   5   6   7   8   9  10
##   high E2  77  17  18  20  32  34  45  30  38  13  24
##   low E2   42  23  17  28  27  27  25  20  25   5  25
##   mid E2  123  56  49  67  57  86  68  51  31  25  29
table(e2_full_long$e2_bin, e2_full_long$bin_pain)
##          
##           0 pain Any pain
##   high E2     77      271
##   low E2      42      222
##   mid E2     123      519
chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$NRS_pain))
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and as.factor(e2_full_long$NRS_pain)
## X-squared = 44.635, df = 20, p-value = 0.001236
chisq.test(e2_full_long$e2_bin, e2_full_long$bin_pain) #p-value = 0.1896
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and e2_full_long$bin_pain
## X-squared = 3.7424, df = 2, p-value = 0.1539
kruskal.test(NRS_pain ~ e2_bin, data = e2_full_long) # p-value = 0.00459
## 
##  Kruskal-Wallis rank sum test
## 
## data:  NRS_pain by e2_bin
## Kruskal-Wallis chi-squared = 7.1609, df = 2, p-value = 0.02786

summary table

#each time point
e2_full_long %>%
  #filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
 # add_overall() %>%
  add_p() %>%
  bold_p()
Combined Table: Features by Estrogen Bin, ALL TIME POINTS
Characteristic Relative Estrogen Bin p-value2
high E2, N = 3571 low E2, N = 2731 mid E2, N = 6661
ED_Age 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade


0.001
    Advanced Degree 18 (5.0%) 18 (6.6%) 51 (7.7%)
    Bachelor's Degree 60 (17%) 69 (25%) 90 (14%)
    High School or Less 138 (39%) 93 (34%) 246 (37%)
    Some College or Associate Degree 141 (39%) 93 (34%) 279 (42%)
BMI 31 (10) 28 (6) 30 (8) 0.005
    Unknown 18 21 57
Race


<0.001
    Hispanic 42 (12%) 54 (20%) 96 (15%)
    Non-Hispanic Black 165 (47%) 96 (35%) 324 (49%)
    Non-Hispanic Other 33 (9.3%) 15 (5.5%) 21 (3.2%)
    Non-Hispanic White 114 (32%) 108 (40%) 219 (33%)
    Unknown 3 0 6
sqrt.conc 9.63 (3.08) 3.01 (0.57) 4.85 (0.74) <0.001
ED_Pain 7 (2) 6 (3) 6 (3) 0.005
bin_pain


0.2
    0 pain 77 (22%) 42 (16%) 123 (19%)
    Any pain 271 (78%) 222 (84%) 519 (81%)
    Unknown 9 9 24
NRS_pain 4.5 (3.3) 4.4 (3.2) 4.0 (3.0) 0.028
    Unknown 9 9 24
bin_pain2


0.020
    0 pain 77 (22%) 42 (16%) 123 (19%)
    1 to 9 pain 247 (71%) 197 (75%) 490 (76%)
    10 pain 24 (6.9%) 25 (9.5%) 29 (4.5%)
    Unknown 9 9 24
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#bin pain 2 and bin pain 3 significant (p<.001)

#month 2
e2_full_long %>%
  filter(timepoint == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**WK8 Table: Features by Estrogen Bin, 8 weeks**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
WK8 Table: Features by Estrogen Bin, 8 weeks
Characteristic Overall, N = 4321 Relative Estrogen Bin p-value2
high E2, N = 1191 low E2, N = 911 mid E2, N = 2221
ED_Age 37 (13) 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade



0.3
    Advanced Degree 29 (6.7%) 6 (5.0%) 6 (6.6%) 17 (7.7%)
    Bachelor's Degree 73 (17%) 20 (17%) 23 (25%) 30 (14%)
    High School or Less 159 (37%) 46 (39%) 31 (34%) 82 (37%)
    Some College or Associate Degree 171 (40%) 47 (39%) 31 (34%) 93 (42%)
BMI 30 (8) 31 (10) 28 (6) 30 (8) 0.2
    Unknown 32 6 7 19
Race




    Hispanic 64 (15%) 14 (12%) 18 (20%) 32 (15%)
    Non-Hispanic Black 195 (45%) 55 (47%) 32 (35%) 108 (49%)
    Non-Hispanic Other 23 (5.4%) 11 (9.3%) 5 (5.5%) 7 (3.2%)
    Non-Hispanic White 147 (34%) 38 (32%) 36 (40%) 73 (33%)
    Unknown 3 1 0 2
sqrt.conc 5.78 (3.02) 9.63 (3.09) 3.01 (0.57) 4.85 (0.74) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.2
bin_pain



0.7
    0 pain 52 (13%) 16 (14%) 9 (10%) 27 (13%)
    Any pain 360 (87%) 98 (86%) 78 (90%) 184 (87%)
    Unknown 20 5 4 11
NRS_pain 4.7 (3.0) 5.1 (3.1) 4.7 (3.0) 4.4 (2.9) 0.095
    Unknown 20 5 4 11
bin_pain2



0.5
    0 pain 52 (13%) 16 (14%) 9 (10%) 27 (13%)
    1 to 9 pain 331 (80%) 87 (76%) 71 (82%) 173 (82%)
    10 pain 29 (7.0%) 11 (9.6%) 7 (8.0%) 11 (5.2%)
    Unknown 20 5 4 11
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#bin pain 3 significant (p=0.047) but not bin_pain2

#month 3
e2_full_long %>%
  filter(timepoint == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, 3 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
M3 Table: Features by Estrogen Bin, 3 months
Characteristic Overall, N = 4321 Relative Estrogen Bin p-value2
high E2, N = 1191 low E2, N = 911 mid E2, N = 2221
ED_Age 37 (13) 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade



0.3
    Advanced Degree 29 (6.7%) 6 (5.0%) 6 (6.6%) 17 (7.7%)
    Bachelor's Degree 73 (17%) 20 (17%) 23 (25%) 30 (14%)
    High School or Less 159 (37%) 46 (39%) 31 (34%) 82 (37%)
    Some College or Associate Degree 171 (40%) 47 (39%) 31 (34%) 93 (42%)
BMI 30 (8) 31 (10) 28 (6) 30 (8) 0.2
    Unknown 32 6 7 19
Race




    Hispanic 64 (15%) 14 (12%) 18 (20%) 32 (15%)
    Non-Hispanic Black 195 (45%) 55 (47%) 32 (35%) 108 (49%)
    Non-Hispanic Other 23 (5.4%) 11 (9.3%) 5 (5.5%) 7 (3.2%)
    Non-Hispanic White 147 (34%) 38 (32%) 36 (40%) 73 (33%)
    Unknown 3 1 0 2
sqrt.conc 5.78 (3.02) 9.63 (3.09) 3.01 (0.57) 4.85 (0.74) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.2
bin_pain



0.6
    0 pain 77 (19%) 25 (22%) 14 (16%) 38 (18%)
    Any pain 333 (81%) 90 (78%) 72 (84%) 171 (82%)
    Unknown 22 4 5 13
NRS_pain 4.2 (3.1) 4.4 (3.3) 4.5 (3.2) 3.9 (2.9) 0.3
    Unknown 22 4 5 13
bin_pain2



0.3
    0 pain 77 (19%) 25 (22%) 14 (16%) 38 (18%)
    1 to 9 pain 309 (75%) 82 (71%) 64 (74%) 163 (78%)
    10 pain 24 (5.9%) 8 (7.0%) 8 (9.3%) 8 (3.8%)
    Unknown 22 4 5 13
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#bin_pain3 is sig (p=0.012) but not bin pain 2 

#month 6
e2_full_long %>%
  filter(timepoint == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, 6 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
M6 Table: Features by Estrogen Bin, 6 months
Characteristic Overall, N = 4321 Relative Estrogen Bin p-value2
high E2, N = 1191 low E2, N = 911 mid E2, N = 2221
ED_Age 37 (13) 33 (11) 44 (13) 37 (14) <0.001
ED_HighestGrade



0.3
    Advanced Degree 29 (6.7%) 6 (5.0%) 6 (6.6%) 17 (7.7%)
    Bachelor's Degree 73 (17%) 20 (17%) 23 (25%) 30 (14%)
    High School or Less 159 (37%) 46 (39%) 31 (34%) 82 (37%)
    Some College or Associate Degree 171 (40%) 47 (39%) 31 (34%) 93 (42%)
BMI 30 (8) 31 (10) 28 (6) 30 (8) 0.2
    Unknown 32 6 7 19
Race




    Hispanic 64 (15%) 14 (12%) 18 (20%) 32 (15%)
    Non-Hispanic Black 195 (45%) 55 (47%) 32 (35%) 108 (49%)
    Non-Hispanic Other 23 (5.4%) 11 (9.3%) 5 (5.5%) 7 (3.2%)
    Non-Hispanic White 147 (34%) 38 (32%) 36 (40%) 73 (33%)
    Unknown 3 1 0 2
sqrt.conc 5.78 (3.02) 9.63 (3.09) 3.01 (0.57) 4.85 (0.74) <0.001
ED_Pain 6 (3) 7 (2) 6 (3) 6 (3) 0.2
bin_pain



0.3
    0 pain 113 (26%) 36 (30%) 19 (21%) 58 (26%)
    Any pain 319 (74%) 83 (70%) 72 (79%) 164 (74%)
NRS_pain 3.7 (3.2) 3.9 (3.3) 4.1 (3.4) 3.5 (3.0) 0.4
bin_pain2



0.12
    0 pain 113 (26%) 36 (30%) 19 (21%) 58 (26%)
    1 to 9 pain 294 (68%) 78 (66%) 62 (68%) 154 (69%)
    10 pain 25 (5.8%) 5 (4.2%) 10 (11%) 10 (4.5%)
1 Mean (SD); n (%)
2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test
#bin pain 2 sig (p=0.021) and bin pain 3 sig (p<.001)

stats for new categories

table(e2_full_long$e2_bin, e2_full_long$bin_pain2)
##          
##           0 pain 1 to 9 pain 10 pain
##   high E2     77         247      24
##   low E2      42         197      25
##   mid E2     123         490      29
round(prop.table(table(e2_full_long$e2_bin, e2_full_long$bin_pain2), margin = 1) * 100,2)
##          
##           0 pain 1 to 9 pain 10 pain
##   high E2  22.13       70.98    6.90
##   low E2   15.91       74.62    9.47
##   mid E2   19.16       76.32    4.52
chisq.test(e2_full_long[e2_full_long$timepoint == 2, "e2_bin"], e2_full_long[e2_full_long$timepoint == 2, "bin_pain2"]) # p-value = 0.03053
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$timepoint == 2, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 2, "bin_pain2"]
## X-squared = 3.0896, df = 4, p-value = 0.5429
chisq.test(e2_full_long[e2_full_long$timepoint == 3, "e2_bin"], e2_full_long[e2_full_long$timepoint == 3, "bin_pain2"]) #p-value = 0.007257
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$timepoint == 3, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 3, "bin_pain2"]
## X-squared = 4.7671, df = 4, p-value = 0.312
chisq.test(e2_full_long[e2_full_long$timepoint == 6, "e2_bin"], e2_full_long[e2_full_long$timepoint == 6, "bin_pain2"]) #p-value = 7.419e-05
## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$timepoint == 6, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 6, "bin_pain2"]
## X-squared = 7.3016, df = 4, p-value = 0.1208

redo model from above

MARCH 2024 NEW E2 DATA CODE GLS MODEL

e2_model <- gls(NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ timepoint | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)

summary(e2_model)
## Generalized least squares fit by REML
##   Model: NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade +      BMI + ED_Pain + Race + Plate.Number + e2_bin 
##   Data: e2_full_long_gls_complete 
##        AIC      BIC    logLik
##   3117.277 3194.474 -1541.638
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~timepoint | PID 
##  Parameter estimate(s):
##      Phi1 
## 0.7112736 
## 
## Coefficients:
##                                                      Value Std.Error   t-value
## (Intercept)                                      2.2400046 1.1756720  1.905297
## timepoint                                       -0.1500786 0.0480453 -3.123692
## sqrt.conc                                       -0.0013510 0.0759092 -0.017797
## ED_Age                                           0.0355369 0.0103781  3.424209
## ED_HighestGradeBachelor's Degree                 0.5937591 0.5415318  1.096444
## ED_HighestGradeHigh School or Less               1.4262587 0.5198131  2.743791
## ED_HighestGradeSome College or Associate Degree  1.4480613 0.5061676  2.860833
## BMI                                              0.0267255 0.0147797  1.808262
## ED_Pain                                          0.1949444 0.0518378  3.760663
## RaceNon-Hispanic Black                          -0.1159866 0.3971122 -0.292075
## RaceNon-Hispanic Other                          -0.6842188 0.6211972 -1.101452
## RaceNon-Hispanic White                          -0.7103415 0.4011102 -1.770938
## Plate.Number                                     0.0051389 0.0203987  0.251921
## e2_binlow E2                                    -0.3251953 0.6294793 -0.516610
## e2_binmid E2                                    -0.7181006 0.4736169 -1.516205
##                                                 p-value
## (Intercept)                                      0.0572
## timepoint                                        0.0019
## sqrt.conc                                        0.9858
## ED_Age                                           0.0007
## ED_HighestGradeBachelor's Degree                 0.2733
## ED_HighestGradeHigh School or Less               0.0062
## ED_HighestGradeSome College or Associate Degree  0.0044
## BMI                                              0.0710
## ED_Pain                                          0.0002
## RaceNon-Hispanic Black                           0.7703
## RaceNon-Hispanic Other                           0.2711
## RaceNon-Hispanic White                           0.0770
## Plate.Number                                     0.8012
## e2_binlow E2                                     0.6056
## e2_binmid E2                                     0.1299
## 
##  Correlation: 
##                                                 (Intr) timpnt sqrt.c ED_Age
## timepoint                                       -0.163                     
## sqrt.conc                                       -0.689  0.000              
## ED_Age                                          -0.317  0.000  0.095       
## ED_HighestGradeBachelor's Degree                -0.150  0.000 -0.055  0.039
## ED_HighestGradeHigh School or Less              -0.253  0.000  0.010  0.170
## ED_HighestGradeSome College or Associate Degree -0.187  0.000 -0.046  0.088
## BMI                                             -0.418  0.000  0.113 -0.154
## ED_Pain                                         -0.272  0.000 -0.035  0.047
## RaceNon-Hispanic Black                          -0.109  0.000  0.029 -0.142
## RaceNon-Hispanic Other                          -0.112  0.000  0.027 -0.036
## RaceNon-Hispanic White                          -0.129  0.000  0.028 -0.181
## Plate.Number                                    -0.175  0.000  0.014  0.006
## e2_binlow E2                                    -0.628  0.000  0.803 -0.103
## e2_binmid E2                                    -0.660  0.000  0.771 -0.022
##                                                 ED_HGD ED_SoL EDCoAD BMI   
## timepoint                                                                  
## sqrt.conc                                                                  
## ED_Age                                                                     
## ED_HighestGradeBachelor's Degree                                           
## ED_HighestGradeHigh School or Less               0.723                     
## ED_HighestGradeSome College or Associate Degree  0.751  0.833              
## BMI                                             -0.113 -0.057 -0.099       
## ED_Pain                                         -0.116 -0.213 -0.152  0.029
## RaceNon-Hispanic Black                          -0.190 -0.318 -0.322  0.004
## RaceNon-Hispanic Other                          -0.073 -0.177 -0.148 -0.047
## RaceNon-Hispanic White                          -0.198 -0.212 -0.219 -0.018
## Plate.Number                                    -0.086 -0.016 -0.085  0.109
## e2_binlow E2                                    -0.071 -0.007 -0.039  0.191
## e2_binmid E2                                    -0.026  0.013 -0.022  0.133
##                                                 ED_Pan RcN-HB RcN-HO RcN-HW
## timepoint                                                                  
## sqrt.conc                                                                  
## ED_Age                                                                     
## ED_HighestGradeBachelor's Degree                                           
## ED_HighestGradeHigh School or Less                                         
## ED_HighestGradeSome College or Associate Degree                            
## BMI                                                                        
## ED_Pain                                                                    
## RaceNon-Hispanic Black                           0.022                     
## RaceNon-Hispanic Other                           0.088  0.487              
## RaceNon-Hispanic White                           0.117  0.722  0.471       
## Plate.Number                                     0.056 -0.106 -0.124 -0.164
## e2_binlow E2                                     0.015  0.089  0.045  0.074
## e2_binmid E2                                     0.021  0.097  0.121  0.098
##                                                 Plt.Nm e2_bnlE2
## timepoint                                                      
## sqrt.conc                                                      
## ED_Age                                                         
## ED_HighestGradeBachelor's Degree                               
## ED_HighestGradeHigh School or Less                             
## ED_HighestGradeSome College or Associate Degree                
## BMI                                                            
## ED_Pain                                                        
## RaceNon-Hispanic Black                                         
## RaceNon-Hispanic Other                                         
## RaceNon-Hispanic White                                         
## Plate.Number                                                   
## e2_binlow E2                                    -0.058         
## e2_binmid E2                                    -0.011  0.829  
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.42631492 -0.72849950  0.00436402  0.71323539  2.35250577 
## 
## Residual standard error: 2.420916 
## Degrees of freedom: 708 total; 693 residual
#recode the factors so the binned pain is the outcome of interest
e2_full_long$bin_pain4 <- recode_factor(e2_full_long$bin_pain2,
                                        `0 pain` = "1",
                                        `1 to 9 pain` = "2",
                                        `10 pain` = "3") #new dimensions 1163 x 17

#multinomial logit regression
e2_model <- multinom(bin_pain4 ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ timepoint | PID), control = list(singular.ok = FALSE), data=e2_full_long) #e2_bin low E2    p = 2.398452e-03 = 0.002398452  
## # weights:  48 (30 variable)
## initial  value 1277.686092 
## iter  10 value 978.687050
## iter  20 value 828.822196
## iter  30 value 768.995579
## iter  40 value 768.314759
## final  value 768.314605 
## converged
#library(kableExtra)
tidy(e2_model, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE)
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 1.1476098 0.7192362 1.5955951 0.1105792 -0.2620673 2.5572868
2 timepoint -0.1977753 0.0441436 -4.4802701 0.0000075 -0.2842952 -0.1112554
2 sqrt.conc -0.0287966 0.0400877 -0.7183419 0.4725465 -0.1073670 0.0497737
2 ED_Age 0.0253562 0.0066856 3.7926427 0.0001491 0.0122526 0.0384598
2 ED_HighestGradeBachelor’s Degree 0.1368416 0.3705171 0.3693261 0.7118847 -0.5893586 0.8630418
2 ED_HighestGradeHigh School or Less -0.1608027 0.3463921 -0.4642217 0.6424889 -0.8397188 0.5181133
2 ED_HighestGradeSome College or Associate Degree -0.0484946 0.3412816 -0.1420956 0.8870045 -0.7173943 0.6204051
2 BMI 0.0115329 0.0098899 1.1661257 0.2435636 -0.0078510 0.0309168
2 ED_Pain 0.0911738 0.0316376 2.8818140 0.0039539 0.0291652 0.1531825
2 RaceNon-Hispanic Black -0.5527326 0.2668155 -2.0715908 0.0383036 -1.0756814 -0.0297838
2 RaceNon-Hispanic Other -0.3304876 0.3902482 -0.8468653 0.3970702 -1.0953600 0.4343847
2 RaceNon-Hispanic White -0.3713711 0.2762933 -1.3441190 0.1789099 -0.9128961 0.1701539
2 Plate.Number -0.0211626 0.0119249 -1.7746606 0.0759539 -0.0445349 0.0022097
2 e2_binlow E2 0.1952720 0.3712315 0.5260114 0.5988803 -0.5323284 0.9228724
2 e2_binmid E2 0.0093582 0.2724513 0.0343481 0.9725995 -0.5246365 0.5433529
3 (Intercept) -3.8222607 1.6508923 -2.3152696 0.0205982 -7.0579502 -0.5865713
3 timepoint -0.2117971 0.0815971 -2.5956444 0.0094414 -0.3717245 -0.0518697
3 sqrt.conc -0.0327149 0.0825517 -0.3962960 0.6918867 -0.1945133 0.1290835
3 ED_Age 0.0573083 0.0119448 4.7977392 0.0000016 0.0338968 0.0807197
3 ED_HighestGradeBachelor’s Degree 1.0048357 1.1494872 0.8741600 0.3820311 -1.2481178 3.2577893
3 ED_HighestGradeHigh School or Less 1.6704468 1.0845166 1.5402684 0.1234949 -0.4551668 3.7960603
3 ED_HighestGradeSome College or Associate Degree 1.7184618 1.0819146 1.5883526 0.1122066 -0.4020517 3.8389754
3 BMI -0.0240810 0.0196057 -1.2282606 0.2193491 -0.0625075 0.0143456
3 ED_Pain 0.2963213 0.0640951 4.6231501 0.0000038 0.1706972 0.4219455
3 RaceNon-Hispanic Black -0.5835581 0.4182776 -1.3951455 0.1629720 -1.4033671 0.2362509
3 RaceNon-Hispanic Other -0.5588189 0.6948961 -0.8041762 0.4212952 -1.9207901 0.8031523
3 RaceNon-Hispanic White -0.8144662 0.4747860 -1.7154384 0.0862649 -1.7450297 0.1160973
3 Plate.Number -0.0450055 0.0221329 -2.0334249 0.0420096 -0.0883852 -0.0016259
3 e2_binlow E2 0.1952197 0.6764448 0.2885967 0.7728900 -1.1305877 1.5210272
3 e2_binmid E2 -0.6145368 0.5141257 -1.1953045 0.2319681 -1.6222048 0.3931311
#e2_bin low E2 on category 3 (10 pain)  p = 2.398452e-03 = 0.002398452  
#this means that having a LOW E2 category is significantly associated with having level 10 PAIN rather than Level 0 Pain



# Fit the multinomial logistic regression model for cases where e2_full_long$time == 2 months
model_m2 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 2, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 421.867119 
## iter  10 value 247.665633
## iter  20 value 222.152402
## iter  30 value 220.540297
## iter  40 value 220.461828
## final  value 220.461395 
## converged
tidy(model_m2, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #low e2 significant vs level 10 pain 
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.6332172 1.4645354 0.4323673 0.6654745 -2.2372194 3.5036537
2 sqrt.conc -0.0439258 0.0817465 -0.5373413 0.5910319 -0.2041460 0.1162945
2 ED_Age 0.0345304 0.0143728 2.4024876 0.0162840 0.0063603 0.0627005
2 ED_HighestGradeBachelor’s Degree 0.0253283 0.7807131 0.0324425 0.9741191 -1.5048413 1.5554980
2 ED_HighestGradeHigh School or Less -0.5408934 0.7316123 -0.7393170 0.4597145 -1.9748272 0.8930404
2 ED_HighestGradeSome College or Associate Degree -0.1115448 0.7245922 -0.1539415 0.8776558 -1.5317195 1.3086299
2 BMI -0.0011304 0.0200715 -0.0563180 0.9550884 -0.0404699 0.0382091
2 ED_Pain 0.2278132 0.0643531 3.5400523 0.0004000 0.1016835 0.3539429
2 RaceNon-Hispanic Black -0.9946271 0.6589663 -1.5093747 0.1312030 -2.2861773 0.2969231
2 RaceNon-Hispanic Other -0.8541552 0.8588881 -0.9944895 0.3199846 -2.5375450 0.8292346
2 RaceNon-Hispanic White -0.9587317 0.6680304 -1.4351618 0.1512410 -2.2680471 0.3505838
2 Plate.Number 0.0168240 0.0253360 0.6640359 0.5066673 -0.0328336 0.0664816
2 e2_binlow E2 -0.0731302 0.7770463 -0.0941130 0.9250194 -1.5961130 1.4498527
2 e2_binmid E2 -0.1387671 0.5726973 -0.2423044 0.8085443 -1.2612332 0.9836990
3 (Intercept) -15.6095805 1.6871690 -9.2519365 0.0000000 -18.9163710 -12.3027900
3 sqrt.conc -0.0780987 0.1330539 -0.5869704 0.5572236 -0.3388795 0.1826821
3 ED_Age 0.0547499 0.0213335 2.5663871 0.0102764 0.0129371 0.0965627
3 ED_HighestGradeBachelor’s Degree 13.6045622 0.8144407 16.7041781 0.0000000 12.0082878 15.2008366
3 ED_HighestGradeHigh School or Less 13.5770128 0.6342322 21.4070066 0.0000000 12.3339405 14.8200851
3 ED_HighestGradeSome College or Associate Degree 14.1572732 0.6699970 21.1303518 0.0000000 12.8441031 15.4704432
3 BMI -0.0496474 0.0349347 -1.4211501 0.1552731 -0.1181182 0.0188233
3 ED_Pain 0.4303770 0.1148114 3.7485562 0.0001779 0.2053508 0.6554032
3 RaceNon-Hispanic Black -1.1582314 0.8419886 -1.3755904 0.1689485 -2.8084987 0.4920359
3 RaceNon-Hispanic Other -1.7429841 1.3997560 -1.2452056 0.2130562 -4.4864555 1.0004873
3 RaceNon-Hispanic White -1.1919065 0.9022291 -1.3210685 0.1864785 -2.9602431 0.5764301
3 Plate.Number 0.0146474 0.0386702 0.3787780 0.7048527 -0.0611448 0.0904396
3 e2_binlow E2 -0.7254233 1.1592525 -0.6257682 0.5314670 -2.9975163 1.5466698
3 e2_binmid E2 -0.9939188 0.8655141 -1.1483565 0.2508214 -2.6902953 0.7024577
#Fit the model for time point 3 months
model_m3 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 3, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 419.669894 
## iter  10 value 266.809692
## iter  20 value 247.485588
## iter  30 value 246.566597
## final  value 246.564576 
## converged
tidy(model_m3, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #no significance
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.7142391 1.2378680 0.5769913 0.5639453 -1.7119376 3.1404158
2 sqrt.conc -0.0530729 0.0694125 -0.7646012 0.4445091 -0.1891189 0.0829731
2 ED_Age 0.0269198 0.0116536 2.3099983 0.0208882 0.0040792 0.0497604
2 ED_HighestGradeBachelor’s Degree 0.1492462 0.6786421 0.2199189 0.8259343 -1.1808679 1.4793603
2 ED_HighestGradeHigh School or Less -0.2373798 0.6272214 -0.3784626 0.7050870 -1.4667113 0.9919516
2 ED_HighestGradeSome College or Associate Degree -0.1810475 0.6186932 -0.2926289 0.7698058 -1.3936638 1.0315688
2 BMI 0.0204176 0.0175726 1.1619012 0.2452756 -0.0140240 0.0548592
2 ED_Pain 0.0504263 0.0553806 0.9105411 0.3625372 -0.0581177 0.1589703
2 RaceNon-Hispanic Black -0.7860288 0.4907704 -1.6016223 0.1092391 -1.7479211 0.1758635
2 RaceNon-Hispanic Other -0.2419856 0.7248566 -0.3338393 0.7385008 -1.6626784 1.1787072
2 RaceNon-Hispanic White -0.6911603 0.5073726 -1.3622342 0.1731240 -1.6855924 0.3032717
2 Plate.Number -0.0074184 0.0207205 -0.3580208 0.7203278 -0.0480297 0.0331930
2 e2_binlow E2 0.0100750 0.6477340 0.0155542 0.9875900 -1.2594603 1.2796103
2 e2_binmid E2 -0.0795187 0.4755080 -0.1672290 0.8671899 -1.0114973 0.8524599
3 (Intercept) -3.8298945 2.5683366 -1.4911965 0.1359099 -8.8637417 1.2039528
3 sqrt.conc -0.0088107 0.1510366 -0.0583348 0.9534819 -0.3048370 0.2872156
3 ED_Age 0.0541307 0.0219750 2.4632856 0.0137670 0.0110605 0.0972009
3 ED_HighestGradeBachelor’s Degree -0.1025653 1.4258754 -0.0719314 0.9426565 -2.8972298 2.6920992
3 ED_HighestGradeHigh School or Less 0.4101094 1.2582336 0.3259406 0.7444693 -2.0559832 2.8762020
3 ED_HighestGradeSome College or Associate Degree 0.0358165 1.2661373 0.0282880 0.9774324 -2.4457670 2.5174000
3 BMI -0.0028174 0.0340501 -0.0827416 0.9340570 -0.0695544 0.0639197
3 ED_Pain 0.3491565 0.1237802 2.8207789 0.0047907 0.1065518 0.5917611
3 RaceNon-Hispanic Black -1.1091481 0.7432089 -1.4923773 0.1356003 -2.5658107 0.3475145
3 RaceNon-Hispanic Other -0.7506290 1.3307111 -0.5640811 0.5726989 -3.3587749 1.8575169
3 RaceNon-Hispanic White -0.8894535 0.8131545 -1.0938309 0.2740292 -2.4832071 0.7043000
3 Plate.Number -0.0826449 0.0446737 -1.8499656 0.0643185 -0.1702038 0.0049140
3 e2_binlow E2 0.3192749 1.2475478 0.2559220 0.7980111 -2.1258739 2.7644238
3 e2_binmid E2 -0.6698535 0.9487884 -0.7060093 0.4801823 -2.5294445 1.1897376
#Fit the model for time point 6 months
model_m6 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 6, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights:  45 (28 variable)
## initial  value 436.149079 
## iter  10 value 307.224469
## iter  20 value 284.418287
## iter  30 value 282.243213
## iter  40 value 282.104992
## iter  50 value 282.103013
## iter  50 value 282.103010
## iter  50 value 282.103010
## final  value 282.103010 
## converged
tidy(model_m6, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #plate number is significant????, no e2 significance
y.level term estimate std.error statistic p.value conf.low conf.high
2 (Intercept) 0.0712850 1.0728075 0.0664472 0.9470218 -2.0313790 2.1739491
2 sqrt.conc -0.0006349 0.0632923 -0.0100314 0.9919963 -0.1246856 0.1234158
2 ED_Age 0.0199455 0.0101454 1.9659702 0.0493021 0.0000609 0.0398301
2 ED_HighestGradeBachelor’s Degree 0.1881553 0.5495107 0.3424051 0.7320460 -0.8888659 1.2651764
2 ED_HighestGradeHigh School or Less 0.1257468 0.5203582 0.2416543 0.8090481 -0.8941366 1.1456302
2 ED_HighestGradeSome College or Associate Degree 0.0885682 0.5102420 0.1735807 0.8621950 -0.9114878 1.0886241
2 BMI 0.0121089 0.0152307 0.7950334 0.4265941 -0.0177427 0.0419605
2 ED_Pain 0.0421307 0.0495129 0.8509044 0.3948225 -0.0549128 0.1391742
2 RaceNon-Hispanic Black -0.2452478 0.3799673 -0.6454446 0.5186391 -0.9899700 0.4994744
2 RaceNon-Hispanic Other -0.2149588 0.5827748 -0.3688541 0.7122365 -1.3571764 0.9272587
2 RaceNon-Hispanic White 0.0861751 0.3968412 0.2171527 0.8280893 -0.6916193 0.8639695
2 Plate.Number -0.0534691 0.0185320 -2.8852326 0.0039112 -0.0897911 -0.0171471
2 e2_binlow E2 0.4898350 0.5723151 0.8558834 0.3920623 -0.6318820 1.6115520
2 e2_binmid E2 0.1372568 0.4195159 0.3271791 0.7435324 -0.6849792 0.9594929
3 (Intercept) -17.8847896 1.8883086 -9.4713277 0.0000000 -21.5858064 -14.1837727
3 sqrt.conc -0.0087239 0.1694346 -0.0514886 0.9589362 -0.3408097 0.3233618
3 ED_Age 0.0756291 0.0214585 3.5244337 0.0004244 0.0335712 0.1176870
3 ED_HighestGradeBachelor’s Degree 12.1648748 1.0485328 11.6018063 0.0000000 10.1097882 14.2199614
3 ED_HighestGradeHigh School or Less 13.7826016 0.6979667 19.7467889 0.0000000 12.4146120 15.1505913
3 ED_HighestGradeSome College or Associate Degree 13.7298894 0.7384538 18.5927530 0.0000000 12.2825465 15.1772323
3 BMI -0.0246015 0.0351234 -0.7004303 0.4836586 -0.0934421 0.0442391
3 ED_Pain 0.1905089 0.1038627 1.8342384 0.0666186 -0.0130582 0.3940761
3 RaceNon-Hispanic Black 0.1621323 0.7508772 0.2159238 0.8290471 -1.3095600 1.6338245
3 RaceNon-Hispanic Other 0.5361732 1.0952850 0.4895285 0.6244676 -1.6105459 2.6828924
3 RaceNon-Hispanic White -0.9740496 0.9465993 -1.0289989 0.3034802 -2.8293501 0.8812509
3 Plate.Number -0.0650576 0.0379438 -1.7145790 0.0864224 -0.1394260 0.0093108
3 e2_binlow E2 1.0543342 1.3008846 0.8104748 0.4176673 -1.4953528 3.6040211
3 e2_binmid E2 -0.1763840 0.9934211 -0.1775521 0.8590748 -2.1234535 1.7706856
#m <- glmer(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin + (1 | PID), data = e2_full_long, family = binomial, control = glmerControl(optimizer = "bobyqa"), nAGQ = 10)

miRNA

miRNA data

#I don't think updated miRNA data was sent? I'll use the miRNA data from 2021 from this original code
#this file was generated from "AURORA_new_QC_sRNA_06142021.R", which I do not have, this is from Ying
miRNA_cleaned <- read.csv("AURORA_sRNA_cleaned_596_06.14.2021.csv")
miRNA_cleaned$sample <- rownames(miRNA_cleaned) 
#miRNA_cleaned dimensions 596 x 1552

Link miRNA with keys

#sample ID to inventory ID and PID Keys
sampleID <- read.csv("ID_to_SampleID_all.csv")
ID_link <- readxl::read_excel("Final Sample List_UNC BSP RNA&Plasma.xlsx")


ID2 <- ID_link %>% 
  right_join(sampleID,by=c("INVENTORY CODE" = "Inventory.code", "SAMPLE ID" = "sampleID_sub")) 

ID_miRNA <- miRNA_cleaned %>%
  mutate(sample=as.numeric(sample)) %>% 
  left_join(ID2,by="sample") #596 samples

length(unique(ID_miRNA$PID)) # unique PID 294
## [1] 294

subset to only the timepoint 0/ED visit miRNA values

ID_miRNA_T0 <- ID_miRNA %>% 
  filter(`ALT ID` == "T0") #dimensions 294 x 1557

miRNA

NEW MARCH 2024 CODE FOR miRNA

none of the PIDs match up between the new estrogen data and the miRNA data

#unique(e2_full_long$PID)
#ID_miRNA$PID
#typeof(e2_full_long$PID)
#typeof(ID_miRNA$PID)

#e2_wide_all<- e2_full_wide %>% 
 # inner_join(ID_miRNA,by="PID") 

#E2_long_all<- e2_full_long %>% 
 # inner_join(ID_miRNA,by="PID")