March 2024 Associating female E2–>Pain, mixture models, LCA

library(data.table);library(ggfortify)
library(factoextra);library(ClassDiscovery)
library(stringr);library(lattice);library(kableExtra)
library(ggplot2);library(MASS);library(memisc)
library(tidyverse);library(naniar);library(gtsummary)
require(lme4); require(nnet);
library(broom);library(readxl);library(nlme);library(sjPlot)
library(flexmix);library(poLCA)

read in and subset data

e2_full_long <- as.data.frame(read.csv("cleaned_E2_and_Aurora_Data.csv"))

#subset to estrogen collected at timepoint 0 == e2 collected at ED visit. convert pain to numeric
e2_full_long <- e2_full_long %>% 
  filter(AlternateID == "T0") %>%
  mutate(NRS_pain = as.numeric(NRS_pain))
#1296 x 27

length(unique(e2_full_long[["PID"]])) #432 PID

## [1] 432

March 2024 Batch 2 GLS model

#need complete cases to run this model. 
e2_full_long_gls_complete <- e2_full_long[complete.cases(e2_full_long),] #708 
e2_full_long_gls <- e2_full_long[complete.cases(e2_full_long$NRS_pain), ] #1254


#e2_model <- gls(NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)

#e2_model <- gls(NRS_pain ~ 1, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)

#tab_model(e2_model)

Use flexmix package for mixture modeling

df <- e2_full_long_gls #1254 
df6 <- e2_full_long_gls %>% filter(timepoint == 6) #432 obs
df6_complete <- e2_full_long_gls_complete %>% filter(timepoint == 6) #236 obs

#attempt #1
m <- flexmix(NRS_pain ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, k = 2, data=df6_complete)
summary(m)

## 
## Call:
## flexmix(formula = NRS_pain ~ sqrt.conc + ED_Age + ED_HighestGrade + 
##     BMI + ED_Pain + Race + Plate.Number, data = df6_complete, 
##     k = 2)
## 
##        prior size post>0 ratio
## Comp.1 0.199   42    126 0.333
## Comp.2 0.801  194    236 0.822
## 
## 'log Lik.' -521.7948 (df=27)
## AIC: 1097.59   BIC: 1191.113

parameters(m, component = 1)

##                                                           Comp.1
## coef.(Intercept)                                      6.52428059
## coef.sqrt.conc                                       -0.11282087
## coef.ED_Age                                          -0.03751555
## coef.ED_HighestGradeBachelor's Degree                -2.03571560
## coef.ED_HighestGradeHigh School or Less              -1.35848215
## coef.ED_HighestGradeSome College or Associate Degree -1.26509036
## coef.BMI                                              0.16228141
## coef.ED_Pain                                         -0.64069568
## coef.RaceNon-Hispanic Black                          -0.89860325
## coef.RaceNon-Hispanic Other                          -5.94522871
## coef.RaceNon-Hispanic White                          -2.43011311
## coef.Plate.Number                                     0.20282749
## sigma                                                 0.60344288

attempt #2

ggplot(df6, aes(x = NRS_pain)) + 
  geom_histogram(aes(NRS_pain, ..density..), binwidth = 1, colour = "black", fill = "white")

## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#k parameters 2 instead of 3
mo1 <- FLXMRglm(family = "gaussian")
mo2 <- FLXMRglm(family = "gaussian")
#mo3 <- FLXMRglm(family = "gaussian")
m2 <- flexmix(NRS_pain ~ 1, data = df6, k = 2, model = list(mo1, mo2))
#flexfit <- flexmix(NRS_pain ~ 1, data = df6, k = 3, model = list(mo1, mo2, mo3))


c1 <- parameters(m2, component=1)[[1]]
c2 <- parameters(m2, component=2)[[1]]
#c3 <- parameters(m2, component=3)[[1]]

plot_mix_comps <- function(x, mu, sigma, lam) {
  lam * dnorm(x, mu, sigma)
}

lam <- table(clusters(m2))
  
ggplot(df6) +
geom_histogram(aes(NRS_pain, ..density..), binwidth = 1, colour = "black", fill = "white") +
stat_function(geom = "line", fun = plot_mix_comps,
                args = list(c1[1], c1[2], lam[1]/sum(lam)),
                colour = "red", lwd = 1.5) +
stat_function(geom = "line", fun = plot_mix_comps,
                args = list(c2[1], c2[2], lam[2]/sum(lam)),
                colour = "blue", lwd = 1.5) +
#stat_function(geom = "line", fun = plot_mix_comps,args = list(c3[1], c3[2], lam[3]/sum(lam)),colour = "green", lwd = 1.5) +
ylab("Density")

#maybe

mixture model attempt #3

seed=69

df6_simple <- df6[,c("PID","NRS_pain","sqrt.conc")]

ggplot(df, aes(x = NRS_pain)) + 
  geom_density(color = "black", fill = "white") +
  labs(x = "Pain", y = "Density", title = "Density Plot of Pain all times")

ggplot(df6_simple, aes(x = NRS_pain)) + 
  geom_density(color = "black", fill = "white") +
  labs(x = "Pain", y = "Density", title = "Density Plot of Pain MONTH 6")

#m3 = flexmix(NRS_pain ~ sqrt.conc, data=df6_simple, k=2)
#summary(m3)
#parameters(m3)

latent class analysis

#recode e2_bin so it can be used in the model
df6_lca <- df6_complete %>%
  mutate(e2_bin = recode(e2_bin, "low E2" = 1, "mid E2" = 2, "high E2" = 3))

#everything numeric start at 1, so increment NRS pain
df6_lca$NRS_pain <- df6_lca$NRS_pain + 1

#increment postmenopause
df6_lca$postmenopause <- df6_lca$postmenopause + 1

#increment axial
df6_lca$Axial_Pain <- df6_lca$Axial_Pain + 1

#increment widespread
df6_lca$Widespread_pain_2016 <- df6_lca$Widespread_pain_2016 + 1

#round conc
df6_lca$sqrt.conc <- round(df6_lca$sqrt.conc,digits=0)

#recode bmi
breaks <- c(-Inf, 18.5, 24.9, 29.9, Inf)
labels <- c("Underweight", "Healthy Weight", "Overweight", "Obesity")
df6_lca <- df6_lca %>%
  mutate(BMI_cat = cut(BMI, breaks = breaks, labels = labels, include.lowest = TRUE))

#bin pains
df6_lca$bin_pain2 <- as.factor(df6_lca$bin_pain2)

levels(df6_lca$bin_pain2)

## [1] "1 to 9 pain" "10 pain"

f1 <- cbind(NRS_pain, sqrt.conc, Widespread_pain_2016,Axial_Pain, postmenopause,e2_bin)~1
f2 <- cbind(NRS_pain, sqrt.conc, Widespread_pain_2016,Axial_Pain, postmenopause,e2_bin, e2_confidence)~1
f3 <- cbind(Widespread_pain_2016,Axial_Pain, postmenopause, e2_confidence, BMI_cat,bin_pain2)~1


LCA2 <- poLCA(f1, data=df6_lca, nclass=2) #AIC(2): 3371.711 BIC(2): 3610.715

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $NRS_pain
##           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:      0 0.1937 0.1285 0.2729 0.1610 0.1184 0.0786 0.0226 0.0103 0.0000
## class 2:      0 0.0088 0.0090 0.0203 0.0675 0.1266 0.1834 0.2227 0.1551 0.0624
##           Pr(11)
## class 1:  0.0139
## class 2:  0.1443
## 
## $sqrt.conc
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:  0.0000 0.0587 0.1492 0.2392 0.2078 0.1514 0.0612 0.0215 0.0293 0.0169
## class 2:  0.0078 0.0598 0.1163 0.1967 0.2387 0.0288 0.1123 0.0677 0.0378 0.0403
##           Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1:  0.0091 0.0196 0.0174 0.0000 0.0093 0.0000      0      0      0 0.0093
## class 2:  0.0157 0.0147 0.0087 0.0312 0.0000 0.0234      0      0      0 0.0000
## 
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.6045 0.3955
## class 2:  0.2875 0.7125
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.1581 0.8419
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.7357 0.2643
## class 2:  0.7386 0.2614
## 
## $e2_bin
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.2560 0.5504 0.1936
## class 2:  0.2138 0.4343 0.3519
## 
## Estimated class population shares 
##  0.4564 0.5436 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.4788 0.5212 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 69 
## residual degrees of freedom: 167 
## maximum log-likelihood: -1694.527 
##  
## AIC(2): 3527.055
## BIC(2): 3766.059
## G^2(2): 982.0811 (Likelihood ratio/deviance statistic) 
## X^2(2): 3672.267 (Chi-square goodness of fit) 
##

LCA3 <- poLCA(f2, data=df6_lca, nclass=2) #AIC(2): 3957.45 BIC(2): 4217.238

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $NRS_pain
##           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:      0 0.0758 0.0606 0.1061 0.1212 0.0758 0.1515 0.1970 0.0909 0.0455
## class 2:      0 0.1000 0.0647 0.1471 0.1059 0.1412 0.1294 0.1059 0.0882 0.0294
##           Pr(11)
## class 1:  0.0758
## class 2:  0.0882
## 
## $sqrt.conc
##            Pr(1)  Pr(2)  Pr(3) Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:  0.0000 0.0000 0.0000   0.0 0.0000 0.0000 0.3182 0.1667 0.1212 0.1061
## class 2:  0.0059 0.0824 0.1824   0.3 0.3118 0.1176 0.0000 0.0000 0.0000 0.0000
##           Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1:  0.0455 0.0606 0.0455 0.0606 0.0152 0.0455      0      0      0 0.0152
## class 2:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000      0      0      0 0.0000
## 
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.4091 0.5909
## class 2:  0.4412 0.5588
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.4848 0.5152
## class 2:  0.5647 0.4353
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.9242 0.0758
## class 2:  0.6647 0.3353
## 
## $e2_bin
##            Pr(1)  Pr(2) Pr(3)
## class 1:  0.0000 0.0000     1
## class 2:  0.3235 0.6765     0
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.6667 0.1818 0.0758 0.0758
## class 2:  0.4059 0.2588 0.1412 0.1941
## 
## Estimated class population shares 
##  0.2797 0.7203 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.2797 0.7203 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 75 
## residual degrees of freedom: 161 
## maximum log-likelihood: -1903.725 
##  
## AIC(2): 3957.45
## BIC(2): 4217.238
## G^2(2): 1296.11 (Likelihood ratio/deviance statistic) 
## X^2(2): 8004.288 (Chi-square goodness of fit) 
##

LCA4 <- poLCA(f2, data=df6_lca, nclass=3) #AIC(3): 3977.52 BIC(3): 4368.933

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $NRS_pain
##           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:      0 0.0957 0.0783 0.1391 0.1130 0.1304 0.1565 0.1217 0.0783 0.0261
## class 2:      0 0.1091 0.0364 0.1636 0.0909 0.1636 0.0727 0.0727 0.1091 0.0364
## class 3:      0 0.0758 0.0606 0.1061 0.1212 0.0758 0.1515 0.1970 0.0909 0.0455
##           Pr(11)
## class 1:  0.0609
## class 2:  0.1455
## class 3:  0.0758
## 
## $sqrt.conc
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)  Pr(8)  Pr(9) Pr(10)
## class 1:  0.0000 0.0000 0.0000 0.3652 0.4609 0.1739 0.0000 0.0000 0.0000 0.0000
## class 2:  0.0182 0.2545 0.5636 0.1636 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## class 3:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.3182 0.1667 0.1212 0.1061
##           Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000      0      0      0 0.0000
## class 2:  0.0000 0.0000 0.0000 0.0000 0.0000 0.0000      0      0      0 0.0000
## class 3:  0.0455 0.0606 0.0455 0.0606 0.0152 0.0455      0      0      0 0.0152
## 
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.4696 0.5304
## class 2:  0.3818 0.6182
## class 3:  0.4091 0.5909
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.5652 0.4348
## class 2:  0.5636 0.4364
## class 3:  0.4848 0.5152
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.7391 0.2609
## class 2:  0.5091 0.4909
## class 3:  0.9242 0.0758
## 
## $e2_bin
##           Pr(1) Pr(2) Pr(3)
## class 1:      0     1     0
## class 2:      1     0     0
## class 3:      0     0     1
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.4957 0.1913 0.1217 0.1913
## class 2:  0.2182 0.4000 0.1818 0.2000
## class 3:  0.6667 0.1818 0.0758 0.0758
## 
## Estimated class population shares 
##  0.4873 0.2331 0.2797 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.4873 0.2331 0.2797 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 113 
## residual degrees of freedom: 123 
## maximum log-likelihood: -1804.119 
##  
## AIC(3): 3834.237
## BIC(3): 4225.65
## G^2(3): 1096.897 (Likelihood ratio/deviance statistic) 
## X^2(3): 4775.175 (Chi-square goodness of fit) 
##

LCA5 <- poLCA(f3, data=df6_lca, nclass=2) #AIC(2): 1979.188 BIC(2): 2038.073 #this and LCA6 are best

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.1844 0.8156
## class 2:  0.5640 0.4360
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.0070 0.9930
## class 2:  0.8272 0.1728
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.6979 0.3021
## class 2:  0.7582 0.2418
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.4403 0.1990 0.1536 0.2071
## class 2:  0.4993 0.2577 0.1065 0.1365
## 
## $BMI_cat
##           Underweight Healthy Weight Overweight Obesity
## class 1:       0.0322         0.2209     0.2437  0.5033
## class 2:       0.0088         0.2330     0.3702  0.3879
## 
## $bin_pain2
##           1 to 9 pain 10 pain
## class 1:       0.8002  0.1998
## class 2:       0.9764  0.0236
## 
## Estimated class population shares 
##  0.3473 0.6527 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3729 0.6271 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 21 
## residual degrees of freedom: 215 
## maximum log-likelihood: -1069.426 
##  
## AIC(2): 2180.851
## BIC(2): 2253.592
## G^2(2): 176.3656 (Likelihood ratio/deviance statistic) 
## X^2(2): 303.8513 (Chi-square goodness of fit) 
##  
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND 
##

LCA6 <- poLCA(f3, data=df6_lca, nclass=3) #AIC(3): 1967.918 BIC(3): 2057.978 #this and LCA5 are best

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.7860 0.2140
## class 2:  0.4941 0.5059
## class 3:  0.2173 0.7827
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.8657 0.1343
## class 2:  0.8835 0.1165
## class 3:  0.0000 1.0000
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.8897 0.1103
## class 2:  0.7119 0.2881
## class 3:  0.7081 0.2919
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.1135 0.3591 0.4261 0.1014
## class 2:  0.6198 0.2297 0.0000 0.1505
## class 3:  0.4516 0.1984 0.1526 0.1973
## 
## $BMI_cat
##           Underweight Healthy Weight Overweight Obesity
## class 1:       0.0300         0.0000     0.5419  0.4281
## class 2:       0.0000         0.3044     0.3207  0.3749
## class 3:       0.0324         0.2272     0.2480  0.4924
## 
## $bin_pain2
##           1 to 9 pain 10 pain
## class 1:       1.0000  0.0000
## class 2:       0.9688  0.0312
## class 3:       0.8167  0.1833
## 
## Estimated class population shares 
##  0.1512 0.4658 0.3831 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.1229 0.4364 0.4407 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 32 
## residual degrees of freedom: 204 
## maximum log-likelihood: -1060.64 
##  
## AIC(3): 2185.28
## BIC(3): 2296.122
## G^2(3): 158.7938 (Likelihood ratio/deviance statistic) 
## X^2(3): 261.4029 (Chi-square goodness of fit) 
##

# Calculate entropy (3-class mode)l- values closer to 1.0 indicate greater separation of the classes.
entropy<-function (p) sum(-p*log(p))
error_prior <- entropy(LCA6$P) # Class proportions
error_post <- mean(apply(LCA6$posterior, 1, entropy))
(error_prior - error_post) / error_prior

## [1] NaN

#predicted class membership is in:
LCA6$predclass[1:30]

##  [1] 3 3 3 1 3 3 2 2 1 3 2 1 3 3 3 1 3 2 1 1 3 2 3 2 2 1 3 3 3 2

#add variable to data set with all variables so it can be used as predictor variable:
df6_lca$class <- LCA6$predclass

ggplot(df6_lca, aes(x = NRS_pain, fill = as.factor(class))) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain Across m6", y = "Density", title = "Pain m6 by latent class")

ggplot(df6_lca, aes(x = sqrt.conc, fill = as.factor(class))) +
  geom_density(alpha = 0.5) +
  labs(x = "e2 Across m6", y = "Density", title = "e2 m6 by latent class")

ggplot(df6_lca, aes(x = BMI, fill = as.factor(class))) +
  geom_density(alpha = 0.5) +
  labs(x = "bmi Across m6", y = "Density", title = "bmi m6 by latent class")

#adding the E2_confidence feature changes the E2 concentration binning by class
plot(LCA5)

plot(LCA6)

plotting affinity classes

f2a <- cbind(Widespread_pain_2016,Axial_Pain, postmenopause, e2_confidence, BMI_cat,bin_pain2)~e2_bin

nes2a <- poLCA(f2a,df6_lca,nclass=3,nrep=5)

## Model 1: llik = -1040.359 ... best llik = -1040.359
## Model 2: llik = -1067.829 ... best llik = -1040.359
## Model 3: llik = -1041.097 ... best llik = -1040.359
## Model 4: llik = -1041.097 ... best llik = -1040.359
## Model 5: llik = -1040.359 ... best llik = -1040.359
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Widespread_pain_2016
##            Pr(1)  Pr(2)
## class 1:  0.1806 0.8194
## class 2:  0.9964 0.0036
## class 3:  0.4113 0.5887
## 
## $Axial_Pain
##            Pr(1)  Pr(2)
## class 1:  0.3937 0.6063
## class 2:  1.0000 0.0000
## class 3:  0.4817 0.5183
## 
## $postmenopause
##            Pr(1)  Pr(2)
## class 1:  0.4293 0.5707
## class 2:  0.7538 0.2462
## class 3:  0.9742 0.0258
## 
## $e2_confidence
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.2686 0.2932 0.1085 0.3296
## class 2:  0.3384 0.3507 0.1987 0.1122
## class 3:  0.6996 0.1489 0.1047 0.0468
## 
## $BMI_cat
##           Underweight Healthy Weight Overweight Obesity
## class 1:       0.0142         0.2075     0.4054  0.3729
## class 2:       0.0222         0.0821     0.4658  0.4299
## class 3:       0.0171         0.3028     0.2094  0.4707
## 
## $bin_pain2
##           1 to 9 pain 10 pain
## class 1:       0.8429  0.1571
## class 2:       1.0000  0.0000
## class 3:       0.9394  0.0606
## 
## Estimated class population shares 
##  0.3626 0.1787 0.4588 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3263 0.1992 0.4746 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)    -1.51919     0.81303   -1.869     0.063
## e2_bin          0.48322     0.44740    1.080     0.281
## ========================================================= 
## 3 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)    -5.18142     1.44477   -3.586         0
## e2_bin          2.62987     0.66141    3.976         0
## ========================================================= 
## number of observations: 236 
## number of estimated parameters: 34 
## residual degrees of freedom: 202 
## maximum log-likelihood: -1040.359 
##  
## AIC(3): 2148.718
## BIC(3): 2266.488
## X^2(3): 256.2171 (Chi-square goodness of fit) 
##  
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND 
##  
## ALERT: estimation algorithm automatically restarted with new initial values 
##

pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)

matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
main="e2 as a predictor of pain affinity class",
xlab="estrogen category ",
ylab="Probability of latent class membership",lwd=2,col=1)
text(5.9,0.35,"Other")

summary table

length(unique(e2_full_long[["PID"]])) #432 PID

## [1] 432

e2_full_long %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Full Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
  #add_overall() %>%
  add_p()%>%
  bold_p()

**Full Table: Features by Estrogen Bin, ALL TIME POINTS**
Characteristic	Relative Estrogen Bin			p-value²
Characteristic	high E2, N = 357¹	low E2, N = 273¹	mid E2, N = 666¹	p-value²
ED_Age	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade				0.001
Advanced Degree	18 (5.0%)	18 (6.6%)	51 (7.7%)
Bachelor's Degree	60 (17%)	69 (25%)	90 (14%)
High School or Less	138 (39%)	93 (34%)	246 (37%)
Some College or Associate Degree	141 (39%)	93 (34%)	279 (42%)
BMI	31 (10)	28 (6)	30 (8)	0.005
Unknown	18	21	57
Widespread_pain_2016	152 (44%)	120 (46%)	232 (36%)	0.009
Unknown	9	11	25
Axial_Pain	169 (62%)	127 (58%)	289 (56%)	0.3
Unknown	86	54	152
ED_Pain	7 (2)	6 (3)	6 (3)	0.005
NRS_pain	4.5 (3.3)	4.4 (3.2)	4.0 (3.0)	0.028
Unknown	9	9	24
bin_pain				0.2
0 pain	77 (22%)	42 (16%)	123 (19%)
Any pain	271 (78%)	222 (84%)	519 (81%)
Unknown	9	9	24
bin_pain2				0.020
0 pain	77 (22%)	42 (16%)	123 (19%)
1 to 9 pain	247 (71%)	197 (75%)	490 (76%)
10 pain	24 (6.9%)	25 (9.5%)	29 (4.5%)
Unknown	9	9	24
postmenopause	27 (7.6%)	114 (42%)	153 (23%)	<0.001
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

# NRS pain is significant by estrogen bin?

#when all time points are combined, pain is significant across the different E2 categories
#but this has limited interpretability because pain above is across all time points

#summary table for MONTH 2 
#this is the only one sig for some reason
e2_full_long %>%
  filter(timepoint == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()

**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**
Characteristic	Relative Estrogen Bin			p-value²
Characteristic	high E2, N = 119¹	low E2, N = 91¹	mid E2, N = 222¹	p-value²
ED_Age	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade				0.3
Advanced Degree	6 (5.0%)	6 (6.6%)	17 (7.7%)
Bachelor's Degree	20 (17%)	23 (25%)	30 (14%)
High School or Less	46 (39%)	31 (34%)	82 (37%)
Some College or Associate Degree	47 (39%)	31 (34%)	93 (42%)
BMI	31 (10)	28 (6)	30 (8)	0.2
Unknown	6	7	19
Widespread_pain_2016	57 (50%)	40 (47%)	77 (36%)	0.043
Unknown	5	5	11
Axial_Pain	77 (79%)	68 (87%)	156 (85%)	0.3
Unknown	21	13	38
ED_Pain	7 (2)	6 (3)	6 (3)	0.2
NRS_pain	5.1 (3.1)	4.7 (3.0)	4.4 (2.9)	0.095
Unknown	5	4	11
bin_pain				0.7
0 pain	16 (14%)	9 (10%)	27 (13%)
Any pain	98 (86%)	78 (90%)	184 (87%)
Unknown	5	4	11
bin_pain2				0.5
0 pain	16 (14%)	9 (10%)	27 (13%)
1 to 9 pain	87 (76%)	71 (82%)	173 (82%)
10 pain	11 (9.6%)	7 (8.0%)	11 (5.2%)
Unknown	5	4	11
postmenopause	9 (7.6%)	38 (42%)	51 (23%)	<0.001
Plate.Number	11 (6)	12 (6)	10 (7)	0.2
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#summary table for MONTH 3
e2_full_long %>%
  filter(timepoint == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()

**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**
Characteristic	Relative Estrogen Bin			p-value²
Characteristic	high E2, N = 119¹	low E2, N = 91¹	mid E2, N = 222¹	p-value²
ED_Age	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade				0.3
Advanced Degree	6 (5.0%)	6 (6.6%)	17 (7.7%)
Bachelor's Degree	20 (17%)	23 (25%)	30 (14%)
High School or Less	46 (39%)	31 (34%)	82 (37%)
Some College or Associate Degree	47 (39%)	31 (34%)	93 (42%)
BMI	31 (10)	28 (6)	30 (8)	0.2
Unknown	6	7	19
Widespread_pain_2016	47 (41%)	39 (45%)	77 (37%)	0.4
Unknown	4	5	13
Axial_Pain	48 (53%)	31 (44%)	67 (40%)	0.12
Unknown	29	20	55
ED_Pain	7 (2)	6 (3)	6 (3)	0.2
NRS_pain	4.4 (3.3)	4.5 (3.2)	3.9 (2.9)	0.3
Unknown	4	5	13
bin_pain				0.6
0 pain	25 (22%)	14 (16%)	38 (18%)
Any pain	90 (78%)	72 (84%)	171 (82%)
Unknown	4	5	13
bin_pain2				0.3
0 pain	25 (22%)	14 (16%)	38 (18%)
1 to 9 pain	82 (71%)	64 (74%)	163 (78%)
10 pain	8 (7.0%)	8 (9.3%)	8 (3.8%)
Unknown	4	5	13
postmenopause	9 (7.6%)	38 (42%)	51 (23%)	<0.001
Plate.Number	11 (6)	12 (6)	10 (7)	0.2
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#summary table for MONTH 6
e2_full_long %>%
  filter(timepoint == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**") %>%
  #add_overall() %>%
  add_p() %>%
  bold_p()

**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**
Characteristic	Relative Estrogen Bin			p-value²
Characteristic	high E2, N = 119¹	low E2, N = 91¹	mid E2, N = 222¹	p-value²
ED_Age	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade				0.3
Advanced Degree	6 (5.0%)	6 (6.6%)	17 (7.7%)
Bachelor's Degree	20 (17%)	23 (25%)	30 (14%)
High School or Less	46 (39%)	31 (34%)	82 (37%)
Some College or Associate Degree	47 (39%)	31 (34%)	93 (42%)
BMI	31 (10)	28 (6)	30 (8)	0.2
Unknown	6	7	19
Widespread_pain_2016	48 (40%)	41 (46%)	78 (35%)	0.2
Unknown	0	1	1
Axial_Pain	44 (53%)	28 (40%)	66 (40%)	0.14
Unknown	36	21	59
ED_Pain	7 (2)	6 (3)	6 (3)	0.2
NRS_pain	3.9 (3.3)	4.1 (3.4)	3.5 (3.0)	0.4
bin_pain				0.3
0 pain	36 (30%)	19 (21%)	58 (26%)
Any pain	83 (70%)	72 (79%)	164 (74%)
bin_pain2				0.12
0 pain	36 (30%)	19 (21%)	58 (26%)
1 to 9 pain	78 (66%)	62 (68%)	154 (69%)
10 pain	5 (4.2%)	10 (11%)	10 (4.5%)
postmenopause	9 (7.6%)	38 (42%)	51 (23%)	<0.001
Plate.Number	11 (6)	12 (6)	10 (7)	0.2
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

what if I only look at low and high e2 levels? doesn’t work

e2_full_long %>%
  filter(e2_bin %in% c("high E2", "low E2")) %>%
  select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
  tbl_summary(by = e2_bin,
              statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  add_p() %>% bold_p() #%>% add_overall()

Characteristic	high E2, N = 357¹	low E2, N = 273¹	p-value²
ED_Age	33 (11)	44 (13)	<0.001
ED_HighestGrade			0.042
Advanced Degree	18 (5.0%)	18 (6.6%)
Bachelor's Degree	60 (17%)	69 (25%)
High School or Less	138 (39%)	93 (34%)
Some College or Associate Degree	141 (39%)	93 (34%)
BMI	31 (10)	28 (6)	0.002
Unknown	18	21
Widespread_pain_2016	152 (44%)	120 (46%)	0.6
Unknown	9	11
Axial_Pain	169 (62%)	127 (58%)	0.3
Unknown	86	54
ED_Pain	7 (2)	6 (3)	0.003
NRS_pain	4.5 (3.3)	4.4 (3.2)	0.9
Unknown	9	9
bin_pain			0.054
0 pain	77 (22%)	42 (16%)
Any pain	271 (78%)	222 (84%)
Unknown	9	9
bin_pain2			0.11
0 pain	77 (22%)	42 (16%)
1 to 9 pain	247 (71%)	197 (75%)
10 pain	24 (6.9%)	25 (9.5%)
Unknown	9	9
postmenopause	27 (7.6%)	114 (42%)	<0.001
Plate.Number	11 (6)	12 (6)	0.088
¹ Mean (SD); n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test

#nothing. also repeated for each timepoint and nothing sig

i don’t know. it’s not signficant when stratified by time point. I think this is because it is auto-correlating when all time points are combined because you have three of each case

visuals

ggplot(e2_full_long, aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain Across All Time Points", y = "Density", title = "Pain All Time Points, by Estrogen Bin") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))

## Warning: Removed 42 rows containing non-finite outside the scale range
## (`stat_density()`).

#high and mid E2 peak at 0 pain. Everything mostly falls off at pain 10.

#BELOW: PAIN AT 2 MONTHS. Mostly mid level pain everyone
ggplot(data = e2_full_long %>% filter(timepoint == 2), aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 2 MONTHS", y = "Density", title = "Pain at 2 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))

## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_density()`).

#BELOW: PAIN AT 3 MONTHS, mostly mid level pain, high and mid e2 start to bounce back down to zero
ggplot(data = e2_full_long %>% filter(timepoint == 3), aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 3 MONTHS", y = "Density", title = "Pain at 3 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))

## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_density()`).

#BELOW: PAIN AT 6 MONTHS. Mid and high e2 have a strong peak at 0 pain, and high e2 has another strong bump at ~6 pain. Low e2 is mildly right skewed towards 10. 
ggplot(data = e2_full_long %>% filter(timepoint == 6), aes(x = NRS_pain, fill = e2_bin)) +
  geom_density(alpha = 0.5) +
  labs(x = "Pain at 6 MONTHS", y = "Density", title = "Pain at 6 MONTHS, by Estrogen Category") +
  scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))

Ok this is very useful!! We see that low E2 has fewer 0s and more 10s at each time point. This makes me think a different pain stratification should take place that focuses on the extremes of the pain NRS scale.

#High and Low E2 level at month 2 - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 2, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 2 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_bin()`).

#High and Low E2 level at month 3 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 3, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 3 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_bin()`).

#High and Low E2 level at month 6 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 6, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
  geom_histogram(binwidth = .6, position = "dodge", color="black") +
  scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
  labs(title = "Pain at Month 6 by High and Low E2",
       x = "Pain Score",
       y = "Frequency")

chi square ?

table(e2_full_long$e2_bin, as.factor(e2_full_long$NRS_pain))

##          
##             0   1   2   3   4   5   6   7   8   9  10
##   high E2  77  17  18  20  32  34  45  30  38  13  24
##   low E2   42  23  17  28  27  27  25  20  25   5  25
##   mid E2  123  56  49  67  57  86  68  51  31  25  29

table(e2_full_long$e2_bin, e2_full_long$bin_pain)

##          
##           0 pain Any pain
##   high E2     77      271
##   low E2      42      222
##   mid E2     123      519

chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$NRS_pain))

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and as.factor(e2_full_long$NRS_pain)
## X-squared = 44.635, df = 20, p-value = 0.001236

chisq.test(e2_full_long$e2_bin, e2_full_long$bin_pain) #p-value = 0.1896

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long$e2_bin and e2_full_long$bin_pain
## X-squared = 3.7424, df = 2, p-value = 0.1539

kruskal.test(NRS_pain ~ e2_bin, data = e2_full_long) # p-value = 0.00459

## 
##  Kruskal-Wallis rank sum test
## 
## data:  NRS_pain by e2_bin
## Kruskal-Wallis chi-squared = 7.1609, df = 2, p-value = 0.02786

summary table

#each time point
e2_full_long %>%
  #filter(time == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
 # add_overall() %>%
  add_p() %>%
  bold_p()

**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**
Characteristic	Relative Estrogen Bin			p-value²
Characteristic	high E2, N = 357¹	low E2, N = 273¹	mid E2, N = 666¹	p-value²
ED_Age	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade				0.001
Advanced Degree	18 (5.0%)	18 (6.6%)	51 (7.7%)
Bachelor's Degree	60 (17%)	69 (25%)	90 (14%)
High School or Less	138 (39%)	93 (34%)	246 (37%)
Some College or Associate Degree	141 (39%)	93 (34%)	279 (42%)
BMI	31 (10)	28 (6)	30 (8)	0.005
Unknown	18	21	57
Race				<0.001
Hispanic	42 (12%)	54 (20%)	96 (15%)
Non-Hispanic Black	165 (47%)	96 (35%)	324 (49%)
Non-Hispanic Other	33 (9.3%)	15 (5.5%)	21 (3.2%)
Non-Hispanic White	114 (32%)	108 (40%)	219 (33%)
Unknown	3	0	6
sqrt.conc	9.63 (3.08)	3.01 (0.57)	4.85 (0.74)	<0.001
ED_Pain	7 (2)	6 (3)	6 (3)	0.005
bin_pain				0.2
0 pain	77 (22%)	42 (16%)	123 (19%)
Any pain	271 (78%)	222 (84%)	519 (81%)
Unknown	9	9	24
NRS_pain	4.5 (3.3)	4.4 (3.2)	4.0 (3.0)	0.028
Unknown	9	9	24
bin_pain2				0.020
0 pain	77 (22%)	42 (16%)	123 (19%)
1 to 9 pain	247 (71%)	197 (75%)	490 (76%)
10 pain	24 (6.9%)	25 (9.5%)	29 (4.5%)
Unknown	9	9	24
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#bin pain 2 and bin pain 3 significant (p<.001)

#month 2
e2_full_long %>%
  filter(timepoint == 2) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**WK8 Table: Features by Estrogen Bin, 8 weeks**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'

**WK8 Table: Features by Estrogen Bin, 8 weeks**
Characteristic	Overall, N = 432¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 432¹	high E2, N = 119¹	low E2, N = 91¹	mid E2, N = 222¹	p-value²
ED_Age	37 (13)	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade					0.3
Advanced Degree	29 (6.7%)	6 (5.0%)	6 (6.6%)	17 (7.7%)
Bachelor's Degree	73 (17%)	20 (17%)	23 (25%)	30 (14%)
High School or Less	159 (37%)	46 (39%)	31 (34%)	82 (37%)
Some College or Associate Degree	171 (40%)	47 (39%)	31 (34%)	93 (42%)
BMI	30 (8)	31 (10)	28 (6)	30 (8)	0.2
Unknown	32	6	7	19
Race
Hispanic	64 (15%)	14 (12%)	18 (20%)	32 (15%)
Non-Hispanic Black	195 (45%)	55 (47%)	32 (35%)	108 (49%)
Non-Hispanic Other	23 (5.4%)	11 (9.3%)	5 (5.5%)	7 (3.2%)
Non-Hispanic White	147 (34%)	38 (32%)	36 (40%)	73 (33%)
Unknown	3	1	0	2
sqrt.conc	5.78 (3.02)	9.63 (3.09)	3.01 (0.57)	4.85 (0.74)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.2
bin_pain					0.7
0 pain	52 (13%)	16 (14%)	9 (10%)	27 (13%)
Any pain	360 (87%)	98 (86%)	78 (90%)	184 (87%)
Unknown	20	5	4	11
NRS_pain	4.7 (3.0)	5.1 (3.1)	4.7 (3.0)	4.4 (2.9)	0.095
Unknown	20	5	4	11
bin_pain2					0.5
0 pain	52 (13%)	16 (14%)	9 (10%)	27 (13%)
1 to 9 pain	331 (80%)	87 (76%)	71 (82%)	173 (82%)
10 pain	29 (7.0%)	11 (9.6%)	7 (8.0%)	11 (5.2%)
Unknown	20	5	4	11
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#bin pain 3 significant (p=0.047) but not bin_pain2

#month 3
e2_full_long %>%
  filter(timepoint == 3) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M3 Table: Features by Estrogen Bin, 3 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'

**M3 Table: Features by Estrogen Bin, 3 months**
Characteristic	Overall, N = 432¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 432¹	high E2, N = 119¹	low E2, N = 91¹	mid E2, N = 222¹	p-value²
ED_Age	37 (13)	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade					0.3
Advanced Degree	29 (6.7%)	6 (5.0%)	6 (6.6%)	17 (7.7%)
Bachelor's Degree	73 (17%)	20 (17%)	23 (25%)	30 (14%)
High School or Less	159 (37%)	46 (39%)	31 (34%)	82 (37%)
Some College or Associate Degree	171 (40%)	47 (39%)	31 (34%)	93 (42%)
BMI	30 (8)	31 (10)	28 (6)	30 (8)	0.2
Unknown	32	6	7	19
Race
Hispanic	64 (15%)	14 (12%)	18 (20%)	32 (15%)
Non-Hispanic Black	195 (45%)	55 (47%)	32 (35%)	108 (49%)
Non-Hispanic Other	23 (5.4%)	11 (9.3%)	5 (5.5%)	7 (3.2%)
Non-Hispanic White	147 (34%)	38 (32%)	36 (40%)	73 (33%)
Unknown	3	1	0	2
sqrt.conc	5.78 (3.02)	9.63 (3.09)	3.01 (0.57)	4.85 (0.74)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.2
bin_pain					0.6
0 pain	77 (19%)	25 (22%)	14 (16%)	38 (18%)
Any pain	333 (81%)	90 (78%)	72 (84%)	171 (82%)
Unknown	22	4	5	13
NRS_pain	4.2 (3.1)	4.4 (3.3)	4.5 (3.2)	3.9 (2.9)	0.3
Unknown	22	4	5	13
bin_pain2					0.3
0 pain	77 (19%)	25 (22%)	14 (16%)	38 (18%)
1 to 9 pain	309 (75%)	82 (71%)	64 (74%)	163 (78%)
10 pain	24 (5.9%)	8 (7.0%)	8 (9.3%)	8 (3.8%)
Unknown	22	4	5	13
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#bin_pain3 is sig (p=0.012) but not bin pain 2 

#month 6
e2_full_long %>%
  filter(timepoint == 6) %>%
  select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
  tbl_summary(by = e2_bin,
  statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
  modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
  modify_caption("**M6 Table: Features by Estrogen Bin, 6 months**") %>%
  add_overall() %>%
  add_p() %>%
  bold_p()

## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6.  LDKEY=620 is too small for this problem,
##   (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'

**M6 Table: Features by Estrogen Bin, 6 months**
Characteristic	Overall, N = 432¹	Relative Estrogen Bin			p-value²
Characteristic	Overall, N = 432¹	high E2, N = 119¹	low E2, N = 91¹	mid E2, N = 222¹	p-value²
ED_Age	37 (13)	33 (11)	44 (13)	37 (14)	<0.001
ED_HighestGrade					0.3
Advanced Degree	29 (6.7%)	6 (5.0%)	6 (6.6%)	17 (7.7%)
Bachelor's Degree	73 (17%)	20 (17%)	23 (25%)	30 (14%)
High School or Less	159 (37%)	46 (39%)	31 (34%)	82 (37%)
Some College or Associate Degree	171 (40%)	47 (39%)	31 (34%)	93 (42%)
BMI	30 (8)	31 (10)	28 (6)	30 (8)	0.2
Unknown	32	6	7	19
Race
Hispanic	64 (15%)	14 (12%)	18 (20%)	32 (15%)
Non-Hispanic Black	195 (45%)	55 (47%)	32 (35%)	108 (49%)
Non-Hispanic Other	23 (5.4%)	11 (9.3%)	5 (5.5%)	7 (3.2%)
Non-Hispanic White	147 (34%)	38 (32%)	36 (40%)	73 (33%)
Unknown	3	1	0	2
sqrt.conc	5.78 (3.02)	9.63 (3.09)	3.01 (0.57)	4.85 (0.74)	<0.001
ED_Pain	6 (3)	7 (2)	6 (3)	6 (3)	0.2
bin_pain					0.3
0 pain	113 (26%)	36 (30%)	19 (21%)	58 (26%)
Any pain	319 (74%)	83 (70%)	72 (79%)	164 (74%)
NRS_pain	3.7 (3.2)	3.9 (3.3)	4.1 (3.4)	3.5 (3.0)	0.4
bin_pain2					0.12
0 pain	113 (26%)	36 (30%)	19 (21%)	58 (26%)
1 to 9 pain	294 (68%)	78 (66%)	62 (68%)	154 (69%)
10 pain	25 (5.8%)	5 (4.2%)	10 (11%)	10 (4.5%)
¹ Mean (SD); n (%)
² Kruskal-Wallis rank sum test; Pearson’s Chi-squared test

#bin pain 2 sig (p=0.021) and bin pain 3 sig (p<.001)

stats for new categories

table(e2_full_long$e2_bin, e2_full_long$bin_pain2)

##          
##           0 pain 1 to 9 pain 10 pain
##   high E2     77         247      24
##   low E2      42         197      25
##   mid E2     123         490      29

round(prop.table(table(e2_full_long$e2_bin, e2_full_long$bin_pain2), margin = 1) * 100,2)

##          
##           0 pain 1 to 9 pain 10 pain
##   high E2  22.13       70.98    6.90
##   low E2   15.91       74.62    9.47
##   mid E2   19.16       76.32    4.52

chisq.test(e2_full_long[e2_full_long$timepoint == 2, "e2_bin"], e2_full_long[e2_full_long$timepoint == 2, "bin_pain2"]) # p-value = 0.03053

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$timepoint == 2, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 2, "bin_pain2"]
## X-squared = 3.0896, df = 4, p-value = 0.5429

chisq.test(e2_full_long[e2_full_long$timepoint == 3, "e2_bin"], e2_full_long[e2_full_long$timepoint == 3, "bin_pain2"]) #p-value = 0.007257

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$timepoint == 3, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 3, "bin_pain2"]
## X-squared = 4.7671, df = 4, p-value = 0.312

chisq.test(e2_full_long[e2_full_long$timepoint == 6, "e2_bin"], e2_full_long[e2_full_long$timepoint == 6, "bin_pain2"]) #p-value = 7.419e-05

## 
##  Pearson's Chi-squared test
## 
## data:  e2_full_long[e2_full_long$timepoint == 6, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 6, "bin_pain2"]
## X-squared = 7.3016, df = 4, p-value = 0.1208

redo model from above

MARCH 2024 NEW E2 DATA CODE GLS MODEL

e2_model <- gls(NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ timepoint | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)

summary(e2_model)

## Generalized least squares fit by REML
##   Model: NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade +      BMI + ED_Pain + Race + Plate.Number + e2_bin 
##   Data: e2_full_long_gls_complete 
##        AIC      BIC    logLik
##   3117.277 3194.474 -1541.638
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~timepoint | PID 
##  Parameter estimate(s):
##      Phi1 
## 0.7112736 
## 
## Coefficients:
##                                                      Value Std.Error   t-value
## (Intercept)                                      2.2400046 1.1756720  1.905297
## timepoint                                       -0.1500786 0.0480453 -3.123692
## sqrt.conc                                       -0.0013510 0.0759092 -0.017797
## ED_Age                                           0.0355369 0.0103781  3.424209
## ED_HighestGradeBachelor's Degree                 0.5937591 0.5415318  1.096444
## ED_HighestGradeHigh School or Less               1.4262587 0.5198131  2.743791
## ED_HighestGradeSome College or Associate Degree  1.4480613 0.5061676  2.860833
## BMI                                              0.0267255 0.0147797  1.808262
## ED_Pain                                          0.1949444 0.0518378  3.760663
## RaceNon-Hispanic Black                          -0.1159866 0.3971122 -0.292075
## RaceNon-Hispanic Other                          -0.6842188 0.6211972 -1.101452
## RaceNon-Hispanic White                          -0.7103415 0.4011102 -1.770938
## Plate.Number                                     0.0051389 0.0203987  0.251921
## e2_binlow E2                                    -0.3251953 0.6294793 -0.516610
## e2_binmid E2                                    -0.7181006 0.4736169 -1.516205
##                                                 p-value
## (Intercept)                                      0.0572
## timepoint                                        0.0019
## sqrt.conc                                        0.9858
## ED_Age                                           0.0007
## ED_HighestGradeBachelor's Degree                 0.2733
## ED_HighestGradeHigh School or Less               0.0062
## ED_HighestGradeSome College or Associate Degree  0.0044
## BMI                                              0.0710
## ED_Pain                                          0.0002
## RaceNon-Hispanic Black                           0.7703
## RaceNon-Hispanic Other                           0.2711
## RaceNon-Hispanic White                           0.0770
## Plate.Number                                     0.8012
## e2_binlow E2                                     0.6056
## e2_binmid E2                                     0.1299
## 
##  Correlation: 
##                                                 (Intr) timpnt sqrt.c ED_Age
## timepoint                                       -0.163                     
## sqrt.conc                                       -0.689  0.000              
## ED_Age                                          -0.317  0.000  0.095       
## ED_HighestGradeBachelor's Degree                -0.150  0.000 -0.055  0.039
## ED_HighestGradeHigh School or Less              -0.253  0.000  0.010  0.170
## ED_HighestGradeSome College or Associate Degree -0.187  0.000 -0.046  0.088
## BMI                                             -0.418  0.000  0.113 -0.154
## ED_Pain                                         -0.272  0.000 -0.035  0.047
## RaceNon-Hispanic Black                          -0.109  0.000  0.029 -0.142
## RaceNon-Hispanic Other                          -0.112  0.000  0.027 -0.036
## RaceNon-Hispanic White                          -0.129  0.000  0.028 -0.181
## Plate.Number                                    -0.175  0.000  0.014  0.006
## e2_binlow E2                                    -0.628  0.000  0.803 -0.103
## e2_binmid E2                                    -0.660  0.000  0.771 -0.022
##                                                 ED_HGD ED_SoL EDCoAD BMI   
## timepoint                                                                  
## sqrt.conc                                                                  
## ED_Age                                                                     
## ED_HighestGradeBachelor's Degree                                           
## ED_HighestGradeHigh School or Less               0.723                     
## ED_HighestGradeSome College or Associate Degree  0.751  0.833              
## BMI                                             -0.113 -0.057 -0.099       
## ED_Pain                                         -0.116 -0.213 -0.152  0.029
## RaceNon-Hispanic Black                          -0.190 -0.318 -0.322  0.004
## RaceNon-Hispanic Other                          -0.073 -0.177 -0.148 -0.047
## RaceNon-Hispanic White                          -0.198 -0.212 -0.219 -0.018
## Plate.Number                                    -0.086 -0.016 -0.085  0.109
## e2_binlow E2                                    -0.071 -0.007 -0.039  0.191
## e2_binmid E2                                    -0.026  0.013 -0.022  0.133
##                                                 ED_Pan RcN-HB RcN-HO RcN-HW
## timepoint                                                                  
## sqrt.conc                                                                  
## ED_Age                                                                     
## ED_HighestGradeBachelor's Degree                                           
## ED_HighestGradeHigh School or Less                                         
## ED_HighestGradeSome College or Associate Degree                            
## BMI                                                                        
## ED_Pain                                                                    
## RaceNon-Hispanic Black                           0.022                     
## RaceNon-Hispanic Other                           0.088  0.487              
## RaceNon-Hispanic White                           0.117  0.722  0.471       
## Plate.Number                                     0.056 -0.106 -0.124 -0.164
## e2_binlow E2                                     0.015  0.089  0.045  0.074
## e2_binmid E2                                     0.021  0.097  0.121  0.098
##                                                 Plt.Nm e2_bnlE2
## timepoint                                                      
## sqrt.conc                                                      
## ED_Age                                                         
## ED_HighestGradeBachelor's Degree                               
## ED_HighestGradeHigh School or Less                             
## ED_HighestGradeSome College or Associate Degree                
## BMI                                                            
## ED_Pain                                                        
## RaceNon-Hispanic Black                                         
## RaceNon-Hispanic Other                                         
## RaceNon-Hispanic White                                         
## Plate.Number                                                   
## e2_binlow E2                                    -0.058         
## e2_binmid E2                                    -0.011  0.829  
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.42631492 -0.72849950  0.00436402  0.71323539  2.35250577 
## 
## Residual standard error: 2.420916 
## Degrees of freedom: 708 total; 693 residual

#recode the factors so the binned pain is the outcome of interest
e2_full_long$bin_pain4 <- recode_factor(e2_full_long$bin_pain2,
                                        `0 pain` = "1",
                                        `1 to 9 pain` = "2",
                                        `10 pain` = "3") #new dimensions 1163 x 17

#multinomial logit regression
e2_model <- multinom(bin_pain4 ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ timepoint | PID), control = list(singular.ok = FALSE), data=e2_full_long) #e2_bin low E2    p = 2.398452e-03 = 0.002398452

## # weights:  48 (30 variable)
## initial  value 1277.686092 
## iter  10 value 978.687050
## iter  20 value 828.822196
## iter  30 value 768.995579
## iter  40 value 768.314759
## final  value 768.314605 
## converged

#library(kableExtra)
tidy(e2_model, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE)

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	1.1476098	0.7192362	1.5955951	0.1105792	-0.2620673	2.5572868
2	timepoint	-0.1977753	0.0441436	-4.4802701	0.0000075	-0.2842952	-0.1112554
2	sqrt.conc	-0.0287966	0.0400877	-0.7183419	0.4725465	-0.1073670	0.0497737
2	ED_Age	0.0253562	0.0066856	3.7926427	0.0001491	0.0122526	0.0384598
2	ED_HighestGradeBachelor’s Degree	0.1368416	0.3705171	0.3693261	0.7118847	-0.5893586	0.8630418
2	ED_HighestGradeHigh School or Less	-0.1608027	0.3463921	-0.4642217	0.6424889	-0.8397188	0.5181133
2	ED_HighestGradeSome College or Associate Degree	-0.0484946	0.3412816	-0.1420956	0.8870045	-0.7173943	0.6204051
2	BMI	0.0115329	0.0098899	1.1661257	0.2435636	-0.0078510	0.0309168
2	ED_Pain	0.0911738	0.0316376	2.8818140	0.0039539	0.0291652	0.1531825
2	RaceNon-Hispanic Black	-0.5527326	0.2668155	-2.0715908	0.0383036	-1.0756814	-0.0297838
2	RaceNon-Hispanic Other	-0.3304876	0.3902482	-0.8468653	0.3970702	-1.0953600	0.4343847
2	RaceNon-Hispanic White	-0.3713711	0.2762933	-1.3441190	0.1789099	-0.9128961	0.1701539
2	Plate.Number	-0.0211626	0.0119249	-1.7746606	0.0759539	-0.0445349	0.0022097
2	e2_binlow E2	0.1952720	0.3712315	0.5260114	0.5988803	-0.5323284	0.9228724
2	e2_binmid E2	0.0093582	0.2724513	0.0343481	0.9725995	-0.5246365	0.5433529
3	(Intercept)	-3.8222607	1.6508923	-2.3152696	0.0205982	-7.0579502	-0.5865713
3	timepoint	-0.2117971	0.0815971	-2.5956444	0.0094414	-0.3717245	-0.0518697
3	sqrt.conc	-0.0327149	0.0825517	-0.3962960	0.6918867	-0.1945133	0.1290835
3	ED_Age	0.0573083	0.0119448	4.7977392	0.0000016	0.0338968	0.0807197
3	ED_HighestGradeBachelor’s Degree	1.0048357	1.1494872	0.8741600	0.3820311	-1.2481178	3.2577893
3	ED_HighestGradeHigh School or Less	1.6704468	1.0845166	1.5402684	0.1234949	-0.4551668	3.7960603
3	ED_HighestGradeSome College or Associate Degree	1.7184618	1.0819146	1.5883526	0.1122066	-0.4020517	3.8389754
3	BMI	-0.0240810	0.0196057	-1.2282606	0.2193491	-0.0625075	0.0143456
3	ED_Pain	0.2963213	0.0640951	4.6231501	0.0000038	0.1706972	0.4219455
3	RaceNon-Hispanic Black	-0.5835581	0.4182776	-1.3951455	0.1629720	-1.4033671	0.2362509
3	RaceNon-Hispanic Other	-0.5588189	0.6948961	-0.8041762	0.4212952	-1.9207901	0.8031523
3	RaceNon-Hispanic White	-0.8144662	0.4747860	-1.7154384	0.0862649	-1.7450297	0.1160973
3	Plate.Number	-0.0450055	0.0221329	-2.0334249	0.0420096	-0.0883852	-0.0016259
3	e2_binlow E2	0.1952197	0.6764448	0.2885967	0.7728900	-1.1305877	1.5210272
3	e2_binmid E2	-0.6145368	0.5141257	-1.1953045	0.2319681	-1.6222048	0.3931311

#e2_bin low E2 on category 3 (10 pain)  p = 2.398452e-03 = 0.002398452  
#this means that having a LOW E2 category is significantly associated with having level 10 PAIN rather than Level 0 Pain



# Fit the multinomial logistic regression model for cases where e2_full_long$time == 2 months
model_m2 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 2, control = list(singular.ok = FALSE), data = e2_full_long)

## # weights:  45 (28 variable)
## initial  value 421.867119 
## iter  10 value 247.665633
## iter  20 value 222.152402
## iter  30 value 220.540297
## iter  40 value 220.461828
## final  value 220.461395 
## converged

tidy(model_m2, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #low e2 significant vs level 10 pain

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	0.6332172	1.4645354	0.4323673	0.6654745	-2.2372194	3.5036537
2	sqrt.conc	-0.0439258	0.0817465	-0.5373413	0.5910319	-0.2041460	0.1162945
2	ED_Age	0.0345304	0.0143728	2.4024876	0.0162840	0.0063603	0.0627005
2	ED_HighestGradeBachelor’s Degree	0.0253283	0.7807131	0.0324425	0.9741191	-1.5048413	1.5554980
2	ED_HighestGradeHigh School or Less	-0.5408934	0.7316123	-0.7393170	0.4597145	-1.9748272	0.8930404
2	ED_HighestGradeSome College or Associate Degree	-0.1115448	0.7245922	-0.1539415	0.8776558	-1.5317195	1.3086299
2	BMI	-0.0011304	0.0200715	-0.0563180	0.9550884	-0.0404699	0.0382091
2	ED_Pain	0.2278132	0.0643531	3.5400523	0.0004000	0.1016835	0.3539429
2	RaceNon-Hispanic Black	-0.9946271	0.6589663	-1.5093747	0.1312030	-2.2861773	0.2969231
2	RaceNon-Hispanic Other	-0.8541552	0.8588881	-0.9944895	0.3199846	-2.5375450	0.8292346
2	RaceNon-Hispanic White	-0.9587317	0.6680304	-1.4351618	0.1512410	-2.2680471	0.3505838
2	Plate.Number	0.0168240	0.0253360	0.6640359	0.5066673	-0.0328336	0.0664816
2	e2_binlow E2	-0.0731302	0.7770463	-0.0941130	0.9250194	-1.5961130	1.4498527
2	e2_binmid E2	-0.1387671	0.5726973	-0.2423044	0.8085443	-1.2612332	0.9836990
3	(Intercept)	-15.6095805	1.6871690	-9.2519365	0.0000000	-18.9163710	-12.3027900
3	sqrt.conc	-0.0780987	0.1330539	-0.5869704	0.5572236	-0.3388795	0.1826821
3	ED_Age	0.0547499	0.0213335	2.5663871	0.0102764	0.0129371	0.0965627
3	ED_HighestGradeBachelor’s Degree	13.6045622	0.8144407	16.7041781	0.0000000	12.0082878	15.2008366
3	ED_HighestGradeHigh School or Less	13.5770128	0.6342322	21.4070066	0.0000000	12.3339405	14.8200851
3	ED_HighestGradeSome College or Associate Degree	14.1572732	0.6699970	21.1303518	0.0000000	12.8441031	15.4704432
3	BMI	-0.0496474	0.0349347	-1.4211501	0.1552731	-0.1181182	0.0188233
3	ED_Pain	0.4303770	0.1148114	3.7485562	0.0001779	0.2053508	0.6554032
3	RaceNon-Hispanic Black	-1.1582314	0.8419886	-1.3755904	0.1689485	-2.8084987	0.4920359
3	RaceNon-Hispanic Other	-1.7429841	1.3997560	-1.2452056	0.2130562	-4.4864555	1.0004873
3	RaceNon-Hispanic White	-1.1919065	0.9022291	-1.3210685	0.1864785	-2.9602431	0.5764301
3	Plate.Number	0.0146474	0.0386702	0.3787780	0.7048527	-0.0611448	0.0904396
3	e2_binlow E2	-0.7254233	1.1592525	-0.6257682	0.5314670	-2.9975163	1.5466698
3	e2_binmid E2	-0.9939188	0.8655141	-1.1483565	0.2508214	-2.6902953	0.7024577

#Fit the model for time point 3 months
model_m3 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 3, control = list(singular.ok = FALSE), data = e2_full_long)

## # weights:  45 (28 variable)
## initial  value 419.669894 
## iter  10 value 266.809692
## iter  20 value 247.485588
## iter  30 value 246.566597
## final  value 246.564576 
## converged

tidy(model_m3, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #no significance

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	0.7142391	1.2378680	0.5769913	0.5639453	-1.7119376	3.1404158
2	sqrt.conc	-0.0530729	0.0694125	-0.7646012	0.4445091	-0.1891189	0.0829731
2	ED_Age	0.0269198	0.0116536	2.3099983	0.0208882	0.0040792	0.0497604
2	ED_HighestGradeBachelor’s Degree	0.1492462	0.6786421	0.2199189	0.8259343	-1.1808679	1.4793603
2	ED_HighestGradeHigh School or Less	-0.2373798	0.6272214	-0.3784626	0.7050870	-1.4667113	0.9919516
2	ED_HighestGradeSome College or Associate Degree	-0.1810475	0.6186932	-0.2926289	0.7698058	-1.3936638	1.0315688
2	BMI	0.0204176	0.0175726	1.1619012	0.2452756	-0.0140240	0.0548592
2	ED_Pain	0.0504263	0.0553806	0.9105411	0.3625372	-0.0581177	0.1589703
2	RaceNon-Hispanic Black	-0.7860288	0.4907704	-1.6016223	0.1092391	-1.7479211	0.1758635
2	RaceNon-Hispanic Other	-0.2419856	0.7248566	-0.3338393	0.7385008	-1.6626784	1.1787072
2	RaceNon-Hispanic White	-0.6911603	0.5073726	-1.3622342	0.1731240	-1.6855924	0.3032717
2	Plate.Number	-0.0074184	0.0207205	-0.3580208	0.7203278	-0.0480297	0.0331930
2	e2_binlow E2	0.0100750	0.6477340	0.0155542	0.9875900	-1.2594603	1.2796103
2	e2_binmid E2	-0.0795187	0.4755080	-0.1672290	0.8671899	-1.0114973	0.8524599
3	(Intercept)	-3.8298945	2.5683366	-1.4911965	0.1359099	-8.8637417	1.2039528
3	sqrt.conc	-0.0088107	0.1510366	-0.0583348	0.9534819	-0.3048370	0.2872156
3	ED_Age	0.0541307	0.0219750	2.4632856	0.0137670	0.0110605	0.0972009
3	ED_HighestGradeBachelor’s Degree	-0.1025653	1.4258754	-0.0719314	0.9426565	-2.8972298	2.6920992
3	ED_HighestGradeHigh School or Less	0.4101094	1.2582336	0.3259406	0.7444693	-2.0559832	2.8762020
3	ED_HighestGradeSome College or Associate Degree	0.0358165	1.2661373	0.0282880	0.9774324	-2.4457670	2.5174000
3	BMI	-0.0028174	0.0340501	-0.0827416	0.9340570	-0.0695544	0.0639197
3	ED_Pain	0.3491565	0.1237802	2.8207789	0.0047907	0.1065518	0.5917611
3	RaceNon-Hispanic Black	-1.1091481	0.7432089	-1.4923773	0.1356003	-2.5658107	0.3475145
3	RaceNon-Hispanic Other	-0.7506290	1.3307111	-0.5640811	0.5726989	-3.3587749	1.8575169
3	RaceNon-Hispanic White	-0.8894535	0.8131545	-1.0938309	0.2740292	-2.4832071	0.7043000
3	Plate.Number	-0.0826449	0.0446737	-1.8499656	0.0643185	-0.1702038	0.0049140
3	e2_binlow E2	0.3192749	1.2475478	0.2559220	0.7980111	-2.1258739	2.7644238
3	e2_binmid E2	-0.6698535	0.9487884	-0.7060093	0.4801823	-2.5294445	1.1897376

#Fit the model for time point 6 months
model_m6 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 6, control = list(singular.ok = FALSE), data = e2_full_long)

## # weights:  45 (28 variable)
## initial  value 436.149079 
## iter  10 value 307.224469
## iter  20 value 284.418287
## iter  30 value 282.243213
## iter  40 value 282.104992
## iter  50 value 282.103013
## iter  50 value 282.103010
## iter  50 value 282.103010
## final  value 282.103010 
## converged

tidy(model_m6, conf.int = TRUE) %>% 
  kable() %>% kable_styling("basic", full_width = FALSE) #plate number is significant????, no e2 significance

y.level	term	estimate	std.error	statistic	p.value	conf.low	conf.high
2	(Intercept)	0.0712850	1.0728075	0.0664472	0.9470218	-2.0313790	2.1739491
2	sqrt.conc	-0.0006349	0.0632923	-0.0100314	0.9919963	-0.1246856	0.1234158
2	ED_Age	0.0199455	0.0101454	1.9659702	0.0493021	0.0000609	0.0398301
2	ED_HighestGradeBachelor’s Degree	0.1881553	0.5495107	0.3424051	0.7320460	-0.8888659	1.2651764
2	ED_HighestGradeHigh School or Less	0.1257468	0.5203582	0.2416543	0.8090481	-0.8941366	1.1456302
2	ED_HighestGradeSome College or Associate Degree	0.0885682	0.5102420	0.1735807	0.8621950	-0.9114878	1.0886241
2	BMI	0.0121089	0.0152307	0.7950334	0.4265941	-0.0177427	0.0419605
2	ED_Pain	0.0421307	0.0495129	0.8509044	0.3948225	-0.0549128	0.1391742
2	RaceNon-Hispanic Black	-0.2452478	0.3799673	-0.6454446	0.5186391	-0.9899700	0.4994744
2	RaceNon-Hispanic Other	-0.2149588	0.5827748	-0.3688541	0.7122365	-1.3571764	0.9272587
2	RaceNon-Hispanic White	0.0861751	0.3968412	0.2171527	0.8280893	-0.6916193	0.8639695
2	Plate.Number	-0.0534691	0.0185320	-2.8852326	0.0039112	-0.0897911	-0.0171471
2	e2_binlow E2	0.4898350	0.5723151	0.8558834	0.3920623	-0.6318820	1.6115520
2	e2_binmid E2	0.1372568	0.4195159	0.3271791	0.7435324	-0.6849792	0.9594929
3	(Intercept)	-17.8847896	1.8883086	-9.4713277	0.0000000	-21.5858064	-14.1837727
3	sqrt.conc	-0.0087239	0.1694346	-0.0514886	0.9589362	-0.3408097	0.3233618
3	ED_Age	0.0756291	0.0214585	3.5244337	0.0004244	0.0335712	0.1176870
3	ED_HighestGradeBachelor’s Degree	12.1648748	1.0485328	11.6018063	0.0000000	10.1097882	14.2199614
3	ED_HighestGradeHigh School or Less	13.7826016	0.6979667	19.7467889	0.0000000	12.4146120	15.1505913
3	ED_HighestGradeSome College or Associate Degree	13.7298894	0.7384538	18.5927530	0.0000000	12.2825465	15.1772323
3	BMI	-0.0246015	0.0351234	-0.7004303	0.4836586	-0.0934421	0.0442391
3	ED_Pain	0.1905089	0.1038627	1.8342384	0.0666186	-0.0130582	0.3940761
3	RaceNon-Hispanic Black	0.1621323	0.7508772	0.2159238	0.8290471	-1.3095600	1.6338245
3	RaceNon-Hispanic Other	0.5361732	1.0952850	0.4895285	0.6244676	-1.6105459	2.6828924
3	RaceNon-Hispanic White	-0.9740496	0.9465993	-1.0289989	0.3034802	-2.8293501	0.8812509
3	Plate.Number	-0.0650576	0.0379438	-1.7145790	0.0864224	-0.1394260	0.0093108
3	e2_binlow E2	1.0543342	1.3008846	0.8104748	0.4176673	-1.4953528	3.6040211
3	e2_binmid E2	-0.1763840	0.9934211	-0.1775521	0.8590748	-2.1234535	1.7706856

#m <- glmer(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin + (1 | PID), data = e2_full_long, family = binomial, control = glmerControl(optimizer = "bobyqa"), nAGQ = 10)

miRNA

miRNA data

#I don't think updated miRNA data was sent? I'll use the miRNA data from 2021 from this original code
#this file was generated from "AURORA_new_QC_sRNA_06142021.R", which I do not have, this is from Ying
miRNA_cleaned <- read.csv("AURORA_sRNA_cleaned_596_06.14.2021.csv")
miRNA_cleaned$sample <- rownames(miRNA_cleaned) 
#miRNA_cleaned dimensions 596 x 1552

Link miRNA with keys

#sample ID to inventory ID and PID Keys
sampleID <- read.csv("ID_to_SampleID_all.csv")
ID_link <- readxl::read_excel("Final Sample List_UNC BSP RNA&Plasma.xlsx")


ID2 <- ID_link %>% 
  right_join(sampleID,by=c("INVENTORY CODE" = "Inventory.code", "SAMPLE ID" = "sampleID_sub")) 

ID_miRNA <- miRNA_cleaned %>%
  mutate(sample=as.numeric(sample)) %>% 
  left_join(ID2,by="sample") #596 samples

length(unique(ID_miRNA$PID)) # unique PID 294

## [1] 294

subset to only the timepoint 0/ED visit miRNA values

ID_miRNA_T0 <- ID_miRNA %>% 
  filter(`ALT ID` == "T0") #dimensions 294 x 1557

miRNA

NEW MARCH 2024 CODE FOR miRNA

none of the PIDs match up between the new estrogen data and the miRNA data

#unique(e2_full_long$PID)
#ID_miRNA$PID
#typeof(e2_full_long$PID)
#typeof(ID_miRNA$PID)

#e2_wide_all<- e2_full_wide %>% 
 # inner_join(ID_miRNA,by="PID") 

#E2_long_all<- e2_full_long %>% 
 # inner_join(ID_miRNA,by="PID")

March 2024 Associating female E2–>Pain, mixture models, LCA

Alice Woolard

2024-03-14

miRNA