library(data.table);library(ggfortify)
library(factoextra);library(ClassDiscovery)
library(stringr);library(lattice);library(kableExtra)
library(ggplot2);library(MASS);library(memisc)
library(tidyverse);library(naniar);library(gtsummary)
require(lme4); require(nnet);
library(broom);library(readxl);library(nlme);library(sjPlot)
library(flexmix);library(poLCA)
read in and subset data
e2_full_long <- as.data.frame(read.csv("cleaned_E2_and_Aurora_Data.csv"))
#subset to estrogen collected at timepoint 0 == e2 collected at ED visit. convert pain to numeric
e2_full_long <- e2_full_long %>%
filter(AlternateID == "T0") %>%
mutate(NRS_pain = as.numeric(NRS_pain))
#1296 x 27
length(unique(e2_full_long[["PID"]])) #432 PID
## [1] 432
March 2024 Batch 2 GLS model
#need complete cases to run this model.
e2_full_long_gls_complete <- e2_full_long[complete.cases(e2_full_long),] #708
e2_full_long_gls <- e2_full_long[complete.cases(e2_full_long$NRS_pain), ] #1254
#e2_model <- gls(NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)
#e2_model <- gls(NRS_pain ~ 1, correlation = corAR1(form = ~ time | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)
#tab_model(e2_model)
Use flexmix package for mixture modeling
df <- e2_full_long_gls #1254
df6 <- e2_full_long_gls %>% filter(timepoint == 6) #432 obs
df6_complete <- e2_full_long_gls_complete %>% filter(timepoint == 6) #236 obs
#attempt #1
m <- flexmix(NRS_pain ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number, k = 2, data=df6_complete)
summary(m)
##
## Call:
## flexmix(formula = NRS_pain ~ sqrt.conc + ED_Age + ED_HighestGrade +
## BMI + ED_Pain + Race + Plate.Number, data = df6_complete,
## k = 2)
##
## prior size post>0 ratio
## Comp.1 0.199 42 126 0.333
## Comp.2 0.801 194 236 0.822
##
## 'log Lik.' -521.7948 (df=27)
## AIC: 1097.59 BIC: 1191.113
parameters(m, component = 1)
## Comp.1
## coef.(Intercept) 6.52428059
## coef.sqrt.conc -0.11282087
## coef.ED_Age -0.03751555
## coef.ED_HighestGradeBachelor's Degree -2.03571560
## coef.ED_HighestGradeHigh School or Less -1.35848215
## coef.ED_HighestGradeSome College or Associate Degree -1.26509036
## coef.BMI 0.16228141
## coef.ED_Pain -0.64069568
## coef.RaceNon-Hispanic Black -0.89860325
## coef.RaceNon-Hispanic Other -5.94522871
## coef.RaceNon-Hispanic White -2.43011311
## coef.Plate.Number 0.20282749
## sigma 0.60344288
attempt #2
ggplot(df6, aes(x = NRS_pain)) +
geom_histogram(aes(NRS_pain, ..density..), binwidth = 1, colour = "black", fill = "white")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#k parameters 2 instead of 3
mo1 <- FLXMRglm(family = "gaussian")
mo2 <- FLXMRglm(family = "gaussian")
#mo3 <- FLXMRglm(family = "gaussian")
m2 <- flexmix(NRS_pain ~ 1, data = df6, k = 2, model = list(mo1, mo2))
#flexfit <- flexmix(NRS_pain ~ 1, data = df6, k = 3, model = list(mo1, mo2, mo3))
c1 <- parameters(m2, component=1)[[1]]
c2 <- parameters(m2, component=2)[[1]]
#c3 <- parameters(m2, component=3)[[1]]
plot_mix_comps <- function(x, mu, sigma, lam) {
lam * dnorm(x, mu, sigma)
}
lam <- table(clusters(m2))
ggplot(df6) +
geom_histogram(aes(NRS_pain, ..density..), binwidth = 1, colour = "black", fill = "white") +
stat_function(geom = "line", fun = plot_mix_comps,
args = list(c1[1], c1[2], lam[1]/sum(lam)),
colour = "red", lwd = 1.5) +
stat_function(geom = "line", fun = plot_mix_comps,
args = list(c2[1], c2[2], lam[2]/sum(lam)),
colour = "blue", lwd = 1.5) +
#stat_function(geom = "line", fun = plot_mix_comps,args = list(c3[1], c3[2], lam[3]/sum(lam)),colour = "green", lwd = 1.5) +
ylab("Density")
#maybe
mixture model attempt #3
seed=69
df6_simple <- df6[,c("PID","NRS_pain","sqrt.conc")]
ggplot(df, aes(x = NRS_pain)) +
geom_density(color = "black", fill = "white") +
labs(x = "Pain", y = "Density", title = "Density Plot of Pain all times")
ggplot(df6_simple, aes(x = NRS_pain)) +
geom_density(color = "black", fill = "white") +
labs(x = "Pain", y = "Density", title = "Density Plot of Pain MONTH 6")
#m3 = flexmix(NRS_pain ~ sqrt.conc, data=df6_simple, k=2)
#summary(m3)
#parameters(m3)
latent class analysis
#recode e2_bin so it can be used in the model
df6_lca <- df6_complete %>%
mutate(e2_bin = recode(e2_bin, "low E2" = 1, "mid E2" = 2, "high E2" = 3))
#everything numeric start at 1, so increment NRS pain
df6_lca$NRS_pain <- df6_lca$NRS_pain + 1
#increment postmenopause
df6_lca$postmenopause <- df6_lca$postmenopause + 1
#increment axial
df6_lca$Axial_Pain <- df6_lca$Axial_Pain + 1
#increment widespread
df6_lca$Widespread_pain_2016 <- df6_lca$Widespread_pain_2016 + 1
#round conc
df6_lca$sqrt.conc <- round(df6_lca$sqrt.conc,digits=0)
#recode bmi
breaks <- c(-Inf, 18.5, 24.9, 29.9, Inf)
labels <- c("Underweight", "Healthy Weight", "Overweight", "Obesity")
df6_lca <- df6_lca %>%
mutate(BMI_cat = cut(BMI, breaks = breaks, labels = labels, include.lowest = TRUE))
#bin pains
df6_lca$bin_pain2 <- as.factor(df6_lca$bin_pain2)
levels(df6_lca$bin_pain2)
## [1] "1 to 9 pain" "10 pain"
f1 <- cbind(NRS_pain, sqrt.conc, Widespread_pain_2016,Axial_Pain, postmenopause,e2_bin)~1
f2 <- cbind(NRS_pain, sqrt.conc, Widespread_pain_2016,Axial_Pain, postmenopause,e2_bin, e2_confidence)~1
f3 <- cbind(Widespread_pain_2016,Axial_Pain, postmenopause, e2_confidence, BMI_cat,bin_pain2)~1
LCA2 <- poLCA(f1, data=df6_lca, nclass=2) #AIC(2): 3371.711 BIC(2): 3610.715
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $NRS_pain
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10)
## class 1: 0 0.1937 0.1285 0.2729 0.1610 0.1184 0.0786 0.0226 0.0103 0.0000
## class 2: 0 0.0088 0.0090 0.0203 0.0675 0.1266 0.1834 0.2227 0.1551 0.0624
## Pr(11)
## class 1: 0.0139
## class 2: 0.1443
##
## $sqrt.conc
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10)
## class 1: 0.0000 0.0587 0.1492 0.2392 0.2078 0.1514 0.0612 0.0215 0.0293 0.0169
## class 2: 0.0078 0.0598 0.1163 0.1967 0.2387 0.0288 0.1123 0.0677 0.0378 0.0403
## Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1: 0.0091 0.0196 0.0174 0.0000 0.0093 0.0000 0 0 0 0.0093
## class 2: 0.0157 0.0147 0.0087 0.0312 0.0000 0.0234 0 0 0 0.0000
##
## $Widespread_pain_2016
## Pr(1) Pr(2)
## class 1: 0.6045 0.3955
## class 2: 0.2875 0.7125
##
## $Axial_Pain
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.1581 0.8419
##
## $postmenopause
## Pr(1) Pr(2)
## class 1: 0.7357 0.2643
## class 2: 0.7386 0.2614
##
## $e2_bin
## Pr(1) Pr(2) Pr(3)
## class 1: 0.2560 0.5504 0.1936
## class 2: 0.2138 0.4343 0.3519
##
## Estimated class population shares
## 0.4564 0.5436
##
## Predicted class memberships (by modal posterior prob.)
## 0.4788 0.5212
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 236
## number of estimated parameters: 69
## residual degrees of freedom: 167
## maximum log-likelihood: -1694.527
##
## AIC(2): 3527.055
## BIC(2): 3766.059
## G^2(2): 982.0811 (Likelihood ratio/deviance statistic)
## X^2(2): 3672.267 (Chi-square goodness of fit)
##
LCA3 <- poLCA(f2, data=df6_lca, nclass=2) #AIC(2): 3957.45 BIC(2): 4217.238
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $NRS_pain
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10)
## class 1: 0 0.0758 0.0606 0.1061 0.1212 0.0758 0.1515 0.1970 0.0909 0.0455
## class 2: 0 0.1000 0.0647 0.1471 0.1059 0.1412 0.1294 0.1059 0.0882 0.0294
## Pr(11)
## class 1: 0.0758
## class 2: 0.0882
##
## $sqrt.conc
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10)
## class 1: 0.0000 0.0000 0.0000 0.0 0.0000 0.0000 0.3182 0.1667 0.1212 0.1061
## class 2: 0.0059 0.0824 0.1824 0.3 0.3118 0.1176 0.0000 0.0000 0.0000 0.0000
## Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1: 0.0455 0.0606 0.0455 0.0606 0.0152 0.0455 0 0 0 0.0152
## class 2: 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0 0 0 0.0000
##
## $Widespread_pain_2016
## Pr(1) Pr(2)
## class 1: 0.4091 0.5909
## class 2: 0.4412 0.5588
##
## $Axial_Pain
## Pr(1) Pr(2)
## class 1: 0.4848 0.5152
## class 2: 0.5647 0.4353
##
## $postmenopause
## Pr(1) Pr(2)
## class 1: 0.9242 0.0758
## class 2: 0.6647 0.3353
##
## $e2_bin
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0000 0.0000 1
## class 2: 0.3235 0.6765 0
##
## $e2_confidence
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.6667 0.1818 0.0758 0.0758
## class 2: 0.4059 0.2588 0.1412 0.1941
##
## Estimated class population shares
## 0.2797 0.7203
##
## Predicted class memberships (by modal posterior prob.)
## 0.2797 0.7203
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 236
## number of estimated parameters: 75
## residual degrees of freedom: 161
## maximum log-likelihood: -1903.725
##
## AIC(2): 3957.45
## BIC(2): 4217.238
## G^2(2): 1296.11 (Likelihood ratio/deviance statistic)
## X^2(2): 8004.288 (Chi-square goodness of fit)
##
LCA4 <- poLCA(f2, data=df6_lca, nclass=3) #AIC(3): 3977.52 BIC(3): 4368.933
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $NRS_pain
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10)
## class 1: 0 0.0957 0.0783 0.1391 0.1130 0.1304 0.1565 0.1217 0.0783 0.0261
## class 2: 0 0.1091 0.0364 0.1636 0.0909 0.1636 0.0727 0.0727 0.1091 0.0364
## class 3: 0 0.0758 0.0606 0.1061 0.1212 0.0758 0.1515 0.1970 0.0909 0.0455
## Pr(11)
## class 1: 0.0609
## class 2: 0.1455
## class 3: 0.0758
##
## $sqrt.conc
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7) Pr(8) Pr(9) Pr(10)
## class 1: 0.0000 0.0000 0.0000 0.3652 0.4609 0.1739 0.0000 0.0000 0.0000 0.0000
## class 2: 0.0182 0.2545 0.5636 0.1636 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## class 3: 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.3182 0.1667 0.1212 0.1061
## Pr(11) Pr(12) Pr(13) Pr(14) Pr(15) Pr(16) Pr(17) Pr(18) Pr(19) Pr(20)
## class 1: 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0 0 0 0.0000
## class 2: 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0 0 0 0.0000
## class 3: 0.0455 0.0606 0.0455 0.0606 0.0152 0.0455 0 0 0 0.0152
##
## $Widespread_pain_2016
## Pr(1) Pr(2)
## class 1: 0.4696 0.5304
## class 2: 0.3818 0.6182
## class 3: 0.4091 0.5909
##
## $Axial_Pain
## Pr(1) Pr(2)
## class 1: 0.5652 0.4348
## class 2: 0.5636 0.4364
## class 3: 0.4848 0.5152
##
## $postmenopause
## Pr(1) Pr(2)
## class 1: 0.7391 0.2609
## class 2: 0.5091 0.4909
## class 3: 0.9242 0.0758
##
## $e2_bin
## Pr(1) Pr(2) Pr(3)
## class 1: 0 1 0
## class 2: 1 0 0
## class 3: 0 0 1
##
## $e2_confidence
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.4957 0.1913 0.1217 0.1913
## class 2: 0.2182 0.4000 0.1818 0.2000
## class 3: 0.6667 0.1818 0.0758 0.0758
##
## Estimated class population shares
## 0.4873 0.2331 0.2797
##
## Predicted class memberships (by modal posterior prob.)
## 0.4873 0.2331 0.2797
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 236
## number of estimated parameters: 113
## residual degrees of freedom: 123
## maximum log-likelihood: -1804.119
##
## AIC(3): 3834.237
## BIC(3): 4225.65
## G^2(3): 1096.897 (Likelihood ratio/deviance statistic)
## X^2(3): 4775.175 (Chi-square goodness of fit)
##
LCA5 <- poLCA(f3, data=df6_lca, nclass=2) #AIC(2): 1979.188 BIC(2): 2038.073 #this and LCA6 are best
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $Widespread_pain_2016
## Pr(1) Pr(2)
## class 1: 0.1844 0.8156
## class 2: 0.5640 0.4360
##
## $Axial_Pain
## Pr(1) Pr(2)
## class 1: 0.0070 0.9930
## class 2: 0.8272 0.1728
##
## $postmenopause
## Pr(1) Pr(2)
## class 1: 0.6979 0.3021
## class 2: 0.7582 0.2418
##
## $e2_confidence
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.4403 0.1990 0.1536 0.2071
## class 2: 0.4993 0.2577 0.1065 0.1365
##
## $BMI_cat
## Underweight Healthy Weight Overweight Obesity
## class 1: 0.0322 0.2209 0.2437 0.5033
## class 2: 0.0088 0.2330 0.3702 0.3879
##
## $bin_pain2
## 1 to 9 pain 10 pain
## class 1: 0.8002 0.1998
## class 2: 0.9764 0.0236
##
## Estimated class population shares
## 0.3473 0.6527
##
## Predicted class memberships (by modal posterior prob.)
## 0.3729 0.6271
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 236
## number of estimated parameters: 21
## residual degrees of freedom: 215
## maximum log-likelihood: -1069.426
##
## AIC(2): 2180.851
## BIC(2): 2253.592
## G^2(2): 176.3656 (Likelihood ratio/deviance statistic)
## X^2(2): 303.8513 (Chi-square goodness of fit)
##
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND
##
LCA6 <- poLCA(f3, data=df6_lca, nclass=3) #AIC(3): 1967.918 BIC(3): 2057.978 #this and LCA5 are best
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $Widespread_pain_2016
## Pr(1) Pr(2)
## class 1: 0.7860 0.2140
## class 2: 0.4941 0.5059
## class 3: 0.2173 0.7827
##
## $Axial_Pain
## Pr(1) Pr(2)
## class 1: 0.8657 0.1343
## class 2: 0.8835 0.1165
## class 3: 0.0000 1.0000
##
## $postmenopause
## Pr(1) Pr(2)
## class 1: 0.8897 0.1103
## class 2: 0.7119 0.2881
## class 3: 0.7081 0.2919
##
## $e2_confidence
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.1135 0.3591 0.4261 0.1014
## class 2: 0.6198 0.2297 0.0000 0.1505
## class 3: 0.4516 0.1984 0.1526 0.1973
##
## $BMI_cat
## Underweight Healthy Weight Overweight Obesity
## class 1: 0.0300 0.0000 0.5419 0.4281
## class 2: 0.0000 0.3044 0.3207 0.3749
## class 3: 0.0324 0.2272 0.2480 0.4924
##
## $bin_pain2
## 1 to 9 pain 10 pain
## class 1: 1.0000 0.0000
## class 2: 0.9688 0.0312
## class 3: 0.8167 0.1833
##
## Estimated class population shares
## 0.1512 0.4658 0.3831
##
## Predicted class memberships (by modal posterior prob.)
## 0.1229 0.4364 0.4407
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 236
## number of estimated parameters: 32
## residual degrees of freedom: 204
## maximum log-likelihood: -1060.64
##
## AIC(3): 2185.28
## BIC(3): 2296.122
## G^2(3): 158.7938 (Likelihood ratio/deviance statistic)
## X^2(3): 261.4029 (Chi-square goodness of fit)
##
# Calculate entropy (3-class mode)l- values closer to 1.0 indicate greater separation of the classes.
entropy<-function (p) sum(-p*log(p))
error_prior <- entropy(LCA6$P) # Class proportions
error_post <- mean(apply(LCA6$posterior, 1, entropy))
(error_prior - error_post) / error_prior
## [1] NaN
#predicted class membership is in:
LCA6$predclass[1:30]
## [1] 3 3 3 1 3 3 2 2 1 3 2 1 3 3 3 1 3 2 1 1 3 2 3 2 2 1 3 3 3 2
#add variable to data set with all variables so it can be used as predictor variable:
df6_lca$class <- LCA6$predclass
ggplot(df6_lca, aes(x = NRS_pain, fill = as.factor(class))) +
geom_density(alpha = 0.5) +
labs(x = "Pain Across m6", y = "Density", title = "Pain m6 by latent class")
ggplot(df6_lca, aes(x = sqrt.conc, fill = as.factor(class))) +
geom_density(alpha = 0.5) +
labs(x = "e2 Across m6", y = "Density", title = "e2 m6 by latent class")
ggplot(df6_lca, aes(x = BMI, fill = as.factor(class))) +
geom_density(alpha = 0.5) +
labs(x = "bmi Across m6", y = "Density", title = "bmi m6 by latent class")
#adding the E2_confidence feature changes the E2 concentration binning by class
plot(LCA5)
plot(LCA6)
plotting affinity classes
f2a <- cbind(Widespread_pain_2016,Axial_Pain, postmenopause, e2_confidence, BMI_cat,bin_pain2)~e2_bin
nes2a <- poLCA(f2a,df6_lca,nclass=3,nrep=5)
## Model 1: llik = -1040.359 ... best llik = -1040.359
## Model 2: llik = -1067.829 ... best llik = -1040.359
## Model 3: llik = -1041.097 ... best llik = -1040.359
## Model 4: llik = -1041.097 ... best llik = -1040.359
## Model 5: llik = -1040.359 ... best llik = -1040.359
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $Widespread_pain_2016
## Pr(1) Pr(2)
## class 1: 0.1806 0.8194
## class 2: 0.9964 0.0036
## class 3: 0.4113 0.5887
##
## $Axial_Pain
## Pr(1) Pr(2)
## class 1: 0.3937 0.6063
## class 2: 1.0000 0.0000
## class 3: 0.4817 0.5183
##
## $postmenopause
## Pr(1) Pr(2)
## class 1: 0.4293 0.5707
## class 2: 0.7538 0.2462
## class 3: 0.9742 0.0258
##
## $e2_confidence
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.2686 0.2932 0.1085 0.3296
## class 2: 0.3384 0.3507 0.1987 0.1122
## class 3: 0.6996 0.1489 0.1047 0.0468
##
## $BMI_cat
## Underweight Healthy Weight Overweight Obesity
## class 1: 0.0142 0.2075 0.4054 0.3729
## class 2: 0.0222 0.0821 0.4658 0.4299
## class 3: 0.0171 0.3028 0.2094 0.4707
##
## $bin_pain2
## 1 to 9 pain 10 pain
## class 1: 0.8429 0.1571
## class 2: 1.0000 0.0000
## class 3: 0.9394 0.0606
##
## Estimated class population shares
## 0.3626 0.1787 0.4588
##
## Predicted class memberships (by modal posterior prob.)
## 0.3263 0.1992 0.4746
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) -1.51919 0.81303 -1.869 0.063
## e2_bin 0.48322 0.44740 1.080 0.281
## =========================================================
## 3 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) -5.18142 1.44477 -3.586 0
## e2_bin 2.62987 0.66141 3.976 0
## =========================================================
## number of observations: 236
## number of estimated parameters: 34
## residual degrees of freedom: 202
## maximum log-likelihood: -1040.359
##
## AIC(3): 2148.718
## BIC(3): 2266.488
## X^2(3): 256.2171 (Chi-square goodness of fit)
##
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND
##
## ALERT: estimation algorithm automatically restarted with new initial values
##
pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)
matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
main="e2 as a predictor of pain affinity class",
xlab="estrogen category ",
ylab="Probability of latent class membership",lwd=2,col=1)
text(5.9,0.35,"Other")
summary table
length(unique(e2_full_long[["PID"]])) #432 PID
## [1] 432
e2_full_long %>%
select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**Full Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
#add_overall() %>%
add_p()%>%
bold_p()
| Characteristic | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|
| high E2, N = 3571 | low E2, N = 2731 | mid E2, N = 6661 | ||
| ED_Age | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.001 | |||
| Advanced Degree | 18 (5.0%) | 18 (6.6%) | 51 (7.7%) | |
| Bachelor's Degree | 60 (17%) | 69 (25%) | 90 (14%) | |
| High School or Less | 138 (39%) | 93 (34%) | 246 (37%) | |
| Some College or Associate Degree | 141 (39%) | 93 (34%) | 279 (42%) | |
| BMI | 31 (10) | 28 (6) | 30 (8) | 0.005 |
| Unknown | 18 | 21 | 57 | |
| Widespread_pain_2016 | 152 (44%) | 120 (46%) | 232 (36%) | 0.009 |
| Unknown | 9 | 11 | 25 | |
| Axial_Pain | 169 (62%) | 127 (58%) | 289 (56%) | 0.3 |
| Unknown | 86 | 54 | 152 | |
| ED_Pain | 7 (2) | 6 (3) | 6 (3) | 0.005 |
| NRS_pain | 4.5 (3.3) | 4.4 (3.2) | 4.0 (3.0) | 0.028 |
| Unknown | 9 | 9 | 24 | |
| bin_pain | 0.2 | |||
| 0 pain | 77 (22%) | 42 (16%) | 123 (19%) | |
| Any pain | 271 (78%) | 222 (84%) | 519 (81%) | |
| Unknown | 9 | 9 | 24 | |
| bin_pain2 | 0.020 | |||
| 0 pain | 77 (22%) | 42 (16%) | 123 (19%) | |
| 1 to 9 pain | 247 (71%) | 197 (75%) | 490 (76%) | |
| 10 pain | 24 (6.9%) | 25 (9.5%) | 29 (4.5%) | |
| Unknown | 9 | 9 | 24 | |
| postmenopause | 27 (7.6%) | 114 (42%) | 153 (23%) | <0.001 |
| 1 Mean (SD); n (%) | ||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | ||||
# NRS pain is significant by estrogen bin?
#when all time points are combined, pain is significant across the different E2 categories
#but this has limited interpretability because pain above is across all time points
#summary table for MONTH 2
#this is the only one sig for some reason
e2_full_long %>%
filter(timepoint == 2) %>%
select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**8WK Table: Features by Estrogen Bin, 8 WEEK TIME POINT**") %>%
#add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|
| high E2, N = 1191 | low E2, N = 911 | mid E2, N = 2221 | ||
| ED_Age | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.3 | |||
| Advanced Degree | 6 (5.0%) | 6 (6.6%) | 17 (7.7%) | |
| Bachelor's Degree | 20 (17%) | 23 (25%) | 30 (14%) | |
| High School or Less | 46 (39%) | 31 (34%) | 82 (37%) | |
| Some College or Associate Degree | 47 (39%) | 31 (34%) | 93 (42%) | |
| BMI | 31 (10) | 28 (6) | 30 (8) | 0.2 |
| Unknown | 6 | 7 | 19 | |
| Widespread_pain_2016 | 57 (50%) | 40 (47%) | 77 (36%) | 0.043 |
| Unknown | 5 | 5 | 11 | |
| Axial_Pain | 77 (79%) | 68 (87%) | 156 (85%) | 0.3 |
| Unknown | 21 | 13 | 38 | |
| ED_Pain | 7 (2) | 6 (3) | 6 (3) | 0.2 |
| NRS_pain | 5.1 (3.1) | 4.7 (3.0) | 4.4 (2.9) | 0.095 |
| Unknown | 5 | 4 | 11 | |
| bin_pain | 0.7 | |||
| 0 pain | 16 (14%) | 9 (10%) | 27 (13%) | |
| Any pain | 98 (86%) | 78 (90%) | 184 (87%) | |
| Unknown | 5 | 4 | 11 | |
| bin_pain2 | 0.5 | |||
| 0 pain | 16 (14%) | 9 (10%) | 27 (13%) | |
| 1 to 9 pain | 87 (76%) | 71 (82%) | 173 (82%) | |
| 10 pain | 11 (9.6%) | 7 (8.0%) | 11 (5.2%) | |
| Unknown | 5 | 4 | 11 | |
| postmenopause | 9 (7.6%) | 38 (42%) | 51 (23%) | <0.001 |
| Plate.Number | 11 (6) | 12 (6) | 10 (7) | 0.2 |
| 1 Mean (SD); n (%) | ||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | ||||
#summary table for MONTH 3
e2_full_long %>%
filter(timepoint == 3) %>%
select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M3 Table: Features by Estrogen Bin, MONTH 3 TIME POINT**") %>%
#add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|
| high E2, N = 1191 | low E2, N = 911 | mid E2, N = 2221 | ||
| ED_Age | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.3 | |||
| Advanced Degree | 6 (5.0%) | 6 (6.6%) | 17 (7.7%) | |
| Bachelor's Degree | 20 (17%) | 23 (25%) | 30 (14%) | |
| High School or Less | 46 (39%) | 31 (34%) | 82 (37%) | |
| Some College or Associate Degree | 47 (39%) | 31 (34%) | 93 (42%) | |
| BMI | 31 (10) | 28 (6) | 30 (8) | 0.2 |
| Unknown | 6 | 7 | 19 | |
| Widespread_pain_2016 | 47 (41%) | 39 (45%) | 77 (37%) | 0.4 |
| Unknown | 4 | 5 | 13 | |
| Axial_Pain | 48 (53%) | 31 (44%) | 67 (40%) | 0.12 |
| Unknown | 29 | 20 | 55 | |
| ED_Pain | 7 (2) | 6 (3) | 6 (3) | 0.2 |
| NRS_pain | 4.4 (3.3) | 4.5 (3.2) | 3.9 (2.9) | 0.3 |
| Unknown | 4 | 5 | 13 | |
| bin_pain | 0.6 | |||
| 0 pain | 25 (22%) | 14 (16%) | 38 (18%) | |
| Any pain | 90 (78%) | 72 (84%) | 171 (82%) | |
| Unknown | 4 | 5 | 13 | |
| bin_pain2 | 0.3 | |||
| 0 pain | 25 (22%) | 14 (16%) | 38 (18%) | |
| 1 to 9 pain | 82 (71%) | 64 (74%) | 163 (78%) | |
| 10 pain | 8 (7.0%) | 8 (9.3%) | 8 (3.8%) | |
| Unknown | 4 | 5 | 13 | |
| postmenopause | 9 (7.6%) | 38 (42%) | 51 (23%) | <0.001 |
| Plate.Number | 11 (6) | 12 (6) | 10 (7) | 0.2 |
| 1 Mean (SD); n (%) | ||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | ||||
#summary table for MONTH 6
e2_full_long %>%
filter(timepoint == 6) %>%
select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M6 Table: Features by Estrogen Bin, MONTH 6 TIME POINT**") %>%
#add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|
| high E2, N = 1191 | low E2, N = 911 | mid E2, N = 2221 | ||
| ED_Age | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.3 | |||
| Advanced Degree | 6 (5.0%) | 6 (6.6%) | 17 (7.7%) | |
| Bachelor's Degree | 20 (17%) | 23 (25%) | 30 (14%) | |
| High School or Less | 46 (39%) | 31 (34%) | 82 (37%) | |
| Some College or Associate Degree | 47 (39%) | 31 (34%) | 93 (42%) | |
| BMI | 31 (10) | 28 (6) | 30 (8) | 0.2 |
| Unknown | 6 | 7 | 19 | |
| Widespread_pain_2016 | 48 (40%) | 41 (46%) | 78 (35%) | 0.2 |
| Unknown | 0 | 1 | 1 | |
| Axial_Pain | 44 (53%) | 28 (40%) | 66 (40%) | 0.14 |
| Unknown | 36 | 21 | 59 | |
| ED_Pain | 7 (2) | 6 (3) | 6 (3) | 0.2 |
| NRS_pain | 3.9 (3.3) | 4.1 (3.4) | 3.5 (3.0) | 0.4 |
| bin_pain | 0.3 | |||
| 0 pain | 36 (30%) | 19 (21%) | 58 (26%) | |
| Any pain | 83 (70%) | 72 (79%) | 164 (74%) | |
| bin_pain2 | 0.12 | |||
| 0 pain | 36 (30%) | 19 (21%) | 58 (26%) | |
| 1 to 9 pain | 78 (66%) | 62 (68%) | 154 (69%) | |
| 10 pain | 5 (4.2%) | 10 (11%) | 10 (4.5%) | |
| postmenopause | 9 (7.6%) | 38 (42%) | 51 (23%) | <0.001 |
| Plate.Number | 11 (6) | 12 (6) | 10 (7) | 0.2 |
| 1 Mean (SD); n (%) | ||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | ||||
what if I only look at low and high e2 levels? doesn’t work
e2_full_long %>%
filter(e2_bin %in% c("high E2", "low E2")) %>%
select(ED_Age, ED_HighestGrade, BMI, Widespread_pain_2016, Axial_Pain, ED_Pain, NRS_pain, e2_bin, bin_pain, bin_pain2, postmenopause,Plate.Number) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
add_p() %>% bold_p() #%>% add_overall()
| Characteristic | high E2, N = 3571 | low E2, N = 2731 | p-value2 |
|---|---|---|---|
| ED_Age | 33 (11) | 44 (13) | <0.001 |
| ED_HighestGrade | 0.042 | ||
| Advanced Degree | 18 (5.0%) | 18 (6.6%) | |
| Bachelor's Degree | 60 (17%) | 69 (25%) | |
| High School or Less | 138 (39%) | 93 (34%) | |
| Some College or Associate Degree | 141 (39%) | 93 (34%) | |
| BMI | 31 (10) | 28 (6) | 0.002 |
| Unknown | 18 | 21 | |
| Widespread_pain_2016 | 152 (44%) | 120 (46%) | 0.6 |
| Unknown | 9 | 11 | |
| Axial_Pain | 169 (62%) | 127 (58%) | 0.3 |
| Unknown | 86 | 54 | |
| ED_Pain | 7 (2) | 6 (3) | 0.003 |
| NRS_pain | 4.5 (3.3) | 4.4 (3.2) | 0.9 |
| Unknown | 9 | 9 | |
| bin_pain | 0.054 | ||
| 0 pain | 77 (22%) | 42 (16%) | |
| Any pain | 271 (78%) | 222 (84%) | |
| Unknown | 9 | 9 | |
| bin_pain2 | 0.11 | ||
| 0 pain | 77 (22%) | 42 (16%) | |
| 1 to 9 pain | 247 (71%) | 197 (75%) | |
| 10 pain | 24 (6.9%) | 25 (9.5%) | |
| Unknown | 9 | 9 | |
| postmenopause | 27 (7.6%) | 114 (42%) | <0.001 |
| Plate.Number | 11 (6) | 12 (6) | 0.088 |
| 1 Mean (SD); n (%) | |||
| 2 Wilcoxon rank sum test; Pearson’s Chi-squared test | |||
#nothing. also repeated for each timepoint and nothing sig
i don’t know. it’s not signficant when stratified by time point. I think this is because it is auto-correlating when all time points are combined because you have three of each case
visuals
ggplot(e2_full_long, aes(x = NRS_pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain Across All Time Points", y = "Density", title = "Pain All Time Points, by Estrogen Bin") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
## Warning: Removed 42 rows containing non-finite outside the scale range
## (`stat_density()`).
#high and mid E2 peak at 0 pain. Everything mostly falls off at pain 10.
#BELOW: PAIN AT 2 MONTHS. Mostly mid level pain everyone
ggplot(data = e2_full_long %>% filter(timepoint == 2), aes(x = NRS_pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain at 2 MONTHS", y = "Density", title = "Pain at 2 MONTHS, by Estrogen Category") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_density()`).
#BELOW: PAIN AT 3 MONTHS, mostly mid level pain, high and mid e2 start to bounce back down to zero
ggplot(data = e2_full_long %>% filter(timepoint == 3), aes(x = NRS_pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain at 3 MONTHS", y = "Density", title = "Pain at 3 MONTHS, by Estrogen Category") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_density()`).
#BELOW: PAIN AT 6 MONTHS. Mid and high e2 have a strong peak at 0 pain, and high e2 has another strong bump at ~6 pain. Low e2 is mildly right skewed towards 10.
ggplot(data = e2_full_long %>% filter(timepoint == 6), aes(x = NRS_pain, fill = e2_bin)) +
geom_density(alpha = 0.5) +
labs(x = "Pain at 6 MONTHS", y = "Density", title = "Pain at 6 MONTHS, by Estrogen Category") +
scale_fill_manual(values = c("low E2" = "blue", "mid E2" = "green", "high E2" = "red"))
Ok this is very useful!! We see that low E2 has fewer 0s and more 10s at each time point. This makes me think a different pain stratification should take place that focuses on the extremes of the pain NRS scale.
#High and Low E2 level at month 2 - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 2, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
geom_histogram(binwidth = .6, position = "dodge", color="black") +
scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
labs(title = "Pain at Month 2 by High and Low E2",
x = "Pain Score",
y = "Frequency")
## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_bin()`).
#High and Low E2 level at month 3 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 3, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
geom_histogram(binwidth = .6, position = "dodge", color="black") +
scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
labs(title = "Pain at Month 3 by High and Low E2",
x = "Pain Score",
y = "Frequency")
## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_bin()`).
#High and Low E2 level at month 6 - - way more 0s for high E2 and more 10s for low E2
ggplot(e2_full_long %>% filter(timepoint == 6, e2_bin %in% c("high E2", "low E2")), aes(x = NRS_pain, fill = e2_bin)) +
geom_histogram(binwidth = .6, position = "dodge", color="black") +
scale_fill_manual(values = c("high E2" = "paleturquoise3", "low E2" = "lightcoral")) +
labs(title = "Pain at Month 6 by High and Low E2",
x = "Pain Score",
y = "Frequency")
chi square ?
table(e2_full_long$e2_bin, as.factor(e2_full_long$NRS_pain))
##
## 0 1 2 3 4 5 6 7 8 9 10
## high E2 77 17 18 20 32 34 45 30 38 13 24
## low E2 42 23 17 28 27 27 25 20 25 5 25
## mid E2 123 56 49 67 57 86 68 51 31 25 29
table(e2_full_long$e2_bin, e2_full_long$bin_pain)
##
## 0 pain Any pain
## high E2 77 271
## low E2 42 222
## mid E2 123 519
chisq.test(e2_full_long$e2_bin, as.factor(e2_full_long$NRS_pain))
##
## Pearson's Chi-squared test
##
## data: e2_full_long$e2_bin and as.factor(e2_full_long$NRS_pain)
## X-squared = 44.635, df = 20, p-value = 0.001236
chisq.test(e2_full_long$e2_bin, e2_full_long$bin_pain) #p-value = 0.1896
##
## Pearson's Chi-squared test
##
## data: e2_full_long$e2_bin and e2_full_long$bin_pain
## X-squared = 3.7424, df = 2, p-value = 0.1539
kruskal.test(NRS_pain ~ e2_bin, data = e2_full_long) # p-value = 0.00459
##
## Kruskal-Wallis rank sum test
##
## data: NRS_pain by e2_bin
## Kruskal-Wallis chi-squared = 7.1609, df = 2, p-value = 0.02786
summary table
#each time point
e2_full_long %>%
#filter(time == 2) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**Combined Table: Features by Estrogen Bin, ALL TIME POINTS**") %>%
# add_overall() %>%
add_p() %>%
bold_p()
| Characteristic | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|
| high E2, N = 3571 | low E2, N = 2731 | mid E2, N = 6661 | ||
| ED_Age | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.001 | |||
| Advanced Degree | 18 (5.0%) | 18 (6.6%) | 51 (7.7%) | |
| Bachelor's Degree | 60 (17%) | 69 (25%) | 90 (14%) | |
| High School or Less | 138 (39%) | 93 (34%) | 246 (37%) | |
| Some College or Associate Degree | 141 (39%) | 93 (34%) | 279 (42%) | |
| BMI | 31 (10) | 28 (6) | 30 (8) | 0.005 |
| Unknown | 18 | 21 | 57 | |
| Race | <0.001 | |||
| Hispanic | 42 (12%) | 54 (20%) | 96 (15%) | |
| Non-Hispanic Black | 165 (47%) | 96 (35%) | 324 (49%) | |
| Non-Hispanic Other | 33 (9.3%) | 15 (5.5%) | 21 (3.2%) | |
| Non-Hispanic White | 114 (32%) | 108 (40%) | 219 (33%) | |
| Unknown | 3 | 0 | 6 | |
| sqrt.conc | 9.63 (3.08) | 3.01 (0.57) | 4.85 (0.74) | <0.001 |
| ED_Pain | 7 (2) | 6 (3) | 6 (3) | 0.005 |
| bin_pain | 0.2 | |||
| 0 pain | 77 (22%) | 42 (16%) | 123 (19%) | |
| Any pain | 271 (78%) | 222 (84%) | 519 (81%) | |
| Unknown | 9 | 9 | 24 | |
| NRS_pain | 4.5 (3.3) | 4.4 (3.2) | 4.0 (3.0) | 0.028 |
| Unknown | 9 | 9 | 24 | |
| bin_pain2 | 0.020 | |||
| 0 pain | 77 (22%) | 42 (16%) | 123 (19%) | |
| 1 to 9 pain | 247 (71%) | 197 (75%) | 490 (76%) | |
| 10 pain | 24 (6.9%) | 25 (9.5%) | 29 (4.5%) | |
| Unknown | 9 | 9 | 24 | |
| 1 Mean (SD); n (%) | ||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | ||||
#bin pain 2 and bin pain 3 significant (p<.001)
#month 2
e2_full_long %>%
filter(timepoint == 2) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**WK8 Table: Features by Estrogen Bin, 8 weeks**") %>%
add_overall() %>%
add_p() %>%
bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6. LDKEY=620 is too small for this problem,
## (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
| Characteristic | Overall, N = 4321 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 1191 | low E2, N = 911 | mid E2, N = 2221 | |||
| ED_Age | 37 (13) | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.3 | ||||
| Advanced Degree | 29 (6.7%) | 6 (5.0%) | 6 (6.6%) | 17 (7.7%) | |
| Bachelor's Degree | 73 (17%) | 20 (17%) | 23 (25%) | 30 (14%) | |
| High School or Less | 159 (37%) | 46 (39%) | 31 (34%) | 82 (37%) | |
| Some College or Associate Degree | 171 (40%) | 47 (39%) | 31 (34%) | 93 (42%) | |
| BMI | 30 (8) | 31 (10) | 28 (6) | 30 (8) | 0.2 |
| Unknown | 32 | 6 | 7 | 19 | |
| Race | |||||
| Hispanic | 64 (15%) | 14 (12%) | 18 (20%) | 32 (15%) | |
| Non-Hispanic Black | 195 (45%) | 55 (47%) | 32 (35%) | 108 (49%) | |
| Non-Hispanic Other | 23 (5.4%) | 11 (9.3%) | 5 (5.5%) | 7 (3.2%) | |
| Non-Hispanic White | 147 (34%) | 38 (32%) | 36 (40%) | 73 (33%) | |
| Unknown | 3 | 1 | 0 | 2 | |
| sqrt.conc | 5.78 (3.02) | 9.63 (3.09) | 3.01 (0.57) | 4.85 (0.74) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.2 |
| bin_pain | 0.7 | ||||
| 0 pain | 52 (13%) | 16 (14%) | 9 (10%) | 27 (13%) | |
| Any pain | 360 (87%) | 98 (86%) | 78 (90%) | 184 (87%) | |
| Unknown | 20 | 5 | 4 | 11 | |
| NRS_pain | 4.7 (3.0) | 5.1 (3.1) | 4.7 (3.0) | 4.4 (2.9) | 0.095 |
| Unknown | 20 | 5 | 4 | 11 | |
| bin_pain2 | 0.5 | ||||
| 0 pain | 52 (13%) | 16 (14%) | 9 (10%) | 27 (13%) | |
| 1 to 9 pain | 331 (80%) | 87 (76%) | 71 (82%) | 173 (82%) | |
| 10 pain | 29 (7.0%) | 11 (9.6%) | 7 (8.0%) | 11 (5.2%) | |
| Unknown | 20 | 5 | 4 | 11 | |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||
#bin pain 3 significant (p=0.047) but not bin_pain2
#month 3
e2_full_long %>%
filter(timepoint == 3) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M3 Table: Features by Estrogen Bin, 3 months**") %>%
add_overall() %>%
add_p() %>%
bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6. LDKEY=620 is too small for this problem,
## (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
| Characteristic | Overall, N = 4321 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 1191 | low E2, N = 911 | mid E2, N = 2221 | |||
| ED_Age | 37 (13) | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.3 | ||||
| Advanced Degree | 29 (6.7%) | 6 (5.0%) | 6 (6.6%) | 17 (7.7%) | |
| Bachelor's Degree | 73 (17%) | 20 (17%) | 23 (25%) | 30 (14%) | |
| High School or Less | 159 (37%) | 46 (39%) | 31 (34%) | 82 (37%) | |
| Some College or Associate Degree | 171 (40%) | 47 (39%) | 31 (34%) | 93 (42%) | |
| BMI | 30 (8) | 31 (10) | 28 (6) | 30 (8) | 0.2 |
| Unknown | 32 | 6 | 7 | 19 | |
| Race | |||||
| Hispanic | 64 (15%) | 14 (12%) | 18 (20%) | 32 (15%) | |
| Non-Hispanic Black | 195 (45%) | 55 (47%) | 32 (35%) | 108 (49%) | |
| Non-Hispanic Other | 23 (5.4%) | 11 (9.3%) | 5 (5.5%) | 7 (3.2%) | |
| Non-Hispanic White | 147 (34%) | 38 (32%) | 36 (40%) | 73 (33%) | |
| Unknown | 3 | 1 | 0 | 2 | |
| sqrt.conc | 5.78 (3.02) | 9.63 (3.09) | 3.01 (0.57) | 4.85 (0.74) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.2 |
| bin_pain | 0.6 | ||||
| 0 pain | 77 (19%) | 25 (22%) | 14 (16%) | 38 (18%) | |
| Any pain | 333 (81%) | 90 (78%) | 72 (84%) | 171 (82%) | |
| Unknown | 22 | 4 | 5 | 13 | |
| NRS_pain | 4.2 (3.1) | 4.4 (3.3) | 4.5 (3.2) | 3.9 (2.9) | 0.3 |
| Unknown | 22 | 4 | 5 | 13 | |
| bin_pain2 | 0.3 | ||||
| 0 pain | 77 (19%) | 25 (22%) | 14 (16%) | 38 (18%) | |
| 1 to 9 pain | 309 (75%) | 82 (71%) | 64 (74%) | 163 (78%) | |
| 10 pain | 24 (5.9%) | 8 (7.0%) | 8 (9.3%) | 8 (3.8%) | |
| Unknown | 22 | 4 | 5 | 13 | |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||
#bin_pain3 is sig (p=0.012) but not bin pain 2
#month 6
e2_full_long %>%
filter(timepoint == 6) %>%
select(ED_Age, ED_HighestGrade, BMI, Race, sqrt.conc, ED_Pain, bin_pain, NRS_pain, e2_bin, bin_pain2) %>%
tbl_summary(by = e2_bin,
statistic = list(all_continuous() ~ "{mean} ({sd})")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Relative Estrogen Bin**") %>%
modify_caption("**M6 Table: Features by Estrogen Bin, 6 months**") %>%
add_overall() %>%
add_p() %>%
bold_p()
## There was an error in 'add_p()/add_difference()' for variable 'Race', p-value omitted:
## Error in stats::fisher.test(c("Non-Hispanic Black", "Hispanic", "Non-Hispanic Black", : FEXACT error 6. LDKEY=620 is too small for this problem,
## (ii := key2[itp=930] = 1696008, ldstp=18600)
## Try increasing the size of the workspace and possibly 'mult'
| Characteristic | Overall, N = 4321 | Relative Estrogen Bin | p-value2 | ||
|---|---|---|---|---|---|
| high E2, N = 1191 | low E2, N = 911 | mid E2, N = 2221 | |||
| ED_Age | 37 (13) | 33 (11) | 44 (13) | 37 (14) | <0.001 |
| ED_HighestGrade | 0.3 | ||||
| Advanced Degree | 29 (6.7%) | 6 (5.0%) | 6 (6.6%) | 17 (7.7%) | |
| Bachelor's Degree | 73 (17%) | 20 (17%) | 23 (25%) | 30 (14%) | |
| High School or Less | 159 (37%) | 46 (39%) | 31 (34%) | 82 (37%) | |
| Some College or Associate Degree | 171 (40%) | 47 (39%) | 31 (34%) | 93 (42%) | |
| BMI | 30 (8) | 31 (10) | 28 (6) | 30 (8) | 0.2 |
| Unknown | 32 | 6 | 7 | 19 | |
| Race | |||||
| Hispanic | 64 (15%) | 14 (12%) | 18 (20%) | 32 (15%) | |
| Non-Hispanic Black | 195 (45%) | 55 (47%) | 32 (35%) | 108 (49%) | |
| Non-Hispanic Other | 23 (5.4%) | 11 (9.3%) | 5 (5.5%) | 7 (3.2%) | |
| Non-Hispanic White | 147 (34%) | 38 (32%) | 36 (40%) | 73 (33%) | |
| Unknown | 3 | 1 | 0 | 2 | |
| sqrt.conc | 5.78 (3.02) | 9.63 (3.09) | 3.01 (0.57) | 4.85 (0.74) | <0.001 |
| ED_Pain | 6 (3) | 7 (2) | 6 (3) | 6 (3) | 0.2 |
| bin_pain | 0.3 | ||||
| 0 pain | 113 (26%) | 36 (30%) | 19 (21%) | 58 (26%) | |
| Any pain | 319 (74%) | 83 (70%) | 72 (79%) | 164 (74%) | |
| NRS_pain | 3.7 (3.2) | 3.9 (3.3) | 4.1 (3.4) | 3.5 (3.0) | 0.4 |
| bin_pain2 | 0.12 | ||||
| 0 pain | 113 (26%) | 36 (30%) | 19 (21%) | 58 (26%) | |
| 1 to 9 pain | 294 (68%) | 78 (66%) | 62 (68%) | 154 (69%) | |
| 10 pain | 25 (5.8%) | 5 (4.2%) | 10 (11%) | 10 (4.5%) | |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||
#bin pain 2 sig (p=0.021) and bin pain 3 sig (p<.001)
stats for new categories
table(e2_full_long$e2_bin, e2_full_long$bin_pain2)
##
## 0 pain 1 to 9 pain 10 pain
## high E2 77 247 24
## low E2 42 197 25
## mid E2 123 490 29
round(prop.table(table(e2_full_long$e2_bin, e2_full_long$bin_pain2), margin = 1) * 100,2)
##
## 0 pain 1 to 9 pain 10 pain
## high E2 22.13 70.98 6.90
## low E2 15.91 74.62 9.47
## mid E2 19.16 76.32 4.52
chisq.test(e2_full_long[e2_full_long$timepoint == 2, "e2_bin"], e2_full_long[e2_full_long$timepoint == 2, "bin_pain2"]) # p-value = 0.03053
##
## Pearson's Chi-squared test
##
## data: e2_full_long[e2_full_long$timepoint == 2, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 2, "bin_pain2"]
## X-squared = 3.0896, df = 4, p-value = 0.5429
chisq.test(e2_full_long[e2_full_long$timepoint == 3, "e2_bin"], e2_full_long[e2_full_long$timepoint == 3, "bin_pain2"]) #p-value = 0.007257
##
## Pearson's Chi-squared test
##
## data: e2_full_long[e2_full_long$timepoint == 3, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 3, "bin_pain2"]
## X-squared = 4.7671, df = 4, p-value = 0.312
chisq.test(e2_full_long[e2_full_long$timepoint == 6, "e2_bin"], e2_full_long[e2_full_long$timepoint == 6, "bin_pain2"]) #p-value = 7.419e-05
##
## Pearson's Chi-squared test
##
## data: e2_full_long[e2_full_long$timepoint == 6, "e2_bin"] and e2_full_long[e2_full_long$timepoint == 6, "bin_pain2"]
## X-squared = 7.3016, df = 4, p-value = 0.1208
redo model from above
MARCH 2024 NEW E2 DATA CODE GLS MODEL
e2_model <- gls(NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ timepoint | PID), control = list(singular.ok = FALSE), data=e2_full_long_gls_complete)
summary(e2_model)
## Generalized least squares fit by REML
## Model: NRS_pain ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin
## Data: e2_full_long_gls_complete
## AIC BIC logLik
## 3117.277 3194.474 -1541.638
##
## Correlation Structure: ARMA(1,0)
## Formula: ~timepoint | PID
## Parameter estimate(s):
## Phi1
## 0.7112736
##
## Coefficients:
## Value Std.Error t-value
## (Intercept) 2.2400046 1.1756720 1.905297
## timepoint -0.1500786 0.0480453 -3.123692
## sqrt.conc -0.0013510 0.0759092 -0.017797
## ED_Age 0.0355369 0.0103781 3.424209
## ED_HighestGradeBachelor's Degree 0.5937591 0.5415318 1.096444
## ED_HighestGradeHigh School or Less 1.4262587 0.5198131 2.743791
## ED_HighestGradeSome College or Associate Degree 1.4480613 0.5061676 2.860833
## BMI 0.0267255 0.0147797 1.808262
## ED_Pain 0.1949444 0.0518378 3.760663
## RaceNon-Hispanic Black -0.1159866 0.3971122 -0.292075
## RaceNon-Hispanic Other -0.6842188 0.6211972 -1.101452
## RaceNon-Hispanic White -0.7103415 0.4011102 -1.770938
## Plate.Number 0.0051389 0.0203987 0.251921
## e2_binlow E2 -0.3251953 0.6294793 -0.516610
## e2_binmid E2 -0.7181006 0.4736169 -1.516205
## p-value
## (Intercept) 0.0572
## timepoint 0.0019
## sqrt.conc 0.9858
## ED_Age 0.0007
## ED_HighestGradeBachelor's Degree 0.2733
## ED_HighestGradeHigh School or Less 0.0062
## ED_HighestGradeSome College or Associate Degree 0.0044
## BMI 0.0710
## ED_Pain 0.0002
## RaceNon-Hispanic Black 0.7703
## RaceNon-Hispanic Other 0.2711
## RaceNon-Hispanic White 0.0770
## Plate.Number 0.8012
## e2_binlow E2 0.6056
## e2_binmid E2 0.1299
##
## Correlation:
## (Intr) timpnt sqrt.c ED_Age
## timepoint -0.163
## sqrt.conc -0.689 0.000
## ED_Age -0.317 0.000 0.095
## ED_HighestGradeBachelor's Degree -0.150 0.000 -0.055 0.039
## ED_HighestGradeHigh School or Less -0.253 0.000 0.010 0.170
## ED_HighestGradeSome College or Associate Degree -0.187 0.000 -0.046 0.088
## BMI -0.418 0.000 0.113 -0.154
## ED_Pain -0.272 0.000 -0.035 0.047
## RaceNon-Hispanic Black -0.109 0.000 0.029 -0.142
## RaceNon-Hispanic Other -0.112 0.000 0.027 -0.036
## RaceNon-Hispanic White -0.129 0.000 0.028 -0.181
## Plate.Number -0.175 0.000 0.014 0.006
## e2_binlow E2 -0.628 0.000 0.803 -0.103
## e2_binmid E2 -0.660 0.000 0.771 -0.022
## ED_HGD ED_SoL EDCoAD BMI
## timepoint
## sqrt.conc
## ED_Age
## ED_HighestGradeBachelor's Degree
## ED_HighestGradeHigh School or Less 0.723
## ED_HighestGradeSome College or Associate Degree 0.751 0.833
## BMI -0.113 -0.057 -0.099
## ED_Pain -0.116 -0.213 -0.152 0.029
## RaceNon-Hispanic Black -0.190 -0.318 -0.322 0.004
## RaceNon-Hispanic Other -0.073 -0.177 -0.148 -0.047
## RaceNon-Hispanic White -0.198 -0.212 -0.219 -0.018
## Plate.Number -0.086 -0.016 -0.085 0.109
## e2_binlow E2 -0.071 -0.007 -0.039 0.191
## e2_binmid E2 -0.026 0.013 -0.022 0.133
## ED_Pan RcN-HB RcN-HO RcN-HW
## timepoint
## sqrt.conc
## ED_Age
## ED_HighestGradeBachelor's Degree
## ED_HighestGradeHigh School or Less
## ED_HighestGradeSome College or Associate Degree
## BMI
## ED_Pain
## RaceNon-Hispanic Black 0.022
## RaceNon-Hispanic Other 0.088 0.487
## RaceNon-Hispanic White 0.117 0.722 0.471
## Plate.Number 0.056 -0.106 -0.124 -0.164
## e2_binlow E2 0.015 0.089 0.045 0.074
## e2_binmid E2 0.021 0.097 0.121 0.098
## Plt.Nm e2_bnlE2
## timepoint
## sqrt.conc
## ED_Age
## ED_HighestGradeBachelor's Degree
## ED_HighestGradeHigh School or Less
## ED_HighestGradeSome College or Associate Degree
## BMI
## ED_Pain
## RaceNon-Hispanic Black
## RaceNon-Hispanic Other
## RaceNon-Hispanic White
## Plate.Number
## e2_binlow E2 -0.058
## e2_binmid E2 -0.011 0.829
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.42631492 -0.72849950 0.00436402 0.71323539 2.35250577
##
## Residual standard error: 2.420916
## Degrees of freedom: 708 total; 693 residual
#recode the factors so the binned pain is the outcome of interest
e2_full_long$bin_pain4 <- recode_factor(e2_full_long$bin_pain2,
`0 pain` = "1",
`1 to 9 pain` = "2",
`10 pain` = "3") #new dimensions 1163 x 17
#multinomial logit regression
e2_model <- multinom(bin_pain4 ~ timepoint + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, correlation = corAR1(form = ~ timepoint | PID), control = list(singular.ok = FALSE), data=e2_full_long) #e2_bin low E2 p = 2.398452e-03 = 0.002398452
## # weights: 48 (30 variable)
## initial value 1277.686092
## iter 10 value 978.687050
## iter 20 value 828.822196
## iter 30 value 768.995579
## iter 40 value 768.314759
## final value 768.314605
## converged
#library(kableExtra)
tidy(e2_model, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE)
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 1.1476098 | 0.7192362 | 1.5955951 | 0.1105792 | -0.2620673 | 2.5572868 |
| 2 | timepoint | -0.1977753 | 0.0441436 | -4.4802701 | 0.0000075 | -0.2842952 | -0.1112554 |
| 2 | sqrt.conc | -0.0287966 | 0.0400877 | -0.7183419 | 0.4725465 | -0.1073670 | 0.0497737 |
| 2 | ED_Age | 0.0253562 | 0.0066856 | 3.7926427 | 0.0001491 | 0.0122526 | 0.0384598 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.1368416 | 0.3705171 | 0.3693261 | 0.7118847 | -0.5893586 | 0.8630418 |
| 2 | ED_HighestGradeHigh School or Less | -0.1608027 | 0.3463921 | -0.4642217 | 0.6424889 | -0.8397188 | 0.5181133 |
| 2 | ED_HighestGradeSome College or Associate Degree | -0.0484946 | 0.3412816 | -0.1420956 | 0.8870045 | -0.7173943 | 0.6204051 |
| 2 | BMI | 0.0115329 | 0.0098899 | 1.1661257 | 0.2435636 | -0.0078510 | 0.0309168 |
| 2 | ED_Pain | 0.0911738 | 0.0316376 | 2.8818140 | 0.0039539 | 0.0291652 | 0.1531825 |
| 2 | RaceNon-Hispanic Black | -0.5527326 | 0.2668155 | -2.0715908 | 0.0383036 | -1.0756814 | -0.0297838 |
| 2 | RaceNon-Hispanic Other | -0.3304876 | 0.3902482 | -0.8468653 | 0.3970702 | -1.0953600 | 0.4343847 |
| 2 | RaceNon-Hispanic White | -0.3713711 | 0.2762933 | -1.3441190 | 0.1789099 | -0.9128961 | 0.1701539 |
| 2 | Plate.Number | -0.0211626 | 0.0119249 | -1.7746606 | 0.0759539 | -0.0445349 | 0.0022097 |
| 2 | e2_binlow E2 | 0.1952720 | 0.3712315 | 0.5260114 | 0.5988803 | -0.5323284 | 0.9228724 |
| 2 | e2_binmid E2 | 0.0093582 | 0.2724513 | 0.0343481 | 0.9725995 | -0.5246365 | 0.5433529 |
| 3 | (Intercept) | -3.8222607 | 1.6508923 | -2.3152696 | 0.0205982 | -7.0579502 | -0.5865713 |
| 3 | timepoint | -0.2117971 | 0.0815971 | -2.5956444 | 0.0094414 | -0.3717245 | -0.0518697 |
| 3 | sqrt.conc | -0.0327149 | 0.0825517 | -0.3962960 | 0.6918867 | -0.1945133 | 0.1290835 |
| 3 | ED_Age | 0.0573083 | 0.0119448 | 4.7977392 | 0.0000016 | 0.0338968 | 0.0807197 |
| 3 | ED_HighestGradeBachelor’s Degree | 1.0048357 | 1.1494872 | 0.8741600 | 0.3820311 | -1.2481178 | 3.2577893 |
| 3 | ED_HighestGradeHigh School or Less | 1.6704468 | 1.0845166 | 1.5402684 | 0.1234949 | -0.4551668 | 3.7960603 |
| 3 | ED_HighestGradeSome College or Associate Degree | 1.7184618 | 1.0819146 | 1.5883526 | 0.1122066 | -0.4020517 | 3.8389754 |
| 3 | BMI | -0.0240810 | 0.0196057 | -1.2282606 | 0.2193491 | -0.0625075 | 0.0143456 |
| 3 | ED_Pain | 0.2963213 | 0.0640951 | 4.6231501 | 0.0000038 | 0.1706972 | 0.4219455 |
| 3 | RaceNon-Hispanic Black | -0.5835581 | 0.4182776 | -1.3951455 | 0.1629720 | -1.4033671 | 0.2362509 |
| 3 | RaceNon-Hispanic Other | -0.5588189 | 0.6948961 | -0.8041762 | 0.4212952 | -1.9207901 | 0.8031523 |
| 3 | RaceNon-Hispanic White | -0.8144662 | 0.4747860 | -1.7154384 | 0.0862649 | -1.7450297 | 0.1160973 |
| 3 | Plate.Number | -0.0450055 | 0.0221329 | -2.0334249 | 0.0420096 | -0.0883852 | -0.0016259 |
| 3 | e2_binlow E2 | 0.1952197 | 0.6764448 | 0.2885967 | 0.7728900 | -1.1305877 | 1.5210272 |
| 3 | e2_binmid E2 | -0.6145368 | 0.5141257 | -1.1953045 | 0.2319681 | -1.6222048 | 0.3931311 |
#e2_bin low E2 on category 3 (10 pain) p = 2.398452e-03 = 0.002398452
#this means that having a LOW E2 category is significantly associated with having level 10 PAIN rather than Level 0 Pain
# Fit the multinomial logistic regression model for cases where e2_full_long$time == 2 months
model_m2 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 2, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights: 45 (28 variable)
## initial value 421.867119
## iter 10 value 247.665633
## iter 20 value 222.152402
## iter 30 value 220.540297
## iter 40 value 220.461828
## final value 220.461395
## converged
tidy(model_m2, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE) #low e2 significant vs level 10 pain
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 0.6332172 | 1.4645354 | 0.4323673 | 0.6654745 | -2.2372194 | 3.5036537 |
| 2 | sqrt.conc | -0.0439258 | 0.0817465 | -0.5373413 | 0.5910319 | -0.2041460 | 0.1162945 |
| 2 | ED_Age | 0.0345304 | 0.0143728 | 2.4024876 | 0.0162840 | 0.0063603 | 0.0627005 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.0253283 | 0.7807131 | 0.0324425 | 0.9741191 | -1.5048413 | 1.5554980 |
| 2 | ED_HighestGradeHigh School or Less | -0.5408934 | 0.7316123 | -0.7393170 | 0.4597145 | -1.9748272 | 0.8930404 |
| 2 | ED_HighestGradeSome College or Associate Degree | -0.1115448 | 0.7245922 | -0.1539415 | 0.8776558 | -1.5317195 | 1.3086299 |
| 2 | BMI | -0.0011304 | 0.0200715 | -0.0563180 | 0.9550884 | -0.0404699 | 0.0382091 |
| 2 | ED_Pain | 0.2278132 | 0.0643531 | 3.5400523 | 0.0004000 | 0.1016835 | 0.3539429 |
| 2 | RaceNon-Hispanic Black | -0.9946271 | 0.6589663 | -1.5093747 | 0.1312030 | -2.2861773 | 0.2969231 |
| 2 | RaceNon-Hispanic Other | -0.8541552 | 0.8588881 | -0.9944895 | 0.3199846 | -2.5375450 | 0.8292346 |
| 2 | RaceNon-Hispanic White | -0.9587317 | 0.6680304 | -1.4351618 | 0.1512410 | -2.2680471 | 0.3505838 |
| 2 | Plate.Number | 0.0168240 | 0.0253360 | 0.6640359 | 0.5066673 | -0.0328336 | 0.0664816 |
| 2 | e2_binlow E2 | -0.0731302 | 0.7770463 | -0.0941130 | 0.9250194 | -1.5961130 | 1.4498527 |
| 2 | e2_binmid E2 | -0.1387671 | 0.5726973 | -0.2423044 | 0.8085443 | -1.2612332 | 0.9836990 |
| 3 | (Intercept) | -15.6095805 | 1.6871690 | -9.2519365 | 0.0000000 | -18.9163710 | -12.3027900 |
| 3 | sqrt.conc | -0.0780987 | 0.1330539 | -0.5869704 | 0.5572236 | -0.3388795 | 0.1826821 |
| 3 | ED_Age | 0.0547499 | 0.0213335 | 2.5663871 | 0.0102764 | 0.0129371 | 0.0965627 |
| 3 | ED_HighestGradeBachelor’s Degree | 13.6045622 | 0.8144407 | 16.7041781 | 0.0000000 | 12.0082878 | 15.2008366 |
| 3 | ED_HighestGradeHigh School or Less | 13.5770128 | 0.6342322 | 21.4070066 | 0.0000000 | 12.3339405 | 14.8200851 |
| 3 | ED_HighestGradeSome College or Associate Degree | 14.1572732 | 0.6699970 | 21.1303518 | 0.0000000 | 12.8441031 | 15.4704432 |
| 3 | BMI | -0.0496474 | 0.0349347 | -1.4211501 | 0.1552731 | -0.1181182 | 0.0188233 |
| 3 | ED_Pain | 0.4303770 | 0.1148114 | 3.7485562 | 0.0001779 | 0.2053508 | 0.6554032 |
| 3 | RaceNon-Hispanic Black | -1.1582314 | 0.8419886 | -1.3755904 | 0.1689485 | -2.8084987 | 0.4920359 |
| 3 | RaceNon-Hispanic Other | -1.7429841 | 1.3997560 | -1.2452056 | 0.2130562 | -4.4864555 | 1.0004873 |
| 3 | RaceNon-Hispanic White | -1.1919065 | 0.9022291 | -1.3210685 | 0.1864785 | -2.9602431 | 0.5764301 |
| 3 | Plate.Number | 0.0146474 | 0.0386702 | 0.3787780 | 0.7048527 | -0.0611448 | 0.0904396 |
| 3 | e2_binlow E2 | -0.7254233 | 1.1592525 | -0.6257682 | 0.5314670 | -2.9975163 | 1.5466698 |
| 3 | e2_binmid E2 | -0.9939188 | 0.8655141 | -1.1483565 | 0.2508214 | -2.6902953 | 0.7024577 |
#Fit the model for time point 3 months
model_m3 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 3, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights: 45 (28 variable)
## initial value 419.669894
## iter 10 value 266.809692
## iter 20 value 247.485588
## iter 30 value 246.566597
## final value 246.564576
## converged
tidy(model_m3, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE) #no significance
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 0.7142391 | 1.2378680 | 0.5769913 | 0.5639453 | -1.7119376 | 3.1404158 |
| 2 | sqrt.conc | -0.0530729 | 0.0694125 | -0.7646012 | 0.4445091 | -0.1891189 | 0.0829731 |
| 2 | ED_Age | 0.0269198 | 0.0116536 | 2.3099983 | 0.0208882 | 0.0040792 | 0.0497604 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.1492462 | 0.6786421 | 0.2199189 | 0.8259343 | -1.1808679 | 1.4793603 |
| 2 | ED_HighestGradeHigh School or Less | -0.2373798 | 0.6272214 | -0.3784626 | 0.7050870 | -1.4667113 | 0.9919516 |
| 2 | ED_HighestGradeSome College or Associate Degree | -0.1810475 | 0.6186932 | -0.2926289 | 0.7698058 | -1.3936638 | 1.0315688 |
| 2 | BMI | 0.0204176 | 0.0175726 | 1.1619012 | 0.2452756 | -0.0140240 | 0.0548592 |
| 2 | ED_Pain | 0.0504263 | 0.0553806 | 0.9105411 | 0.3625372 | -0.0581177 | 0.1589703 |
| 2 | RaceNon-Hispanic Black | -0.7860288 | 0.4907704 | -1.6016223 | 0.1092391 | -1.7479211 | 0.1758635 |
| 2 | RaceNon-Hispanic Other | -0.2419856 | 0.7248566 | -0.3338393 | 0.7385008 | -1.6626784 | 1.1787072 |
| 2 | RaceNon-Hispanic White | -0.6911603 | 0.5073726 | -1.3622342 | 0.1731240 | -1.6855924 | 0.3032717 |
| 2 | Plate.Number | -0.0074184 | 0.0207205 | -0.3580208 | 0.7203278 | -0.0480297 | 0.0331930 |
| 2 | e2_binlow E2 | 0.0100750 | 0.6477340 | 0.0155542 | 0.9875900 | -1.2594603 | 1.2796103 |
| 2 | e2_binmid E2 | -0.0795187 | 0.4755080 | -0.1672290 | 0.8671899 | -1.0114973 | 0.8524599 |
| 3 | (Intercept) | -3.8298945 | 2.5683366 | -1.4911965 | 0.1359099 | -8.8637417 | 1.2039528 |
| 3 | sqrt.conc | -0.0088107 | 0.1510366 | -0.0583348 | 0.9534819 | -0.3048370 | 0.2872156 |
| 3 | ED_Age | 0.0541307 | 0.0219750 | 2.4632856 | 0.0137670 | 0.0110605 | 0.0972009 |
| 3 | ED_HighestGradeBachelor’s Degree | -0.1025653 | 1.4258754 | -0.0719314 | 0.9426565 | -2.8972298 | 2.6920992 |
| 3 | ED_HighestGradeHigh School or Less | 0.4101094 | 1.2582336 | 0.3259406 | 0.7444693 | -2.0559832 | 2.8762020 |
| 3 | ED_HighestGradeSome College or Associate Degree | 0.0358165 | 1.2661373 | 0.0282880 | 0.9774324 | -2.4457670 | 2.5174000 |
| 3 | BMI | -0.0028174 | 0.0340501 | -0.0827416 | 0.9340570 | -0.0695544 | 0.0639197 |
| 3 | ED_Pain | 0.3491565 | 0.1237802 | 2.8207789 | 0.0047907 | 0.1065518 | 0.5917611 |
| 3 | RaceNon-Hispanic Black | -1.1091481 | 0.7432089 | -1.4923773 | 0.1356003 | -2.5658107 | 0.3475145 |
| 3 | RaceNon-Hispanic Other | -0.7506290 | 1.3307111 | -0.5640811 | 0.5726989 | -3.3587749 | 1.8575169 |
| 3 | RaceNon-Hispanic White | -0.8894535 | 0.8131545 | -1.0938309 | 0.2740292 | -2.4832071 | 0.7043000 |
| 3 | Plate.Number | -0.0826449 | 0.0446737 | -1.8499656 | 0.0643185 | -0.1702038 | 0.0049140 |
| 3 | e2_binlow E2 | 0.3192749 | 1.2475478 | 0.2559220 | 0.7980111 | -2.1258739 | 2.7644238 |
| 3 | e2_binmid E2 | -0.6698535 | 0.9487884 | -0.7060093 | 0.4801823 | -2.5294445 | 1.1897376 |
#Fit the model for time point 6 months
model_m6 <- multinom(bin_pain4 ~ sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin, subset = timepoint == 6, control = list(singular.ok = FALSE), data = e2_full_long)
## # weights: 45 (28 variable)
## initial value 436.149079
## iter 10 value 307.224469
## iter 20 value 284.418287
## iter 30 value 282.243213
## iter 40 value 282.104992
## iter 50 value 282.103013
## iter 50 value 282.103010
## iter 50 value 282.103010
## final value 282.103010
## converged
tidy(model_m6, conf.int = TRUE) %>%
kable() %>% kable_styling("basic", full_width = FALSE) #plate number is significant????, no e2 significance
| y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| 2 | (Intercept) | 0.0712850 | 1.0728075 | 0.0664472 | 0.9470218 | -2.0313790 | 2.1739491 |
| 2 | sqrt.conc | -0.0006349 | 0.0632923 | -0.0100314 | 0.9919963 | -0.1246856 | 0.1234158 |
| 2 | ED_Age | 0.0199455 | 0.0101454 | 1.9659702 | 0.0493021 | 0.0000609 | 0.0398301 |
| 2 | ED_HighestGradeBachelor’s Degree | 0.1881553 | 0.5495107 | 0.3424051 | 0.7320460 | -0.8888659 | 1.2651764 |
| 2 | ED_HighestGradeHigh School or Less | 0.1257468 | 0.5203582 | 0.2416543 | 0.8090481 | -0.8941366 | 1.1456302 |
| 2 | ED_HighestGradeSome College or Associate Degree | 0.0885682 | 0.5102420 | 0.1735807 | 0.8621950 | -0.9114878 | 1.0886241 |
| 2 | BMI | 0.0121089 | 0.0152307 | 0.7950334 | 0.4265941 | -0.0177427 | 0.0419605 |
| 2 | ED_Pain | 0.0421307 | 0.0495129 | 0.8509044 | 0.3948225 | -0.0549128 | 0.1391742 |
| 2 | RaceNon-Hispanic Black | -0.2452478 | 0.3799673 | -0.6454446 | 0.5186391 | -0.9899700 | 0.4994744 |
| 2 | RaceNon-Hispanic Other | -0.2149588 | 0.5827748 | -0.3688541 | 0.7122365 | -1.3571764 | 0.9272587 |
| 2 | RaceNon-Hispanic White | 0.0861751 | 0.3968412 | 0.2171527 | 0.8280893 | -0.6916193 | 0.8639695 |
| 2 | Plate.Number | -0.0534691 | 0.0185320 | -2.8852326 | 0.0039112 | -0.0897911 | -0.0171471 |
| 2 | e2_binlow E2 | 0.4898350 | 0.5723151 | 0.8558834 | 0.3920623 | -0.6318820 | 1.6115520 |
| 2 | e2_binmid E2 | 0.1372568 | 0.4195159 | 0.3271791 | 0.7435324 | -0.6849792 | 0.9594929 |
| 3 | (Intercept) | -17.8847896 | 1.8883086 | -9.4713277 | 0.0000000 | -21.5858064 | -14.1837727 |
| 3 | sqrt.conc | -0.0087239 | 0.1694346 | -0.0514886 | 0.9589362 | -0.3408097 | 0.3233618 |
| 3 | ED_Age | 0.0756291 | 0.0214585 | 3.5244337 | 0.0004244 | 0.0335712 | 0.1176870 |
| 3 | ED_HighestGradeBachelor’s Degree | 12.1648748 | 1.0485328 | 11.6018063 | 0.0000000 | 10.1097882 | 14.2199614 |
| 3 | ED_HighestGradeHigh School or Less | 13.7826016 | 0.6979667 | 19.7467889 | 0.0000000 | 12.4146120 | 15.1505913 |
| 3 | ED_HighestGradeSome College or Associate Degree | 13.7298894 | 0.7384538 | 18.5927530 | 0.0000000 | 12.2825465 | 15.1772323 |
| 3 | BMI | -0.0246015 | 0.0351234 | -0.7004303 | 0.4836586 | -0.0934421 | 0.0442391 |
| 3 | ED_Pain | 0.1905089 | 0.1038627 | 1.8342384 | 0.0666186 | -0.0130582 | 0.3940761 |
| 3 | RaceNon-Hispanic Black | 0.1621323 | 0.7508772 | 0.2159238 | 0.8290471 | -1.3095600 | 1.6338245 |
| 3 | RaceNon-Hispanic Other | 0.5361732 | 1.0952850 | 0.4895285 | 0.6244676 | -1.6105459 | 2.6828924 |
| 3 | RaceNon-Hispanic White | -0.9740496 | 0.9465993 | -1.0289989 | 0.3034802 | -2.8293501 | 0.8812509 |
| 3 | Plate.Number | -0.0650576 | 0.0379438 | -1.7145790 | 0.0864224 | -0.1394260 | 0.0093108 |
| 3 | e2_binlow E2 | 1.0543342 | 1.3008846 | 0.8104748 | 0.4176673 | -1.4953528 | 3.6040211 |
| 3 | e2_binmid E2 | -0.1763840 | 0.9934211 | -0.1775521 | 0.8590748 | -2.1234535 | 1.7706856 |
#m <- glmer(bin_pain4 ~ time + sqrt.conc + ED_Age + ED_HighestGrade + BMI + ED_Pain + Race + Plate.Number + e2_bin + (1 | PID), data = e2_full_long, family = binomial, control = glmerControl(optimizer = "bobyqa"), nAGQ = 10)
miRNA data
#I don't think updated miRNA data was sent? I'll use the miRNA data from 2021 from this original code
#this file was generated from "AURORA_new_QC_sRNA_06142021.R", which I do not have, this is from Ying
miRNA_cleaned <- read.csv("AURORA_sRNA_cleaned_596_06.14.2021.csv")
miRNA_cleaned$sample <- rownames(miRNA_cleaned)
#miRNA_cleaned dimensions 596 x 1552
Link miRNA with keys
#sample ID to inventory ID and PID Keys
sampleID <- read.csv("ID_to_SampleID_all.csv")
ID_link <- readxl::read_excel("Final Sample List_UNC BSP RNA&Plasma.xlsx")
ID2 <- ID_link %>%
right_join(sampleID,by=c("INVENTORY CODE" = "Inventory.code", "SAMPLE ID" = "sampleID_sub"))
ID_miRNA <- miRNA_cleaned %>%
mutate(sample=as.numeric(sample)) %>%
left_join(ID2,by="sample") #596 samples
length(unique(ID_miRNA$PID)) # unique PID 294
## [1] 294
subset to only the timepoint 0/ED visit miRNA values
ID_miRNA_T0 <- ID_miRNA %>%
filter(`ALT ID` == "T0") #dimensions 294 x 1557
miRNA
NEW MARCH 2024 CODE FOR miRNA
none of the PIDs match up between the new estrogen data and the miRNA data
#unique(e2_full_long$PID)
#ID_miRNA$PID
#typeof(e2_full_long$PID)
#typeof(ID_miRNA$PID)
#e2_wide_all<- e2_full_wide %>%
# inner_join(ID_miRNA,by="PID")
#E2_long_all<- e2_full_long %>%
# inner_join(ID_miRNA,by="PID")