This document contains the code for the AURORA ELA analysis with a Z TRANSFORMATION ON ALL MODEL DATA. In this analysis, we will explore the relationship between childhood trauma (CTQ) and pain outcomes in the AURORA dataset. We will perform correlation analysis, linear models, repeated measures models, and assess the relationship between CTQ and pain outcome trajectories. This is a preliminary draft of the analysis that will be completed over the next 2 weeks.
rm(list = ls())
libraries <- c("tidyverse", "MASS", "rstatix", "corrplot", "nlme", "sjPlot", "readxl", "nnet", "broom", "lmtest", "forcats","car", "highcharter", "ggplot2", "corrr", "ggcorrplot", "gridExtra", "skimr", "pROC", "gtsummary", "dunn.test", "tidyr", "purrr", "dplyr")
suppressMessages(suppressWarnings(
invisible(lapply(libraries, library, character.only = TRUE))))
Note: For the purpose of this draft code file, Lauren has already
combined all of the freeze 4 datasets into one file. This pre-combined
file, called ELA_full_data_raw.csv below, will be used for
the initial analysis. The full file import code will be rendered and
applied in the final script
#Read in the dataset
data_1 <- read.csv("AURORA_full_update02102022_New.csv",check.names=F)[-1]
#freeze 4 datasets
AURORA_Freeze_4_general_mod <- read_csv("AURORA_Freeze_4_general_mod.csv",na = ".")
AURORA_Freeze_4_demogr_mod <- read_csv("AURORA_Freeze_4_demogr_mod.csv",na = ".")
AURORA_Freeze_4_pain_mod <- read_csv("AURORA_Freeze_4_pain_mod.csv",na = ".")
AURORA_Freeze_4_ctq_mod <- read_csv("AURORA_Freeze_4_ctq_mod.csv",na = ".")
AURORA_F4_CTQ_item <- readxl::read_excel("AURORA_F4_CTQ_item.xlsx")
#AURORA_Freeze_4_pdi_mod <- read_csv("AURORA_Freeze_4_pdi_mod.csv",na = ".")
SiteID <- read_excel("SiteID.xlsx")
FZ4_df0 <- AURORA_Freeze_4_demogr_mod %>% inner_join(AURORA_F4_CTQ_item,by="PID") %>%
inner_join(AURORA_Freeze_4_pain_mod,by="PID") %>%
inner_join(AURORA_Freeze_4_ctq_mod,by="PID") %>%
#inner_join(AURORA_Freeze_4_pdi_mod,by="PID") %>%
inner_join(SiteID,by="PID") %>%
mutate(Site_New = case_when(SiteID %in% c("Baystate","BMC", "Beth Israel", "BWH", "Cooper","Einstein","Jefferson","MGH","Miriam","Penn","Rhode Island","St. John","St. Joseph","Temple","UMass") ~ "NorthEast",SiteID %in% c("Baylor", "Emory ED", "Jacksonville","UAB","UNC","UT Houston","UT Southwestern","Vanderbilt") ~ "SouthEast", SiteID %in% c("39","49","Beaumont Royal Oak","Beaumont Troy","Eskenazi","Henry Ford","Cincinnati","Indiana","WashU","WashU DP") ~ "Midwest")) %>%
convert_as_factor(Site_New,ED_GenderBirthCert,ED_GenderNow,ED_Marital,ED_RaceEthCode,ED_highestgrade,WK2_EmploymentCode,WK2_IncomeCode) %>%
mutate_at(vars(contains("WK2_Childhood")),as.numeric) %>% dplyr::select(-SiteID) %>% #2682
mutate(WK2_Bullying_Total = WK2_ChildhoodBullying+WK2_ChildhoodHitOrHurt) %>%
inner_join(data_1 %>% dplyr::select(PID,WK8_Pain_C,M3_Pain_C,M6_Pain_C),by="PID") %>%
filter(is.na(WK2_CTQSF_SexAbu_RS) == F) %>%
filter(is.na(WK2_CTQSF_PhyAbu_RS)== F) %>%
filter(is.na(WK2_CTQSF_EmoAbu_RS)== F) %>%
filter(is.na(WK2_CTQSF_EmoNeg_RS) == F) %>%
filter(is.na(WK2_CTQSF_PhyNeg_RS) == F) %>%
filter(is.na(WK2_Bullying_Total) == F) %>% #2483
mutate(log_WK2_CTQSF_Total_RS = log2(WK2_CTQSF_Total_RS+1),log_ED_NowPain=log2(ED_NowPain+1))
Below: Read in the completed dataframe with each of the separate AURORA files. Lauren did this on her end for ease of use in this draft code analysis. Latent class trajectories also imported.
Troubleshooting why the R environment is all fucked up and nothing works
#install.packages("dplyr")
library(readxl)
library(dplyr)
Trauma_df <- read_excel("AURORA_Freeze_4_trauma_mod.xlsx")
# Explicitly set select to refer to dplyr::select
select <- dplyr::select
# Import datasets
FZ4_df <- read_csv("ELA_full_data_raw.csv")
New names:
• `` -> `...1`
Rows: 2483 Columns: 78
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Site_New
dbl (77): ...1, PID, ED_GenderBirthCert, ED_GenderNow, ED_Age, ED_Marital, ED_RaceEthCode, ED_highestgrade, BMI, WK2_Employment...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pain_trajectory <- read_csv("pain_latent_class_4.csv")
Rows: 2943 Columns: 6
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): pain_Class_MostLikely
dbl (5): PID, Probability_Class1, Probability_Class2, Probability_Class3, Probability_Class4
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PTSD_df <- read_excel("AURORA_Freeze_4_ptsd_mod.xlsx")
Trauma_df <- read_excel("AURORA_Freeze_4_trauma_mod.xlsx")
Injury_df <- read_excel("AURORA_Freeze_4_injury_mod.xlsx")
ADI_df <- read_excel("AURORA_Freeze_4_ses_mod.xlsx",na = ".")
# Combine datasets
df_traj <- FZ4_df %>%
inner_join(Trauma_df %>% select(PID, ED_Event_BroadClass), by = "PID") %>%
inner_join(Injury_df %>% select(PID, ED_Concussion), by = "PID") %>%
inner_join(ADI_df %>% select(PID, ADI_NatRank), by = "PID") %>%
inner_join(pain_trajectory %>% select(PID, pain_Class_MostLikely), by = "PID")
#export <- df_traj[,c(2,82,70)]
#write.csv(export, "ELA_full_data.csv")
#import xiady df AURORA_Construct_Score_FS_Frz4_Data.csv
df_xiaodi <- read_csv("AURORA_Construct_Score_FS_Frz4_Data.csv")
Rows: 2887 Columns: 141
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (140): Pain_1, Pain_9, Pain_21, Pain_31, Pain_43, Pain_53, Pain_67, Pain_77, Pain_105, Pain_147, Pain_196, Pain_238, Pain_2...
dbl (1): PID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#get rid of everything but the variables that start with Pain
df_xiaodi2 <- df_xiaodi %>% select(PID, starts_with("Pain")) %>% as.data.frame()
# bind it by PID to our data set and send me a simpler CSV that is just PID, Pain Trajectory, and Pain Scores for all the time points?
pain_scores_df <- df_traj[,c(2,82)]
#join pain_scores_df and df_xiaodi2 by PID
xiaodi_df_for_Lauren <- pain_scores_df %>% inner_join(df_xiaodi2, by = "PID")
#write csv of xiaodi_df_for_Lauren
write.csv(xiaodi_df_for_Lauren, "xiaodi_df_for_Lauren.csv")
Refactor pain trajectories and Relevel to “low pain” as the reference group
df_traj <- df_traj %>%
rename(Pain_Class = pain_Class_MostLikely)
df_traj <- df_traj %>%
mutate(Pain_Class = case_when(
Pain_Class == "Class 1" ~ "moderate recovery",
Pain_Class == "Class 2" ~ "moderate",
Pain_Class == "Class 3" ~ "low",
Pain_Class == "Class 4" ~ "high",
TRUE ~ as.character(Pain_Class) # This handles any unexpected values
)) %>%
mutate(Pain_Class = factor(Pain_Class)) %>%
mutate(Pain_Class = relevel(Pain_Class, ref = "low"))
Factorize and rename categorical variables
factor_vars <- c("ED_GenderBirthCert", "ED_Event_BroadClass", "ED_Marital",
"WK2_EmploymentCode", "ED_highestgrade", "ED_RaceEthCode", "PRE_Pain_MdSv")
df_traj[factor_vars] <- lapply(df_traj[factor_vars], as.factor)
#in gender, change 1 to male and 2 to female
df_traj$ED_GenderBirthCert <- ifelse(df_traj$ED_GenderBirthCert == 1, "Male", "Female")
#in pre pain, change 1 to yes and 0 to no .. is this correct?
df_traj$PRE_Pain_MdSv <- ifelse(df_traj$PRE_Pain_MdSv == 1, "Yes", "No")
# Convert continuous variables to numeric
df_traj$ED_PDI_RS <- as.numeric(df_traj$ED_PDI_RS)
df_traj$ED_Concussion <- as.numeric(df_traj$ED_Concussion)
Warning: NAs introduced by coercion
#rename WK2 CTQ total column to CTQ_Total
df_traj <- df_traj %>%
rename(CTQ_Total = WK2_CTQSF_Total_RS)
general clean up, renaming, removing negatives
#Make any value less than 0 in the bullying total == NA
df_traj$WK2_Bullying_Total <- ifelse(df_traj$WK2_Bullying_Total < 0, NA, df_traj$WK2_Bullying_Total)
#Make any value less than 0 in ADI == NA
df_traj$ADI_NatRank <- ifelse(df_traj$ADI_NatRank < 0, NA, df_traj$ADI_NatRank)
#3 bullying rows have negative numbers
#remove the rows in WK2_Bullying_Total that contain negative values
df_traj <- df_traj %>% filter(WK2_Bullying_Total >= 0)
relevel categories for trauma type
df_traj <- df_traj %>%
mutate(ED_Event_BroadClass = case_when(
ED_Event_BroadClass == "1" ~ "MVC/Non-motor Collision",
ED_Event_BroadClass == "6" ~ "MVC/Non-motor Collision",
ED_Event_BroadClass == "2" ~ "Physical/Sexual Abuse",
ED_Event_BroadClass == "3" ~ "Physical/Sexual Abuse",
ED_Event_BroadClass == "4" ~ "Other",
ED_Event_BroadClass == "5" ~ "Other",
ED_Event_BroadClass == "7" ~ "Other",
ED_Event_BroadClass == "8" ~ "Other",
ED_Event_BroadClass == "9" ~ "Other",
ED_Event_BroadClass == "10" ~ "Other",
ED_Event_BroadClass == "11" ~ "Other",
TRUE ~ as.character(ED_Event_BroadClass) # Keeps any other values unchanged
))
# Convert ED_Event broad class to factor
df_traj$ED_Event_BroadClass <- factor(df_traj$ED_Event_BroadClass)
table(df_traj$ED_Event_BroadClass)
MVC/Non-motor Collision Other Physical/Sexual Abuse
1909 340 231
Relevel categories for education
#relevel education category
# 1- did not finish HS (codes 0-12) 2- HS grad + some college (codes 13-15) 3- college grad (Bachelors or Associates) (codes 16-18) 4- post grad codes(19-21)
df_traj <- df_traj %>%
mutate(ED_highestgrade = case_when(
ED_highestgrade == "0" ~ "Did not finish HS",
ED_highestgrade == "1" ~ "Did not finish HS",
ED_highestgrade == "7" ~ "Did not finish HS",
ED_highestgrade == "8" ~ "Did not finish HS",
ED_highestgrade == "9" ~ "Did not finish HS",
ED_highestgrade == "10" ~ "Did not finish HS",
ED_highestgrade == "11" ~ "Did not finish HS",
ED_highestgrade == "12" ~ "Did not finish HS",
ED_highestgrade == "13" ~ "HS Grad/Some College",
ED_highestgrade == "14" ~ "HS Grad/Some College",
ED_highestgrade == "15" ~ "HS Grad/Some College",
ED_highestgrade == "16" ~ "Bach or Assoc degree",
ED_highestgrade == "17" ~ "Bach or Assoc degree",
ED_highestgrade == "18" ~ "Bach or Assoc degree",
ED_highestgrade == "19" ~ "Post-graduate education",
ED_highestgrade == "20" ~ "Post-graduate education",
ED_highestgrade == "21" ~ "Post-graduate education",
ED_highestgrade == "-7" ~ NA,
ED_highestgrade == "-5" ~ NA,
TRUE ~ as.character(ED_highestgrade) # Keeps any other values unchanged
))
# Convert to factor
df_traj$ED_highestgrade <- factor(df_traj$ED_highestgrade)
table(df_traj$ED_highestgrade)
Bach or Assoc degree Did not finish HS HS Grad/Some College Post-graduate education
695 267 1321 189
Re-level marriage category
#relevel marrital status category
#marital status 1-married 2-separated 3-divorced 4-annulled 5-widowed 6-single
#4. marriage - combine 2+3+4+5 as a single category
df_traj <- df_traj %>%
mutate(ED_Marital = case_when(
ED_Marital == "1" ~ "Married",
ED_Marital == "2" ~ "Divorced/Widowed",
ED_Marital == "3" ~ "Divorced/Widowed",
ED_Marital == "4" ~ "Divorced/Widowed",
ED_Marital == "5" ~ "Divorced/Widowed",
ED_Marital == "6" ~ "Single",
TRUE ~ as.character(ED_Marital) # Keeps any other values unchanged
))
# Convert to factor
df_traj$ED_Marital <- factor(df_traj$ED_Marital)
table(df_traj$ED_Marital)
Divorced/Widowed Married Single
446 539 1483
Subset df_traj to our final df that will be used for model analysis, keep only the features needed.
df <- df_traj %>%
select(PID, Pain_Class,ED_Age, Site_New, ED_RaceEthCode, ED_GenderBirthCert, ADI_NatRank, CTQ_Total,
BMI, PRE_Pain_MdSv, WK2_EmploymentCode, ED_Marital, ED_highestgrade,
ED_Concussion, ED_Event_BroadClass, ED_PDI_RS, WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_EmoAbu_RS, WK2_CTQSF_SexAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoNeg_RS, WK2_Bullying_Total)
Add the CTQ triad score that is a combination of bullying, physical abuse, and emotional abuse scores. Sarah requested to examine the two different types of bullying questions
#add combination CTQ score
df$CTQ_Triad <- df$WK2_Bullying_Total + df$WK2_CTQSF_PhyAbu_RS + df$WK2_CTQSF_EmoAbu_RS
#print the rows that have NA in the CTQ triad
df %>% filter(is.na(CTQ_Triad))
#Confirms that for the CTQ triad, if any of the additive features contain NA, then the final score is NA
# Add the two different types of bullying question
df <- df %>%
left_join(FZ4_df %>% select(PID, BulliedSimple = 20, BulliedHitOrHurt = 21), by = "PID")
#remove first column PID
df <- df %>% select(-PID)
#make all of df[,c(2,6:8)]) numeric
df[,c(2,6:8)] <- lapply(df[,c(2,6:8)], as.numeric)
Binary scores for later models
df$Bullying_Any <- ifelse(df$WK2_Bullying_Total > 0, 1, 0)
df$PhyAbu_Any <- ifelse(df$WK2_CTQSF_PhyAbu_RS > 0, 1, 0)
df$EmoAbu_Any <- ifelse(df$WK2_CTQSF_EmoAbu_RS > 0, 1, 0)
df$SexAbu_Any <- ifelse(df$WK2_CTQSF_SexAbu_RS > 0, 1, 0)
df$PhyNeg_Any <- ifelse(df$WK2_CTQSF_PhyNeg_RS > 0, 1, 0)
df$EmoNeg_Any <- ifelse(df$WK2_CTQSF_EmoNeg_RS > 0, 1, 0)
df$CTQ_Any <- ifelse(df$CTQ_Total > 0, 1, 0)
Z-scaling is a method of standardizing the data so that it has a mean
of 0 and a standard deviation of 1. This is done to make the data more
interpretable and to ensure that the variables are on the same scale.
the scale function works by subtracting the mean of the
data from each value and then dividing by the standard deviation. This
is done for each column in the data frame.
#scale
df[, c(7, 16:22)] <- scale(df[, c(7, 16:22)])
df[, c(7, 16:22)] <- lapply(df[, c(7, 16:22)], function(x) as.numeric(scale(x)))
#str(df)
summary(df)
Main multi-nominal regression model with CTQ as a predictor variable, including our standard covariates (same in linear models), and latent pain trajectories as the dependent variables. Model 2 is this with extra features. extract marginal means from base model for making graphs
#pain trajectory ~ age + sex + race + site + CTQ
m1 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total, data = df)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3145.752220
iter 20 value 3098.312611
iter 30 value 3081.080649
final value 3080.567070
converged
#view output
tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#examine variance inflation factor to assess collinearity
vif(m1)
Warning in vif.default(m1) : No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 12.103945 1 3.479072
Site_New 3.792127 2 1.395471
ED_RaceEthCode 16.213444 3 1.590911
ED_GenderBirthCert 1.753029 1 1.324020
CTQ_Total 1.371539 1 1.171127
Variance Inflation Factor (VIF) is used to detect multicollinearity. VIF > 10: Indicates high multicollinearity that might be problematic. VIF between 5 and 10: Moderate multicollinearity that might need addressing. VIF < 5: Generally considered acceptable.
m2 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2741.339524
iter 20 value 2550.555155
iter 30 value 2496.401666
iter 40 value 2482.971837
iter 50 value 2482.225214
final value 2482.128451
converged
tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
vif(m2)
Warning in vif.default(m2) : No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 17.705104 1 4.207743
Site_New 4.738185 2 1.475377
ED_RaceEthCode 19.673930 3 1.643041
ED_GenderBirthCert 1.941991 1 1.393553
CTQ_Total 1.448485 1 1.203530
BMI 18.363080 1 4.285216
PRE_Pain_MdSv 3.067227 1 1.751350
ED_Marital 10.289081 2 1.790994
ADI_NatRank 12.774021 1 3.574076
ED_Event_BroadClass 2.156180 2 1.211773
ED_PDI_RS 7.261578 1 2.694732
#3rd model is same as M2 but with sex*CTQ interaction
m3 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Concussion + ED_PDI_RS + ED_GenderBirthCert*CTQ_Total, data = df_traj)
# weights: 60 (42 variable)
initial value 3013.803941
iter 10 value 2869.274549
iter 20 value 2732.072060
iter 30 value 2594.035122
iter 40 value 2585.329903
iter 50 value 2585.072938
iter 50 value 2585.072937
iter 50 value 2585.072937
final value 2585.072937
converged
tidy(m3, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#4th model is same as m0 but only week 2 emotional abuse scale
m4 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3158.803635
iter 20 value 3078.403490
iter 30 value 3066.446141
final value 3066.318761
converged
tidy(m4, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#5th model is physical abuse
m5 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3162.461193
iter 20 value 3091.560918
iter 30 value 3080.983373
final value 3080.817912
converged
tidy(m5, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))%>%
filter(term == "CTQ_Total")
#physical abuse*sex interaction term added
m5b <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS + WK2_CTQSF_PhyAbu_RS*ED_GenderBirthCert, data = df_traj)
# weights: 44 (30 variable)
initial value 3428.305955
iter 10 value 3150.529982
iter 20 value 3091.547082
iter 30 value 3076.766799
final value 3075.806125
converged
tidy(m5b, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#6th model is sexual abuse
m6 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_SexAbu_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3175.149270
iter 20 value 3101.636730
iter 30 value 3092.659879
final value 3092.509370
converged
tidy(m6, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#7th model is emotional neglect
m7 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoNeg_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3153.150877
iter 20 value 3111.094505
iter 30 value 3106.959691
final value 3106.761270
converged
tidy(m7, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#8th model is physical neglect
m8 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyNeg_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3159.001543
iter 20 value 3114.067693
iter 30 value 3106.323901
final value 3106.263598
converged
tidy(m8, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m9 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS*WK2_CTQSF_PhyAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 80 (57 variable)
initial value 2905.672981
iter 10 value 2729.958856
iter 20 value 2564.676198
iter 30 value 2490.109282
iter 40 value 2471.353798
iter 50 value 2469.747882
iter 60 value 2469.681562
final value 2469.678836
converged
#10th model is bullying
m10 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_Bullying_Total, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3138.073451
iter 20 value 3084.400326
iter 30 value 3076.703231
final value 3076.498579
converged
tidy(m10, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m11 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS + WK2_Bullying_Total + WK2_CTQSF_PhyAbu_RS , data = df)
# weights: 48 (33 variable)
initial value 3428.305955
iter 10 value 3108.962478
iter 20 value 3072.504450
iter 30 value 3059.099210
final value 3057.237153
converged
tidy(m11, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m12 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Triad, data = df)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3141.620645
iter 20 value 3075.520679
iter 30 value 3062.257889
final value 3062.073356
converged
tidy(m12, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
vif(m12)
Warning in vif.default(m12) : No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 12.129389 1 3.482727
Site_New 3.808713 2 1.396994
ED_RaceEthCode 16.418497 3 1.594247
ED_GenderBirthCert 1.749242 1 1.322589
CTQ_Triad 1.398927 1 1.182763
m13 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Triad + BMI + PRE_Pain_MdSv + ED_Concussion + ED_Event_BroadClass + ED_PDI_RS + ADI_NatRank, data = df)
# weights: 68 (48 variable)
initial value 2915.377041
iter 10 value 2748.168396
iter 20 value 2550.846915
iter 30 value 2496.762252
iter 40 value 2487.701025
iter 50 value 2487.230911
final value 2487.224790
converged
tidy(m13, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m14 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_RaceEthCode, data = df)
# weights: 84 (60 variable)
initial value 2905.672981
iter 10 value 2738.781285
iter 20 value 2545.566498
iter 30 value 2491.153216
iter 40 value 2477.385899
iter 50 value 2476.228437
iter 60 value 2476.171183
final value 2476.169885
converged
tidy(m14, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m15 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_GenderBirthCert, data = df)
# weights: 76 (54 variable)
initial value 2905.672981
iter 10 value 2740.351683
iter 20 value 2546.525161
iter 30 value 2493.081747
iter 40 value 2480.739654
iter 50 value 2479.964436
iter 60 value 2479.938816
final value 2479.937746
converged
tidy(m15, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#physical abuse m16
m16 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2739.152319
iter 20 value 2548.076833
iter 30 value 2486.719546
iter 40 value 2478.571787
iter 50 value 2477.992574
final value 2477.950997
converged
tidy(m16, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#physical neglect m17
m17 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.269534
iter 20 value 2580.210494
iter 30 value 2504.715204
iter 40 value 2492.497525
iter 50 value 2491.587041
final value 2491.509002
converged
tidy(m17, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#emotional abuse m18
m18 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2739.343013
iter 20 value 2549.575062
iter 30 value 2486.449855
iter 40 value 2475.071211
iter 50 value 2474.358717
final value 2474.325475
converged
tidy(m18, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#emotional neglect m19
m19 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.115876
iter 20 value 2591.191332
iter 30 value 2503.297991
iter 40 value 2491.076542
iter 50 value 2490.377414
final value 2490.335809
converged
tidy(m19, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#sexual abuse m20
m20 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_SexAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2742.336087
iter 20 value 2564.102062
iter 30 value 2497.718946
iter 40 value 2487.454342
iter 50 value 2486.822604
final value 2486.781130
converged
tidy(m20, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#bullying m21
m21 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_Bullying_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2742.146186
iter 20 value 2551.065763
iter 30 value 2488.904082
iter 40 value 2480.381656
iter 50 value 2479.548137
final value 2479.495450
converged
tidy(m21, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m22 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + ED_GenderBirthCert*CTQ_Total, data = df)
# weights: 44 (30 variable)
initial value 3428.305955
iter 10 value 3164.934899
iter 20 value 3098.577104
iter 30 value 3077.554697
final value 3075.998418
converged
tidy(m22, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m23 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + ED_RaceEthCode*CTQ_Total, data = df)
# weights: 52 (36 variable)
initial value 3428.305955
iter 10 value 3160.593618
iter 20 value 3107.986141
iter 30 value 3077.131243
iter 40 value 3073.882501
final value 3073.873784
converged
tidy(m23, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
df_fem <- df %>% filter(ED_GenderBirthCert == "Female")
m24 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data=df_fem)
# weights: 68 (48 variable)
initial value 1821.590791
iter 10 value 1748.500314
iter 20 value 1642.086592
iter 30 value 1596.954041
iter 40 value 1595.854725
iter 50 value 1595.840215
final value 1595.840106
converged
tidy(m24, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
df_male <- df %>% filter(ED_GenderBirthCert == "Male")
m25 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data=df_male)
# weights: 68 (48 variable)
initial value 1084.082190
iter 10 value 965.172766
iter 20 value 895.187858
iter 30 value 869.978395
iter 40 value 868.728905
final value 868.722779
converged
tidy(m25, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#Model 2 but instead of physical abuse score, its a binary score of whether or not the pt had any physical abuse
m26 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.215465
iter 20 value 2538.670319
iter 30 value 2490.267312
iter 40 value 2481.883743
iter 50 value 2481.407674
final value 2481.370933
converged
tidy(m26, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
Model 27: M2 + Emo_Abuse ANY
#Model 2 but instead of emotional abuse score, its a binary score of whether or not the pt had any emotional abuse
m27 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.545586
iter 20 value 2541.516221
iter 30 value 2489.192593
iter 40 value 2482.375221
iter 50 value 2482.194772
final value 2482.178232
converged
tidy(m27, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 28: M2 + Emo_Neglect ANY
#Model 2 but instead of emotional neglect score, its a binary score of whether or not the pt had any emotional neglect
m28 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.664578
iter 20 value 2558.737778
iter 30 value 2495.816626
iter 40 value 2490.341546
iter 50 value 2490.056292
final value 2490.038111
converged
tidy(m28, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 29: M2 + Phy_Neglect ANY
#Model 2 but instead of physical neglect score, its a binary score of whether or not the pt had any physical neglect
m29 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.702303
iter 20 value 2548.462391
iter 30 value 2494.585863
iter 40 value 2489.264715
iter 50 value 2489.000832
final value 2488.979296
converged
tidy(m29, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 30: M2 + Sexual Abuse ANY
#Model 2 but instead of sexual abuse score, its a binary score of whether or not the pt had any sexual abuse
m30 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + SexAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.380632
iter 20 value 2545.148484
iter 30 value 2492.757380
iter 40 value 2486.885581
iter 50 value 2486.521490
final value 2486.501395
converged
tidy(m30, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#Model 2 but instead of bullying score, its a binary score of whether or not the pt had any bullying
m31 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + Bullying_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.165693
iter 20 value 2562.600366
iter 30 value 2494.807104
iter 40 value 2488.444171
iter 50 value 2488.177859
final value 2488.159649
converged
tidy(m31, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#write a model that does multinomial stepwise selection to predict pain_class from the best features from df
#rewrite stepwise code to remove missing values
invisible(capture.output(
stepwise_selection <- stepAIC(
multinom(Pain_Class ~ ., data = drop_na(df)),
direction = "both",
trace = 0)
))
# Initial features (excluding the response variable 'Pain_Class')
initial_features <- names(df)[names(df) != "Pain_Class"]
included_features <- names(coef(stepwise_selection))
excluded_features <- setdiff(initial_features, included_features)
# Print included and excluded features
cat("Included features:\n")
Included features:
print(included_features)
NULL
cat("\nExcluded features:\n")
Excluded features:
print(excluded_features)
[1] "ED_Age" "Site_New" "ED_RaceEthCode" "ED_GenderBirthCert" "ADI_NatRank"
[6] "CTQ_Total" "BMI" "PRE_Pain_MdSv" "WK2_EmploymentCode" "ED_Marital"
[11] "ED_highestgrade" "ED_Concussion" "ED_Event_BroadClass" "ED_PDI_RS" "WK2_CTQSF_PhyAbu_RS"
[16] "WK2_CTQSF_EmoAbu_RS" "WK2_CTQSF_SexAbu_RS" "WK2_CTQSF_PhyNeg_RS" "WK2_CTQSF_EmoNeg_RS" "WK2_Bullying_Total"
[21] "CTQ_Triad" "BulliedSimple" "BulliedHitOrHurt" "Bullying_Any" "PhyAbu_Any"
[26] "EmoAbu_Any" "SexAbu_Any" "PhyNeg_Any" "EmoNeg_Any" "CTQ_Any"
We wanted to make sure to include an analysis that closely mirrors our first animal figure (prior to SPS/TSE exposure, animals with ELA have increase pain-like behavior). I do not think we have done anything like this yet. Can you run an analysis to test whether there is a difference in PrePain rates/odds based on ELA?
I think this would either be a logistic regression using the CTQ composite and (separately) the Bullying composite, or a chi-square/z-proportion test and we could group CTQ and Bullying into high and low based on their median score (CTQ>6 and Bullying>3) or just use the “any CTQ” and “any Bullying variables”.
# Create binary variables for high/low CTQ and Bullying
df <- df %>%
mutate(
CTQ_HighLow = ifelse(CTQ_Total > 6, "High", "Low"),
Bullying_HighLow = ifelse(WK2_Bullying_Total > 3, "High", "Low"))
# Create binary variable for PRE_Pain_MdSv
df$PRE_Pain_MdSv2 <- ifelse(df$PRE_Pain_MdSv == "Yes", 1, 0)
# M33 Logistic regression using CTQ composite
m33 <- glm(PRE_Pain_MdSv2 ~ CTQ_Total, data = df, family = "binomial")
tidy(m33, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric)))
# M34 Logistic regression using Bullying composite
m34 <- glm(PRE_Pain_MdSv2 ~ WK2_Bullying_Total, data = df, family = "binomial")
tidy(m34, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric)))
# Chi-square test for CTQ_HighLow
chisq.test(table(df$CTQ_HighLow, df$PRE_Pain_MdSv))
Chi-squared test for given probabilities
data: table(df$CTQ_HighLow, df$PRE_Pain_MdSv)
X-squared = 339.7, df = 1, p-value < 2.2e-16
# Chi-square test for Bullying_HighLow
chisq.test(table(df$Bullying_HighLow, df$PRE_Pain_MdSv))
Chi-squared test for given probabilities
data: table(df$Bullying_HighLow, df$PRE_Pain_MdSv)
X-squared = 339.7, df = 1, p-value < 2.2e-16
# chi-square test for any CTQ
chisq.test(table(df$CTQ_Any, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$CTQ_Any, df$PRE_Pain_MdSv)
X-squared = 23.917, df = 1, p-value = 1.006e-06
# chi-square test for any bullying
chisq.test(table(df$Bullying_Any, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$Bullying_Any, df$PRE_Pain_MdSv)
X-squared = 4.2373, df = 1, p-value = 0.03955
#ANOVA on race vs CTQ
#ANOVA on ED_RaceEthCode versus CTQ
anova_result <- aov(CTQ_Total ~ ED_RaceEthCode, data = df)
summary(anova_result)
Df Sum Sq Mean Sq F value Pr(>F)
ED_RaceEthCode 3 9 2.9964 3 0.0295 *
Residuals 2469 2466 0.9987
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
7 observations deleted due to missingness
TukeyHSD(anova_result)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = CTQ_Total ~ ED_RaceEthCode, data = df)
$ED_RaceEthCode
diff lwr upr p adj
2-1 -0.20223923 -0.37873380 -0.02574466 0.0171293
3-1 -0.13103376 -0.30186661 0.03979908 0.1989380
4-1 -0.14979061 -0.46007179 0.16049057 0.6007039
3-2 0.07120547 -0.04214908 0.18456002 0.3703250
4-2 0.05244862 -0.23028766 0.33518490 0.9641946
4-3 -0.01875685 -0.29799390 0.26048020 0.9981705
#ANOVA on pain trajectory by CTQ
result <- aov(CTQ_Total ~ Pain_Class, data = df)
summary(result)
Df Sum Sq Mean Sq F value Pr(>F)
Pain_Class 3 58.9 19.628 20.08 7.39e-13 ***
Residuals 2476 2420.1 0.977
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
TukeyHSD(result)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = CTQ_Total ~ Pain_Class, data = df)
$Pain_Class
diff lwr upr p adj
high-low 0.4160161 0.259847448 0.57218467 0.0000000
moderate-low 0.2791787 0.148849047 0.40950840 0.0000002
moderate recovery-low 0.1385781 -0.002276327 0.27943254 0.0557627
moderate-high -0.1368373 -0.308010344 0.03433567 0.1683568
moderate recovery-high -0.2774380 -0.456754281 -0.09812162 0.0004163
moderate recovery-moderate -0.1406006 -0.297926824 0.01672559 0.0988564
m1 #base model m2 #base model + additional features m12 #base model + CTQ_Triad m13 #m2 + CTQ_T
m16 #m2 + physical abuse m17 #m2 + physical neglect m18 #m2 + emotional abuse m19 #m2 + emotional neglect m20 #m2 + sexual abuse m21 #m2 + bullying
# List of all models
model_names <- paste0("m", 1:31)
models <- lapply(model_names, get)
#remove m25 from list of models
model_names2 <- model_names[-25]
models2 <- lapply(model_names2, get)
# Function to get AIC and BIC
get_aic_bic <- function(model) c(AIC = AIC(model), BIC = BIC(model))
# Apply the function to all models, sort results
compared_m <- as.data.frame(t(sapply(models, get_aic_bic)))
compared_m <- cbind(model = model_names, compared_m)
compared_m <- compared_m[order(compared_m$BIC), ]
# Print the results
print(compared_m)
#m18 is m2 + emotional abuse
#m13 is m2 + triad
#m16 is m2 + physical abuse
#m21 is m2 + bullying
#m2 is our main model
#m20 is m2 + sexual abuse
#m19 is m2 + emotional neglect
Trying this with m1, base model
# Load required library
library(pROC)
# Predict probabilities for m25
probs1 <- predict(m25, df, type = "probs")
# Create one-vs-rest ROC curves
roc_list <- list()
auroc_values <- numeric(length(colnames(probs1))) # Initialize vector for AUROC values
for (class in colnames(probs1)) {
roc_list[[class]] <- roc(as.numeric(df$Pain_Class == class), probs1[,class])
auroc_values[class] <- auc(roc_list[[class]]) # Calculate AUROC
}
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
# Plot ROC curves
plot(roc_list[[1]], main = "ROC Curves for Each Class", col = 1, lwd = 2)
for (i in 2:length(roc_list)) {
plot(roc_list[[i]], add = TRUE, col = i, lwd = 2)
}
# Add AUROC values as text directly on the plot
# Use a fixed position or adjust as needed
text_positions <- c(0.8, 0.7, 0.6, 0.5, 0.4, 0.3) # Adjust as necessary for your plot
for (i in 1:length(roc_list)) {
text(x = 0.6, y = text_positions[i],
labels = paste0(names(roc_list)[i], ": AUROC = ", round(auroc_values[i], 3)),
pos = 4, cex = 0.8, col = i)
}
# Add legend to the plot
legend("bottomright", legend = names(roc_list), col = 1:length(roc_list), lwd = 2, bty = "n")
library(caret)
# Function to compute confusion matrix with model names
get_confusion_matrix <- function(model, data, model_name) {
predictions <- predict(model, newdata = data, type = "class")
cm <- confusionMatrix(predictions, data$Pain_Class)
return(list(model = model_name, confusion_matrix = cm))}
# Compute confusion matrices with model names
model_names <- c("m2", "m25")
models <- lapply(model_names, get)
conf_matrices <- mapply(get_confusion_matrix, models, MoreArgs = list(data = df), model_name = model_names, SIMPLIFY = FALSE)
# Print the confusion matrices with model names
for (cm in conf_matrices) {
cat("Model:", cm$model, "\n")
print(cm$confusion_matrix)
cat("\n")}
Model: m2
Confusion Matrix and Statistics
Reference
Prediction low high moderate moderate recovery
low 798 121 294 304
high 27 80 58 30
moderate 77 89 128 55
moderate recovery 8 5 9 13
Overall Statistics
Accuracy : 0.4862
95% CI : (0.4646, 0.5078)
No Information Rate : 0.4342
P-Value [Acc > NIR] : 9.46e-07
Kappa : 0.1852
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: low Class: high Class: moderate Class: moderate recovery
Sensitivity 0.8769 0.27119 0.26176 0.032338
Specificity 0.3938 0.93615 0.86248 0.987013
Pos Pred Value 0.5260 0.41026 0.36676 0.371429
Neg Pred Value 0.8066 0.88690 0.79336 0.811257
Prevalence 0.4342 0.14074 0.23330 0.191794
Detection Rate 0.3807 0.03817 0.06107 0.006202
Detection Prevalence 0.7238 0.09303 0.16651 0.016698
Balanced Accuracy 0.6353 0.60367 0.56212 0.509676
Model: m25
Confusion Matrix and Statistics
Reference
Prediction low high moderate moderate recovery
low 787 124 301 300
high 34 77 60 42
moderate 64 81 102 44
moderate recovery 25 13 26 16
Overall Statistics
Accuracy : 0.4685
95% CI : (0.447, 0.4901)
No Information Rate : 0.4342
P-Value [Acc > NIR] : 0.0008348
Kappa : 0.1601
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: low Class: high Class: moderate Class: moderate recovery
Sensitivity 0.8648 0.26102 0.20859 0.039801
Specificity 0.3887 0.92449 0.88239 0.962220
Pos Pred Value 0.5205 0.36150 0.35052 0.200000
Neg Pred Value 0.7894 0.88423 0.78560 0.808532
Prevalence 0.4342 0.14074 0.23330 0.191794
Detection Rate 0.3755 0.03674 0.04866 0.007634
Detection Prevalence 0.7214 0.10162 0.13884 0.038168
Balanced Accuracy 0.6268 0.59275 0.54549 0.501010
for m1, m2, m2 + each subtype, m2 + each subtype ANY likelihood ratio test m1 vs m2
apply_bh_correction <- function(models, model_names) {
correct_model <- function(model) {
model_results <- tidy(model, exponentiate = TRUE) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)
model_results <- model_results %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH")) %>%
mutate(
p.value = round(p.value, digits = 6),
p_adj_bh = round(p_adj_bh, digits = 6)
) %>%
mutate(
p.value = format(p.value, scientific = FALSE),
p_adj_bh = format(p_adj_bh, scientific = FALSE)
)
return(model_results)
}
corrected_models <- lapply(models, correct_model)
names(corrected_models) <- model_names
return(corrected_models)
}
models_list_bases <- list(m1,m2)
model_names <- c("m1", "m2")
adjusted_models <- apply_bh_correction(models_list_bases, model_names)
cat("MODEL 1 BH CORRECTED")
MODEL 1 BH CORRECTED
tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
cat("\n \n MODEL 2 BH CORRECTED")
MODEL 2 BH CORRECTED
tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
Within
cat("MODEL 13 BH CORRECTED")
MODEL 13 BH CORRECTED
tidy(m13, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
#function to get adjusted p values from list of models
extract_p_values <- function(model_df, model_name) {
model_df %>%
mutate(model = model_name) %>%
select(y.level, term, p.value,p_adj_bh, model)}
perform BH corrections on the values in combined_p_values_m16_m21
#BH corrections for models with subtypes m16-m21
#make first set of adjusted within p vals
models_list_subtypes <- list(m16, m17, m18, m19, m20, m21)
model_names2 <- c("m16", "m17", "m18", "m19", "m20", "m21")
adjusted_models2 <- apply_bh_correction(models_list_subtypes, model_names2)
combined_p_values_m16_m21 <- map2_dfr(adjusted_models2, paste0("m", 16:21), extract_p_values)
double_corrected_p_values_16_m21 <- combined_p_values_m16_m21 %>%
mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
mutate(
p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE) )
double_corrected_p_values_16_m21
#BH BETWEEN Corrections for models with binary indicators m26-m31
models_list_ANY <- list(m26, m27, m28, m29, m30, m31)
model_names3 <- c("m26", "m27", "m28", "m29", "m30", "m31")
adjusted_models3 <- apply_bh_correction(models_list_ANY, model_names3)
double_corrected_p_values_m26_m31 <- map2_dfr(adjusted_models3, paste0("m", 26:31), extract_p_values)
#perform BH corrections on the values in double_corrected_p_values_m26_m31
double_corrected_p_values_m26_m31 <- double_corrected_p_values_m26_m31 %>%
mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
mutate(
p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE) )
double_corrected_p_values_m26_m31
Table 1a: Socio-demographic variables
table1a_features <- c("ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank")
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1a_features)) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2)))
| Characteristic | N = 2,4801 |
|---|---|
| ED_GenderBirthCert | |
| Female | 1,554 (63%) |
| Male | 926 (37%) |
| ED_Age | 36.09 (13.31) |
| ED_RaceEthCode | |
| 1 | 278 (11%) |
| 2 | 891 (36%) |
| 3 | 1,213 (49%) |
| 4 | 91 (3.7%) |
| Unknown | 7 |
| BMI | 30.13 (8.32) |
| Unknown | 158 |
| ADI_NatRank | 63.93 (27.57) |
| Unknown | 80 |
| 1 n (%); Mean (SD) | |
Table 1b: ED/Trauma-related variable.names
table1b_features <- c("Site_New", "ED_Event_BroadClass", "ED_PDI_RS")
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1b_features)) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2)))
| Characteristic | N = 2,4801 |
|---|---|
| Site_New | |
| Midwest | 960 (39%) |
| NorthEast | 1,053 (42%) |
| SouthEast | 467 (19%) |
| ED_Event_BroadClass | |
| MVC/Non-motor Collision | 1,909 (77%) |
| Other | 340 (14%) |
| Physical/Sexual Abuse | 231 (9.3%) |
| ED_PDI_RS | 13.92 (7.24) |
| Unknown | 139 |
| 1 n (%); Mean (SD) | |
Table 1c: Past pain/stress variables
table1c_features <- c("PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")
# Create a named list to specify the type of each variable
type_list <- list(
WK2_CTQSF_PhyAbu_RS = "continuous",
WK2_CTQSF_EmoAbu_RS = "continuous",
WK2_CTQSF_SexAbu_RS = "continuous",
WK2_CTQSF_PhyNeg_RS = "continuous",
WK2_CTQSF_EmoNeg_RS = "continuous",
WK2_Bullying_Total = "continuous")
# Generate the summary table
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1c_features)) %>%
tbl_summary(
statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2,
type = type_list)))
| Characteristic | N = 2,4801 |
|---|---|
| PRE_Pain_MdSv | 777 (31%) |
| Unknown | 10 |
| CTQ_Any | 1,977 (80%) |
| CTQ_Total | 0.00 (1.00) |
| PhyAbu_Any | 1,089 (44%) |
| WK2_CTQSF_PhyAbu_RS | 0.00 (1.00) |
| EmoAbu_Any | 1,627 (66%) |
| WK2_CTQSF_EmoAbu_RS | 0.00 (1.00) |
| SexAbu_Any | 867 (35%) |
| WK2_CTQSF_SexAbu_RS | 0.00 (1.00) |
| PhyNeg_Any | 1,146 (46%) |
| WK2_CTQSF_PhyNeg_RS | 0.00 (1.00) |
| EmoNeg_Any | 1,284 (52%) |
| WK2_CTQSF_EmoNeg_RS | 0.00 (1.00) |
| Bullying_Any | 1,977 (80%) |
| WK2_Bullying_Total | 0.00 (1.00) |
| 1 n (%); Mean (SD) | |
table1_class_features <- c("Pain_Class","ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank","Site_New", "ED_Event_BroadClass", "ED_PDI_RS","PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")
# Create a named list to specify the type of each variable
type_list <- list(
WK2_CTQSF_PhyAbu_RS = "continuous",
WK2_CTQSF_EmoAbu_RS = "continuous",
WK2_CTQSF_SexAbu_RS = "continuous",
WK2_CTQSF_PhyNeg_RS = "continuous",
WK2_CTQSF_EmoNeg_RS = "continuous",
WK2_Bullying_Total = "continuous")
# Generate the summary table and compare across class and add p
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1_class_features)) %>%
tbl_summary(
by = Pain_Class,
statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2,
type = type_list) %>% add_p))
| Characteristic | low, N = 1,0701 | high, N = 3521 | moderate, N = 5901 | moderate recovery, N = 4681 | p-value2 |
|---|---|---|---|---|---|
| ED_GenderBirthCert | <0.001 | ||||
| Female | 605 (57%) | 237 (67%) | 409 (69%) | 303 (65%) | |
| Male | 465 (43%) | 115 (33%) | 181 (31%) | 165 (35%) | |
| ED_Age | 33.36 (12.52) | 40.89 (13.19) | 37.61 (13.12) | 36.81 (14.00) | <0.001 |
| ED_RaceEthCode | <0.001 | ||||
| 1 | 122 (11%) | 37 (11%) | 77 (13%) | 42 (9.0%) | |
| 2 | 387 (36%) | 93 (26%) | 201 (34%) | 210 (45%) | |
| 3 | 521 (49%) | 210 (60%) | 285 (49%) | 197 (42%) | |
| 4 | 39 (3.6%) | 12 (3.4%) | 23 (3.9%) | 17 (3.6%) | |
| Unknown | 1 | 0 | 4 | 2 | |
| BMI | 29.17 (7.95) | 32.15 (9.44) | 30.95 (8.28) | 29.75 (7.93) | <0.001 |
| Unknown | 78 | 21 | 38 | 21 | |
| ADI_NatRank | 62.03 (28.04) | 70.74 (26.08) | 66.28 (26.76) | 60.04 (27.52) | <0.001 |
| Unknown | 41 | 8 | 14 | 17 | |
| Site_New | 0.3 | ||||
| Midwest | 432 (40%) | 139 (39%) | 216 (37%) | 173 (37%) | |
| NorthEast | 441 (41%) | 136 (39%) | 267 (45%) | 209 (45%) | |
| SouthEast | 197 (18%) | 77 (22%) | 107 (18%) | 86 (18%) | |
| ED_Event_BroadClass | 0.12 | ||||
| MVC/Non-motor Collision | 814 (76%) | 268 (76%) | 469 (79%) | 358 (76%) | |
| Other | 152 (14%) | 40 (11%) | 76 (13%) | 72 (15%) | |
| Physical/Sexual Abuse | 104 (9.7%) | 44 (13%) | 45 (7.6%) | 38 (8.1%) | |
| ED_PDI_RS | 12.49 (7.09) | 15.81 (7.33) | 15.00 (7.07) | 14.49 (7.13) | <0.001 |
| Unknown | 43 | 27 | 42 | 27 | |
| PRE_Pain_MdSv | 189 (18%) | 214 (61%) | 251 (43%) | 123 (27%) | <0.001 |
| Unknown | 2 | 1 | 3 | 4 | |
| CTQ_Any | 805 (75%) | 301 (86%) | 490 (83%) | 381 (81%) | <0.001 |
| CTQ_Total | -0.15 (0.92) | 0.26 (1.09) | 0.13 (1.06) | -0.01 (0.97) | <0.001 |
| PhyAbu_Any | 397 (37%) | 193 (55%) | 298 (51%) | 201 (43%) | <0.001 |
| WK2_CTQSF_PhyAbu_RS | -0.14 (0.90) | 0.32 (1.19) | 0.13 (1.05) | -0.08 (0.93) | <0.001 |
| EmoAbu_Any | 623 (58%) | 260 (74%) | 416 (71%) | 328 (70%) | <0.001 |
| WK2_CTQSF_EmoAbu_RS | -0.18 (0.93) | 0.32 (1.11) | 0.15 (1.03) | 0.00 (0.95) | <0.001 |
| SexAbu_Any | 297 (28%) | 148 (42%) | 238 (40%) | 184 (39%) | <0.001 |
| WK2_CTQSF_SexAbu_RS | -0.15 (0.88) | 0.21 (1.16) | 0.13 (1.09) | 0.02 (0.97) | <0.001 |
| PhyNeg_Any | 460 (43%) | 177 (50%) | 297 (50%) | 212 (45%) | 0.012 |
| WK2_CTQSF_PhyNeg_RS | -0.03 (1.03) | 0.07 (1.01) | 0.04 (0.97) | -0.04 (0.95) | 0.028 |
| EmoNeg_Any | 525 (49%) | 189 (54%) | 327 (55%) | 243 (52%) | 0.078 |
| WK2_CTQSF_EmoNeg_RS | -0.05 (1.00) | 0.06 (1.03) | 0.03 (0.96) | 0.03 (1.01) | 0.080 |
| Bullying_Any | 814 (76%) | 290 (82%) | 476 (81%) | 397 (85%) | <0.001 |
| WK2_Bullying_Total | -0.15 (0.95) | 0.28 (1.11) | 0.09 (1.02) | 0.04 (0.95) | <0.001 |
| 1 n (%); Mean (SD) | |||||
| 2 Pearson’s Chi-squared test; Kruskal-Wallis rank sum test | |||||
df$PRE_Pain_MdSv <- as.factor(df$PRE_Pain_MdSv)
#df$CTQ_Any <- as.factor(df$CTQ_Any)
#df$Bullying_Any <- as.factor(df$Bullying_Any)
chisq.test(table(df$PRE_Pain_MdSv, df$Bullying_Any))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$PRE_Pain_MdSv, df$Bullying_Any)
X-squared = 4.2373, df = 1, p-value = 0.03955
chisq.test(table(df$PRE_Pain_MdSv, df$CTQ_Any))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$PRE_Pain_MdSv, df$CTQ_Any)
X-squared = 23.917, df = 1, p-value = 1.006e-06
m35 <- glm(PRE_Pain_MdSv ~ CTQ_Total, data = df, family = binomial)
tidy(m35, exponentiate = TRUE, conf.int = TRUE)
m36 <- glm(PRE_Pain_MdSv ~ WK2_Bullying_Total, data = df, family = binomial)
tidy(m36, exponentiate = TRUE, conf.int = TRUE)
ELA + previous pain z score normalized
df$CTQ_Total_z <- scale(df$CTQ_Total)
df$WK2_Bullying_Total_z <- scale(df$WK2_Bullying_Total)
model_ctq_z <- glm(PRE_Pain_MdSv ~ CTQ_Total_z, data = df, family = "binomial")
tidy(model_ctq_z, exponentiate = TRUE, conf.int = TRUE)
model_bullying_z <- glm(PRE_Pain_MdSv ~ WK2_Bullying_Total_z, data = df, family = "binomial")
tidy(model_bullying_z, exponentiate = TRUE, conf.int = TRUE)
Looking at specific bullying type and pain outcome
#create model that takes simple bullying as the predictor for pain class
m37 <- multinom(Pain_Class ~ BulliedSimple + ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2745.850345
iter 20 value 2613.317714
iter 30 value 2500.594126
iter 40 value 2483.908232
iter 50 value 2482.951517
final value 2482.866891
converged
tidy(m37, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#create model for hit or hurt bullying
m38 <- multinom(Pain_Class ~ BulliedHitOrHurt + ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2744.435610
iter 20 value 2563.304999
iter 30 value 2489.681522
iter 40 value 2480.003963
iter 50 value 2479.232270
final value 2479.171521
converged
tidy(m38, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))