This document contains the code for the AURORA ELA analysis published in the manuscript, Early life adversity increases risk for chronic posttraumatic pain, data from humans and rodents by McKibben et al. In this analysis, we explore the relationship between childhood trauma (measured via CTQ), bullying, and pain trajectory in the AURORA dataset.

For additional work by our research team, see Linnstaedt Lab Website

Set up

Import libraries

rm(list = ls())

libraries <- c("ggsignif","tidyverse", "rstatix", "corrplot", "nlme", "sjPlot", "readxl", "nnet", "chisq.posthoc.test","broom", "knitr","lmtest", "ggpubr","gt","forcats","car", "highcharter", "ggplot2", "corrr", "ggcorrplot", "gridExtra", "skimr", "pROC", "gtsummary", "dunn.test","tidyr", "purrr", "dplyr")

suppressMessages(suppressWarnings(
  invisible(lapply(libraries, library, character.only = TRUE))))

# Explicitly set select to refer to dplyr::select
select <- dplyr::select


Define plot elements

gghisto <- list(
  theme(axis.text.x = element_text(face="bold", color="cornflowerblue", size=14, angle= 20),
          axis.text.y = element_text(face="bold", color="royalblue4", 
          size=16, angle=25), axis.title=element_text(size=17,face="bold"),
          plot.title = element_text(size=14,face="bold.italic")))

gghisto2 <- list(
  theme(axis.text.x = element_text(face="bold", color="cornflowerblue", size=8, angle=6),
          axis.text.y = element_text(face="bold", color="royalblue4", 
          size=12), axis.title=element_text(size=14,face="bold"),
          plot.title = element_text(size=14,face="bold.italic")))

color_fill <- scale_fill_manual(values = c("1" = "violetred1", "2" = "steelblue1")) 


Import all freeze 4 datasets

#Read in the dataset
data_1 <- read.csv("AURORA_full_update02102022_New.csv",check.names=F)[-1]

#freeze 4 datasets
AURORA_Freeze_4_general_mod <- read_csv("AURORA_Freeze_4_general_mod.csv",na = ".")
Rows: 2943 Columns: 122
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr    (1): ED_TransportMode_other
dbl  (120): PID, WK2_PctComplete, Wk8_PctComplete, M3_PctComplete, M6_PctComplete, m12_PctComplete, ED_Admit, ED_...
date   (1): ED_DayofTrauma

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
AURORA_Freeze_4_demogr_mod <- read_csv("AURORA_Freeze_4_demogr_mod.csv",na = ".")
Rows: 2943 Columns: 10
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (10): PID, ED_GenderBirthCert, ED_GenderNow, ED_Age, ED_Marital, ED_RaceEthCode, ED_highestgrade, BMI, WK2_Em...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
AURORA_Freeze_4_pain_mod <- read_csv("AURORA_Freeze_4_pain_mod.csv",na = ".")
Rows: 2943 Columns: 41
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (41): PID, ED_NowPain_YN_COUNT, ED_NowPain_MdSv, PRE_Pain_YN_COUNT, PRE_Pain_MdSv, PRE_PROM_Pain4a_T, PRE_PCS...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
AURORA_Freeze_4_ctq_mod <- read_csv("AURORA_Freeze_4_ctq_mod.csv",na = ".")
Rows: 2943 Columns: 7
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (7): PID, WK2_CTQSF_EmoAbu_RS, WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_SexAbu_RS, WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_PhyNe...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
AURORA_F4_CTQ_item <- readxl::read_excel("AURORA_F4_CTQ_item.xlsx")
AURORA_Freeze_4_pdi_mod <- read_csv("AURORA_Freeze_4_pdi_mod.csv",na = ".")
Rows: 2943 Columns: 2
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (2): PID, ED_PDI_RS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
SiteID <- read_excel("SiteID.xlsx")
FZ4_df <- AURORA_Freeze_4_demogr_mod %>% inner_join(AURORA_F4_CTQ_item,by="PID") %>% 
  inner_join(AURORA_Freeze_4_pain_mod,by="PID") %>% 
  inner_join(AURORA_Freeze_4_ctq_mod,by="PID") %>% 
  inner_join(AURORA_Freeze_4_pdi_mod,by="PID") %>% 
  inner_join(SiteID,by="PID") %>% 
  mutate(Site_New = case_when(SiteID %in% c("Baystate","BMC", "Beth Israel", "BWH", "Cooper","Einstein","Jefferson","MGH","Miriam","Penn","Rhode Island","St. John","St. Joseph","Temple","UMass") ~ "NorthEast",SiteID %in% c("Baylor", "Emory ED", "Jacksonville","UAB","UNC","UT Houston","UT Southwestern","Vanderbilt") ~ "SouthEast", SiteID %in% c("39","49","Beaumont Royal Oak","Beaumont Troy","Eskenazi","Henry Ford","Cincinnati","Indiana","WashU","WashU DP") ~ "Midwest")) %>% 
  convert_as_factor(Site_New,ED_GenderBirthCert,ED_GenderNow,ED_Marital,ED_RaceEthCode,ED_highestgrade,WK2_EmploymentCode,WK2_IncomeCode) %>% 
  mutate_at(vars(contains("WK2_Childhood")),as.numeric) %>% dplyr::select(-SiteID) %>% #2682
  mutate(WK2_Bullying_Total = WK2_ChildhoodBullying+WK2_ChildhoodHitOrHurt) %>% 
  inner_join(data_1 %>% dplyr::select(PID,WK8_Pain_C,M3_Pain_C,M6_Pain_C),by="PID") %>% 
  filter(is.na(WK2_CTQSF_SexAbu_RS) == F) %>% 
  filter(is.na(WK2_CTQSF_PhyAbu_RS)== F) %>% 
  filter(is.na(WK2_CTQSF_EmoAbu_RS)== F) %>% 
  filter(is.na(WK2_CTQSF_EmoNeg_RS) == F) %>% 
  filter(is.na(WK2_CTQSF_PhyNeg_RS) == F) %>% 
  filter(is.na(WK2_Bullying_Total) == F) %>% #2483
  mutate(log_WK2_CTQSF_Total_RS = log2(WK2_CTQSF_Total_RS+1),log_ED_NowPain=log2(ED_NowPain+1))


# Import datasets
pain_trajectory <- read_csv("pain_latent_class_4.csv")
Rows: 2943 Columns: 6
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): pain_Class_MostLikely
dbl (5): PID, Probability_Class1, Probability_Class2, Probability_Class3, Probability_Class4

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PTSD_df <- read_excel("AURORA_Freeze_4_ptsd_mod.xlsx")
Trauma_df <- read_excel("AURORA_Freeze_4_trauma_mod.xlsx")
Injury_df <- read_excel("AURORA_Freeze_4_injury_mod.xlsx")
ADI_df <- read_excel("AURORA_Freeze_4_ses_mod.xlsx",na = ".")

# Combine datasets
df_traj <- FZ4_df %>%
  inner_join(Trauma_df %>% select(PID, ED_Event_BroadClass), by = "PID") %>%
  inner_join(Injury_df %>% select(PID, ED_Concussion), by = "PID") %>%
  inner_join(ADI_df %>% select(PID, ADI_NatRank), by = "PID") %>%
  inner_join(pain_trajectory %>% select(PID, pain_Class_MostLikely), by = "PID")


Cleaning: Relevel and refactor features

Refactor pain trajectories and Relevel to “low pain” as the reference group

df_traj <- df_traj %>%
  rename(Pain_Class = pain_Class_MostLikely)

df_traj <- df_traj %>%
  mutate(Pain_Class = case_when(
    Pain_Class == "Class 1" ~ "moderate recovery",
    Pain_Class == "Class 2" ~ "moderate",
    Pain_Class == "Class 3" ~ "low",
    Pain_Class == "Class 4" ~ "high",
    TRUE ~ as.character(Pain_Class)  # This handles any unexpected values
  )) %>%
  mutate(Pain_Class = factor(Pain_Class)) %>%
  mutate(Pain_Class = relevel(Pain_Class, ref = "low"))

Factorize and rename categorical variables

factor_vars <- c("ED_GenderBirthCert", "ED_Event_BroadClass", "ED_Marital", 
                 "WK2_EmploymentCode", "ED_highestgrade", "ED_RaceEthCode", "PRE_Pain_MdSv")
df_traj[factor_vars] <- lapply(df_traj[factor_vars], as.factor)

#in gender, change 1 to male and 2 to female
df_traj$ED_GenderBirthCert <- ifelse(df_traj$ED_GenderBirthCert == 1, "Male", "Female")

#in pre pain, change 1 to yes and 0 to no .. is this correct?
df_traj$PRE_Pain_MdSv <- ifelse(df_traj$PRE_Pain_MdSv == 1, "Yes", "No")

# Convert continuous variables to numeric
df_traj$ED_PDI_RS <- as.numeric(df_traj$ED_PDI_RS)
df_traj$ED_Concussion <- as.numeric(df_traj$ED_Concussion)
Warning: NAs introduced by coercion
#rename WK2 CTQ total column to CTQ_Total
df_traj <- df_traj %>%
  rename(CTQ_Total = WK2_CTQSF_Total_RS)

#print column headers that contain the letters "ED"
colnames(df_traj)[grep("ED", colnames(df_traj))]
 [1] "ED_GenderBirthCert"  "ED_GenderNow"        "ED_Age"              "ED_Marital"          "ED_RaceEthCode"     
 [6] "ED_highestgrade"     "ED_NowPain_YN_COUNT" "ED_NowPain_MdSv"     "ED_Pain_CSNWP_Count" "ED_NowPain"         
[11] "ED_PainDiff_sum"     "ED_PDI_RS"           "log_ED_NowPain"      "ED_Event_BroadClass" "ED_Concussion"      


General clean up, renaming, removing negatives

#Make any value less than 0 in the bullying total == NA
df_traj$WK2_Bullying_Total <- ifelse(df_traj$WK2_Bullying_Total < 0, NA, df_traj$WK2_Bullying_Total)

#Make any value less than 0 in ADI == NA
df_traj$ADI_NatRank <- ifelse(df_traj$ADI_NatRank < 0, NA, df_traj$ADI_NatRank)


Remove three bullying rows have negative numbers

# Remove the rows in WK2_Bullying_Total that contain negative values
df_traj <- df_traj %>% filter(WK2_Bullying_Total >= 0)


Relevel categories for trauma type

df_traj <- df_traj %>%
  mutate(ED_Event_BroadClass = case_when(
    ED_Event_BroadClass == "1" ~ "MVC/Non-motor Collision",
    ED_Event_BroadClass == "6" ~ "MVC/Non-motor Collision",
    ED_Event_BroadClass == "2" ~ "Physical/Sexual Abuse",
    ED_Event_BroadClass == "3" ~ "Physical/Sexual Abuse",
    ED_Event_BroadClass == "4" ~ "Other",
    ED_Event_BroadClass == "5" ~ "Other",
    ED_Event_BroadClass == "7" ~ "Other",
    ED_Event_BroadClass == "8" ~ "Other",
    ED_Event_BroadClass == "9" ~ "Other",
    ED_Event_BroadClass == "10" ~ "Other",
    ED_Event_BroadClass == "11" ~ "Other",
    TRUE ~ as.character(ED_Event_BroadClass)  # Keeps any other values unchanged
  ))

# Convert ED_Event broad class to factor
df_traj$ED_Event_BroadClass <- factor(df_traj$ED_Event_BroadClass)

table(df_traj$ED_Event_BroadClass)

MVC/Non-motor Collision                   Other   Physical/Sexual Abuse 
                   1909                     340                     231 


Relevel categories for education

#relevel education category
# 1- did not finish HS (codes 0-12) 2- HS grad + some college (codes 13-15) 3- college grad (Bachelors or Associates) (codes 16-18) 4- post grad codes(19-21)
df_traj <- df_traj %>%
  mutate(ED_highestgrade = case_when(
    ED_highestgrade == "0" ~ "Did not finish HS",
    ED_highestgrade == "1" ~ "Did not finish HS",
    ED_highestgrade == "7" ~ "Did not finish HS",
    ED_highestgrade == "8" ~ "Did not finish HS",
    ED_highestgrade == "9" ~ "Did not finish HS",
    ED_highestgrade == "10" ~ "Did not finish HS",
    ED_highestgrade == "11" ~ "Did not finish HS",
    ED_highestgrade == "12" ~ "Did not finish HS",
    ED_highestgrade == "13" ~ "HS Grad/Some College",
    ED_highestgrade == "14" ~ "HS Grad/Some College",
    ED_highestgrade == "15" ~ "HS Grad/Some College",
    ED_highestgrade == "16" ~ "Bach or Assoc degree",
    ED_highestgrade == "17" ~ "Bach or Assoc degree",
    ED_highestgrade == "18" ~ "Bach or Assoc degree",
    ED_highestgrade == "19" ~ "Post-graduate education",
    ED_highestgrade == "20" ~ "Post-graduate education",
    ED_highestgrade == "21" ~ "Post-graduate education",
    ED_highestgrade == "-7" ~ NA,
    ED_highestgrade == "-5" ~ NA,
    TRUE ~ as.character(ED_highestgrade)  # Keeps any other values unchanged
  ))

# Convert to factor
df_traj$ED_highestgrade <- factor(df_traj$ED_highestgrade)

table(df_traj$ED_highestgrade)

   Bach or Assoc degree       Did not finish HS    HS Grad/Some College Post-graduate education 
                    695                     267                    1321                     189 


Re-level marriage category

#marital status 1-married 2-separated 3-divorced 4-annulled 5-widowed 6-single
#4. marriage - combine 2+3+4+5 as a single category

df_traj <- df_traj %>%
  mutate(ED_Marital = case_when(
    ED_Marital == "1" ~ "Married",
    ED_Marital == "2" ~ "Divorced/Widowed",
    ED_Marital == "3" ~ "Divorced/Widowed",
    ED_Marital == "4" ~ "Divorced/Widowed",
    ED_Marital == "5" ~ "Divorced/Widowed",
    ED_Marital == "6" ~ "Single",
    TRUE ~ as.character(ED_Marital)  # Keeps any other values unchanged
  ))

# Convert to factor
df_traj$ED_Marital <- factor(df_traj$ED_Marital)

table(df_traj$ED_Marital)

Divorced/Widowed          Married           Single 
             446              539             1483 


Subset to final df for analysis

Subset df_traj to our final df that will be used for model analysis, keeping only the features needed.

df <- df_traj %>%
  select(PID, Pain_Class,ED_Age, Site_New, ED_RaceEthCode, ED_GenderBirthCert, ADI_NatRank, CTQ_Total, 
         BMI, PRE_Pain_MdSv, WK2_EmploymentCode, ED_Marital, ED_highestgrade, 
         ED_Concussion, ED_Event_BroadClass, ED_PDI_RS, WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_EmoAbu_RS, WK2_CTQSF_SexAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoNeg_RS, WK2_Bullying_Total)


Add the CTQ triad score that is a combination of bullying, physical abuse, and emotional abuse scores.

#Add combination CTQ score
df$CTQ_Triad <- df$WK2_Bullying_Total + df$WK2_CTQSF_PhyAbu_RS + df$WK2_CTQSF_EmoAbu_RS

#Print the rows that have NA in the CTQ triad
df %>% filter(is.na(CTQ_Triad))
#Confirms that for the CTQ triad, if any of the additive features contain NA, then the final score is NA

#Add the two different types of bullying question
df <- df %>%
  left_join(FZ4_df %>% select(PID, BulliedSimple = 20, BulliedHitOrHurt = 21), by = "PID")

#Remove first column, PID, as this is unneeded 
df <- df %>% select(-PID)

#make all of df[,c(2,6:8)]) numeric
df[,c(2,6:8)] <- lapply(df[,c(2,6:8)], as.numeric)


Binary scores for later models

df$Bullying_Any <- ifelse(df$WK2_Bullying_Total > 0, 1, 0)
df$PhyAbu_Any <- ifelse(df$WK2_CTQSF_PhyAbu_RS > 0, 1, 0)
df$EmoAbu_Any <- ifelse(df$WK2_CTQSF_EmoAbu_RS > 0, 1, 0)
df$SexAbu_Any <- ifelse(df$WK2_CTQSF_SexAbu_RS > 0, 1, 0)
df$PhyNeg_Any <- ifelse(df$WK2_CTQSF_PhyNeg_RS > 0, 1, 0)
df$EmoNeg_Any <- ifelse(df$WK2_CTQSF_EmoNeg_RS > 0, 1, 0)
df$CTQ_Any <- ifelse(df$CTQ_Total > 0, 1, 0)


Z-transform scaling

Z-scaling is a method of standardizing the data so that it has a mean of 0 and a standard deviation of 1. This is done to make the data more interpretable and to ensure that the variables are on the same scale. The scale function works by subtracting the mean of the data from each value and then dividing by the standard deviation.

#scale
df[, c(7, 16:22)] <- scale(df[, c(7, 16:22)])
df[, c(7, 16:22)] <- lapply(df[, c(7, 16:22)], function(x) as.numeric(scale(x)))

summary(df)
             Pain_Class       ED_Age           Site_New    ED_RaceEthCode ED_GenderBirthCert  ADI_NatRank    
 low              :1070   Min.   :18.00   Midwest  : 960   1   : 278      Length:2480        Min.   :  2.00  
 high             : 352   1st Qu.:25.00   NorthEast:1053   2   : 891      Class :character   1st Qu.: 41.00  
 moderate         : 590   Median :33.00   SouthEast: 467   3   :1213      Mode  :character   Median : 66.00  
 moderate recovery: 468   Mean   :36.09                    4   :  91                         Mean   : 63.93  
                          3rd Qu.:46.00                    NA's:   7                         3rd Qu.: 91.00  
                          Max.   :74.00                                                      Max.   :100.00  
                                                                                             NA's   :80      
   CTQ_Total            BMI        PRE_Pain_MdSv      WK2_EmploymentCode            ED_Marital  
 Min.   :-0.9702   Min.   : 8.40   Length:2480        1   :1816          Divorced/Widowed: 446  
 1st Qu.:-0.8681   1st Qu.:24.10   Class :character   2   :  65          Married         : 539  
 Median :-0.3576   Median :28.40   Mode  :character   3   :  55          Single          :1483  
 Mean   : 0.0000   Mean   :30.13                      4   :  96          NA's            :  12  
 3rd Qu.: 0.6636   3rd Qu.:34.50                      5   : 446                                 
 Max.   : 3.5227   Max.   :84.50                      NA's:   2                                 
                   NA's   :158                                                                  
                ED_highestgrade ED_Concussion                  ED_Event_BroadClass   ED_PDI_RS    
 Bach or Assoc degree   : 695   Min.   :0.00000   MVC/Non-motor Collision:1909     Min.   : 0.00  
 Did not finish HS      : 267   1st Qu.:0.00000   Other                  : 340     1st Qu.: 8.00  
 HS Grad/Some College   :1321   Median :0.00000   Physical/Sexual Abuse  : 231     Median :14.00  
 Post-graduate education: 189   Mean   :0.04518                                    Mean   :13.92  
 NA's                   :   8   3rd Qu.:0.00000                                    3rd Qu.:19.00  
                                Max.   :1.00000                                    Max.   :32.00  
                                NA's   :1                                          NA's   :139    
 WK2_CTQSF_PhyAbu_RS WK2_CTQSF_EmoAbu_RS WK2_CTQSF_SexAbu_RS WK2_CTQSF_PhyNeg_RS WK2_CTQSF_EmoNeg_RS
 Min.   :-0.6769     Min.   :-0.9692     Min.   :-0.5770     Min.   :-0.7135     Min.   :-0.8161    
 1st Qu.:-0.6769     1st Qu.:-0.9692     1st Qu.:-0.5770     1st Qu.:-0.7135     1st Qu.:-0.8161    
 Median :-0.6769     Median :-0.2107     Median :-0.5770     Median :-0.7135     Median :-0.3917    
 Mean   : 0.0000     Mean   : 0.0000     Mean   : 0.0000     Mean   : 0.0000     Mean   : 0.0000    
 3rd Qu.: 0.2991     3rd Qu.: 0.5478     3rd Qu.: 0.3189     3rd Qu.: 0.6853     3rd Qu.: 0.8815    
 Max.   : 2.7933     Max.   : 2.0647     Max.   : 3.0065     Max.   : 3.0168     Max.   : 2.5792    
                                                                                                    
 WK2_Bullying_Total   CTQ_Triad       BulliedSimple   BulliedHitOrHurt  Bullying_Any      PhyAbu_Any    
 Min.   :-1.24976   Min.   :-1.1256   Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:-0.82836   1st Qu.:-0.8077   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000  
 Median : 0.01444   Median :-0.3309   Median :1.000   Median :0.0000   Median :1.0000   Median :0.0000  
 Mean   : 0.00000   Mean   : 0.0000   Mean   :1.187   Mean   :0.8198   Mean   :0.7972   Mean   :0.4391  
 3rd Qu.: 0.43584   3rd Qu.: 0.6227   3rd Qu.:2.000   3rd Qu.:2.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
 Max.   : 2.12144   Max.   : 2.6889   Max.   :4.000   Max.   :4.0000   Max.   :1.0000   Max.   :1.0000  
                                                                                                        
   EmoAbu_Any      SexAbu_Any       PhyNeg_Any       EmoNeg_Any        CTQ_Any      
 Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
 Median :1.000   Median :0.0000   Median :0.0000   Median :1.0000   Median :1.0000  
 Mean   :0.656   Mean   :0.3496   Mean   :0.4621   Mean   :0.5177   Mean   :0.7972  
 3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
 Max.   :1.000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
                                                                                    
#print median of each ctq type and bullying
df %>% summarize(median(CTQ_Total), median(WK2_CTQSF_PhyAbu_RS), median(WK2_CTQSF_EmoAbu_RS), median(WK2_CTQSF_SexAbu_RS), median(WK2_CTQSF_PhyNeg_RS), median(WK2_CTQSF_EmoNeg_RS), median(WK2_Bullying_Total))

#make boxplots of each ctq type and bullying
#bullying has the highest median after z-score transforming, so it has the highest values
df %>% pivot_longer(cols = c(7, 17:22), names_to = "CTQ_Type", values_to = "CTQ_Score") %>%
  ggplot(aes(x = CTQ_Type, y = CTQ_Score)) +
  geom_boxplot() +
  labs(title = "Boxplot of CTQ Scores", x = "CTQ Type", y = "CTQ Score") + gghisto



Preliminary Analysis



Table 1: Participant Characteristics

Table 1: Socio-demographic variables

table1a_features <- c("ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank")

suppressMessages(suppressWarnings(
  df %>%
    select(all_of(table1a_features)) %>%
    tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", 
      digits = all_continuous() ~ 2)))
Characteristic N = 2,4801
ED_GenderBirthCert
    Female 1,554 (63%)
    Male 926 (37%)
ED_Age 36.09 (13.31)
ED_RaceEthCode
    1 278 (11%)
    2 891 (36%)
    3 1,213 (49%)
    4 91 (3.7%)
    Unknown 7
BMI 30.13 (8.32)
    Unknown 158
ADI_NatRank 63.93 (27.57)
    Unknown 80
1 n (%); Mean (SD)


Table 1: ED/Trauma-related variable.names

table1b_features <- c("Site_New", "ED_Event_BroadClass", "ED_PDI_RS")

suppressMessages(suppressWarnings(
df %>%
    select(all_of(table1b_features)) %>%
    tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", 
      digits = all_continuous() ~ 2)))
Characteristic N = 2,4801
Site_New
    Midwest 960 (39%)
    NorthEast 1,053 (42%)
    SouthEast 467 (19%)
ED_Event_BroadClass
    MVC/Non-motor Collision 1,909 (77%)
    Other 340 (14%)
    Physical/Sexual Abuse 231 (9.3%)
ED_PDI_RS 13.92 (7.24)
    Unknown 139
1 n (%); Mean (SD)


Table 1: Past pain/stress variables

table1c_features <- c("PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")

# Create a named list to specify the type of each variable
type_list <- list(
  WK2_CTQSF_PhyAbu_RS = "continuous",
  WK2_CTQSF_EmoAbu_RS = "continuous",
  WK2_CTQSF_SexAbu_RS = "continuous",
  WK2_CTQSF_PhyNeg_RS = "continuous",
  WK2_CTQSF_EmoNeg_RS = "continuous",
  WK2_Bullying_Total = "continuous")

# Generate the summary table
suppressMessages(suppressWarnings(
  df %>%
    select(all_of(table1c_features)) %>%
    tbl_summary(
      statistic = all_continuous() ~ "{mean} ({sd})",
      digits = all_continuous() ~ 2,
      type = type_list)))
Characteristic N = 2,4801
PRE_Pain_MdSv 777 (31%)
    Unknown 10
CTQ_Any 1,977 (80%)
CTQ_Total 0.00 (1.00)
PhyAbu_Any 1,089 (44%)
WK2_CTQSF_PhyAbu_RS 0.00 (1.00)
EmoAbu_Any 1,627 (66%)
WK2_CTQSF_EmoAbu_RS 0.00 (1.00)
SexAbu_Any 867 (35%)
WK2_CTQSF_SexAbu_RS 0.00 (1.00)
PhyNeg_Any 1,146 (46%)
WK2_CTQSF_PhyNeg_RS 0.00 (1.00)
EmoNeg_Any 1,284 (52%)
WK2_CTQSF_EmoNeg_RS 0.00 (1.00)
Bullying_Any 1,977 (80%)
WK2_Bullying_Total 0.00 (1.00)
1 n (%); Mean (SD)



Descriptive Stats

#calculate the % of participants reported experiencing at least one instance of any type of ELA (i.e. either CTQ≥1 or childhood Bullying ≥1)  and the M (SD) for composite CTQ
df %>% 
  summarize(perc_any_ELA = mean(CTQ_Total >= 1 | WK2_Bullying_Total >= 1, na.rm = TRUE),
            mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
            sd_CTQ = sd(CTQ_Total, na.rm = TRUE))

How many people had at a score of 1 or more for at least two different CTQ subtypes?

# Create a logical matrix where TRUE indicates a score of 1 or more
logical_matrix <- df %>%
  select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS, 
         WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
  mutate(across(everything(), ~ . >= 1))

# Count the number of TRUE values in each row
df$ctq_total_count <- rowSums(logical_matrix)

# Filter individuals with at least 2 CTQ subtypes with scores of 1 or more
sum(df$ctq_total_count >= 2)
[1] 703
#what is the most common ctq subtype with percentages out of 2480
df %>% 
  select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS, 
         WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
  pivot_longer(cols = everything(), names_to = "CTQ_Subtype", values_to = "Score") %>%
  filter(Score >= 1) %>%
  count(CTQ_Subtype) %>%
  arrange(desc(n)) %>%
  mutate(perc = n/2480)
#how many cases in the df had either bullying score 1+ or total CTQ score 1+
df %>% filter(CTQ_Total >= 1 | WK2_Bullying_Total >= 1) %>% nrow()
[1] 650
#ttest comparing ctq in men vs women
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)

    Welch Two Sample t-test

data:  CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 0.1567791 0.3101002
sample estimates:
mean in group Female   mean in group Male 
          0.08716336          -0.14627631 
#caclulcate std in addition to means for sex
df %>% 
  group_by(ED_GenderBirthCert) %>% 
  summarize(mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
            sd_CTQ = sd(CTQ_Total, na.rm = TRUE))

#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  WK2_Bullying_Total by ED_GenderBirthCert
t = 0.093303, df = 2028.3, p-value = 0.9257
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.07650414  0.08414729
sample estimates:
mean in group Female   mean in group Male 
         0.001426928         -0.002394650 


Correlations, Heatmap

# Function to calculate Spearman correlations, p-values, and number of complete cases
correlation_summary <- function(x, y) {
  cor_test <- cor.test(df[[x]], df[[y]], method = "spearman")
  
  tibble(
    Variable1 = x,
    Variable2 = y,
    Rho = cor_test$estimate,
    P_Value = cor_test$p.value,
  )
}

# Define the columns to correlate
columns <- colnames(df)[16:22]

# Create a summary table
all_ctq_correlations <- expand_grid(Variable1 = columns, Variable2 = columns) %>%
  filter(Variable1 != Variable2) %>%
  pmap_dfr(~ correlation_summary(..1, ..2))

#save CTQ correlations to a CSV
#write.csv(all_ctq_correlations, "all_ELA_correlations_for_Lauren.csv")

Heatmap of correlations

# Compute the correlation matrix
cor_matrix <- cor(df[,c(16:22)], use = "complete.obs", method="spearman")

# Plot the correlation matrix
# Supplementary Figure 1
ggcorrplot(cor_matrix, lab = TRUE)


Sup. Fig. 2: SEX x CTQ stats+plots

#Look at the relationship between ED_GenderBirthCert and CTQ_Total
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 0.1567791 0.3101002
sample estimates:
mean in group Female   mean in group Male 
          0.08716336          -0.14627631 
p1 <- ggplot(df, aes(x = ED_GenderBirthCert, y = CTQ_Total)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ TOTAL by GENDER",
       x = "GENDER",
       y = "CTQ TOTAL") + gghisto

# Sexual assault CTQ x Sex
t.test(WK2_CTQSF_SexAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  WK2_CTQSF_SexAbu_RS by ED_GenderBirthCert
t = 10.874, df = 2462.5, p-value < 2.2e-16
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 0.3270426 0.4709518
sample estimates:
mean in group Female   mean in group Male 
           0.1489804           -0.2500168 
p2 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_SexAbu_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Sexual Abuse by GENDER",
       x = "GENDER",
       y = "CTQ Sexual Abuse") + gghisto

# physical abuse x Sex
t.test(WK2_CTQSF_PhyAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  WK2_CTQSF_PhyAbu_RS by ED_GenderBirthCert
t = 3.8607, df = 2199.9, p-value = 0.0001163
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 0.07541138 0.23110576
sample estimates:
mean in group Female   mean in group Male 
          0.05722477          -0.09603380 
p3 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyAbu_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Physical Abuse by GENDER",
       x = "GENDER",
       y = "CTQ phy Abuse") + gghisto

# physical neglect x sex
t.test(WK2_CTQSF_PhyNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  WK2_CTQSF_PhyNeg_RS by ED_GenderBirthCert
t = -2.0304, df = 1876.9, p-value = 0.04246
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.167457707 -0.002900033
sample estimates:
mean in group Female   mean in group Male 
         -0.03180469           0.05337418 
p4 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyNeg_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Physical Neglect by GENDER",
       x = "GENDER",
       y = "CTQ phy neglect") + gghisto

#emotional abuse x sex
t.test(WK2_CTQSF_EmoAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  WK2_CTQSF_EmoAbu_RS by ED_GenderBirthCert
t = 8.046, df = 2232.8, p-value = 1.375e-15
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 0.2380158 0.3914276
sample estimates:
mean in group Female   mean in group Male 
           0.1175130           -0.1972087 
p5 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoAbu_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Emotional Abuse by GENDER",
       x = "GENDER",
       y = "CTQ emo abuse") + gghisto

#emotional neglect x sex
t.test(WK2_CTQSF_EmoNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  WK2_CTQSF_EmoNeg_RS by ED_GenderBirthCert
t = -0.51873, df = 1971.3, p-value = 0.604
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.10251665  0.05962911
sample estimates:
mean in group Female   mean in group Male 
        -0.008006827          0.013436944 
p6 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoNeg_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Emotional Neglect by GENDER",
       x = "GENDER",
       y = "CTQ emo neglect") + gghisto


#bullying x sex
p7 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_Bullying_Total)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "Bullying by Gender",
       x = " ",
       y = "Bullying Score") + gghisto

ctq.sex.plots <- grid.arrange(p1, p2, p3, p4, p5, p6, p7, ncol = 3)


#save these plots to a pdf
#ggsave("ctq.sex.plots.pdf", ctq.sex.plots, width = 12, height = 10)


#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)

    Welch Two Sample t-test

data:  WK2_Bullying_Total by ED_GenderBirthCert
t = 0.093303, df = 2028.3, p-value = 0.9257
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.07650414  0.08414729
sample estimates:
mean in group Female   mean in group Male 
         0.001426928         -0.002394650 
#ttest of ctq total by sex
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)

    Welch Two Sample t-test

data:  CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 0.1567791 0.3101002
sample estimates:
mean in group Female   mean in group Male 
          0.08716336          -0.14627631 


Sup. Fig. 3: RACE x CTQ stats+plots

#Look at the relationship between RACE and each CTQ category
# List of CTQ categories and their corresponding columns
ctq_tests <- list(
  CTQ_Total = "CTQ_Total",
  Physical_Abuse = "WK2_CTQSF_PhyAbu_RS",
  Physical_Neglect = "WK2_CTQSF_PhyNeg_RS",
  Emotional_Abuse = "WK2_CTQSF_EmoAbu_RS",
  Emotional_Neglect = "WK2_CTQSF_EmoNeg_RS",
  Sexual_Abuse = "WK2_CTQSF_SexAbu_RS",
  Bullying = "WK2_Bullying_Total"
)

# Initialize a vector to store significant results
significant_tests <- c()

# Perform Kruskal-Wallis test and Dunn's test for each CTQ category
for (name in names(ctq_tests)) {
  cat("Testing:", name, "\n")
  kw_test <- kruskal.test(df[[ctq_tests[[name]]]] ~ df$ED_RaceEthCode)
  print(kw_test)  
  if (kw_test$p.value < 0.05) {
    cat("Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...\n")
    dunn_result <- dunn.test(df[[ctq_tests[[name]]]], df$ED_RaceEthCode, method = "bh") 
    print(dunn_result)
    significant_tests <- c(significant_tests, name)
  } else {
    cat("Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.\n")
  }
  cat("\n") 
}
Testing: CTQ_Total 

    Kruskal-Wallis rank sum test

data:  df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 7.7918, df = 3, p-value = 0.05052

Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.

Testing: Physical_Abuse 

    Kruskal-Wallis rank sum test

data:  df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 7.5855, df = 3, p-value = 0.0554

Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.

Testing: Physical_Neglect 

    Kruskal-Wallis rank sum test

data:  df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 16.152, df = 3, p-value = 0.001056

Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
  Kruskal-Wallis rank sum test

data: x and group
Kruskal-Wallis chi-squared = 16.1517, df = 3, p-value = 0

                           Comparison of x by group                            
                             (Benjamini-Hochberg)                              
Col Mean-|
Row Mean |          1          2          3
---------+---------------------------------
       2 |   3.850210
         |    0.0004*
         |
       3 |   2.334541  -2.476521
         |    0.0196*    0.0199*
         |
       4 |   1.280521  -0.998172  -0.005352
         |     0.1503     0.1909     0.4979

alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 16.1517

$Z
[1]  3.85021063  2.33454133 -2.47652115  1.28052161 -0.99817227 -0.00535235

$P
[1] 5.900814e-05 9.783697e-03 6.633488e-03 1.001809e-01 1.590979e-01 4.978647e-01

$P.adjusted
[1] 0.0003540489 0.0195673948 0.0199004649 0.1502713109 0.1909174969 0.4978647316

$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"


Testing: Emotional_Abuse 

    Kruskal-Wallis rank sum test

data:  df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 21.301, df = 3, p-value = 9.116e-05

Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
  Kruskal-Wallis rank sum test

data: x and group
Kruskal-Wallis chi-squared = 21.3011, df = 3, p-value = 0

                           Comparison of x by group                            
                             (Benjamini-Hochberg)                              
Col Mean-|
Row Mean |          1          2          3
---------+---------------------------------
       2 |  -0.616834
         |     0.2687
         |
       3 |   2.330242   4.472250
         |     0.0297    0.0000*
         |
       4 |   0.717342   1.172279  -0.628512
         |     0.3549     0.2411     0.3178

alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 21.30109

$Z
[1] -0.6168349  2.3302421  4.4722501  0.7173426  1.1722795 -0.6285124

$P
[1] 2.686718e-01 9.896680e-03 3.870042e-06 2.365814e-01 1.205424e-01 2.648341e-01

$P.adjusted
[1] 2.686718e-01 2.969004e-02 2.322025e-05 3.548720e-01 2.410849e-01 3.178010e-01

$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"


Testing: Emotional_Neglect 

    Kruskal-Wallis rank sum test

data:  df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 6.728, df = 3, p-value = 0.08109

Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.

Testing: Sexual_Abuse 

    Kruskal-Wallis rank sum test

data:  df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 9.7111, df = 3, p-value = 0.02119

Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
  Kruskal-Wallis rank sum test

data: x and group
Kruskal-Wallis chi-squared = 9.7111, df = 3, p-value = 0.02

                           Comparison of x by group                            
                             (Benjamini-Hochberg)                              
Col Mean-|
Row Mean |          1          2          3
---------+---------------------------------
       2 |   2.585689
         |    0.0292*
         |
       3 |   0.965636  -2.570674
         |     0.3342    0.0152*
         |
       4 |   0.905601  -0.620256   0.415519
         |     0.2739     0.3211     0.3389

alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 9.711097

$Z
[1]  2.5856895  0.9656370 -2.5706744  0.9056014 -0.6202568  0.4155199

$P
[1] 0.004859222 0.167112936 0.005075035 0.182573441 0.267544376 0.338880672

$P.adjusted
[1] 0.02915533 0.33422587 0.01522510 0.27386016 0.32105325 0.33888067

$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"


Testing: Bullying 

    Kruskal-Wallis rank sum test

data:  df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 14.311, df = 3, p-value = 0.002511

Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
  Kruskal-Wallis rank sum test

data: x and group
Kruskal-Wallis chi-squared = 14.3111, df = 3, p-value = 0

                           Comparison of x by group                            
                             (Benjamini-Hochberg)                              
Col Mean-|
Row Mean |          1          2          3
---------+---------------------------------
       2 |  -1.028753
         |     0.2277
         |
       3 |   1.381831   3.684295
         |     0.1670    0.0007*
         |
       4 |   0.857417   1.583135   0.107357
         |     0.2347     0.1701     0.4573

alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 14.31115

$Z
[1] -1.0287538  1.3818314  3.6842953  0.8574171  1.5831355  0.1073575

$P
[1] 0.1517976943 0.0835117408 0.0001146681 0.1956072094 0.0566952918 0.4572526697

$P.adjusted
[1] 0.2276965414 0.1670234816 0.0006880087 0.2347286513 0.1700858754 0.4572526697

$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"
# Print names of significant CTQ categories
if (length(significant_tests) > 0) {
  cat("CTQ categories with significant Kruskal-Wallis test and corresponding Dunn's test results:\n")
  cat(paste(significant_tests, collapse = "\n"))
} else {
  cat("No CTQ categories with significant Kruskal-Wallis test.\n")
}
CTQ categories with significant Kruskal-Wallis test and corresponding Dunn's test results:
Physical_Neglect
Emotional_Abuse
Sexual_Abuse
Bullying

df %>%
  group_by(ED_RaceEthCode) %>%
  summarize(
    Mean_CTQ_Tot = mean(CTQ_Total, na.rm = TRUE),
    SD_CTQ_Tot = sd(CTQ_Total, na.rm = TRUE),
    Mean_Phys_Abuse = mean(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
    SD_Phys_Abuse = sd(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
    Mean_Phys_Neglect = mean(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
    SD_Phys_Neglect = sd(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
    Mean_Emo_Abuse = mean(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
    SD_Emo_Abuse = sd(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
    Mean_Emo_Neglect = mean(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
    SD_Emo_Neglect = sd(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
    Mean_Sex_Abuse = mean(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
    SD_Sex_Abuse = sd(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
    Mean_Bullying = mean(WK2_Bullying_Total, na.rm = TRUE),
    SD_Bullying = sd(WK2_Bullying_Total, na.rm = TRUE)
  ) %>%
  gt() %>%
  tab_header(
    title = "Mean and Standard Deviation of CTQ Scores by Race"
  ) %>%
  fmt_number(
    columns = vars(starts_with("Mean"), starts_with("SD")),
    decimals = 2
  )
Mean and Standard Deviation of CTQ Scores by Race
ED_RaceEthCode Mean_CTQ_Tot SD_CTQ_Tot Mean_Phys_Abuse SD_Phys_Abuse Mean_Phys_Neglect SD_Phys_Neglect Mean_Emo_Abuse SD_Emo_Abuse Mean_Emo_Neglect SD_Emo_Neglect Mean_Sex_Abuse SD_Sex_Abuse Mean_Bullying SD_Bullying
1 0.14 1.06 0.15 1.08 0.14 1.01 0.07 1.03 0.11 0.99 0.10 1.08 0.03 1.03
2 −0.06 0.95 −0.03 0.98 −0.12 0.87 0.10 1.02 −0.06 0.92 −0.10 0.89 0.08 0.95
3 0.01 1.02 −0.01 1.00 0.06 1.08 −0.09 0.98 0.02 1.05 0.06 1.06 −0.06 1.02
4 −0.01 0.93 0.05 1.02 0.04 1.04 −0.03 0.98 0.00 1.01 −0.06 0.85 −0.08 0.99
NA −0.28 0.78 −0.43 0.49 −0.25 0.54 −0.10 1.17 −0.27 0.76 −0.11 1.00 −0.29 1.33
# CTQ total x Race
p7 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = CTQ_Total, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ TOTAL by RACE",
       x = "RACE", y = "CTQ TOTAL") + gghisto

# Sexual assault CTQ x Race
p8 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_SexAbu_RS, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ Sexual Abuse by RACE",
       x = "RACE", y = "CTQ Sexual Abuse") + gghisto

# physical abuse x Race
p9 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyAbu_RS, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ Physical Abuse by RACE",
       x = "RACE", y = "CTQ phy Abuse") + gghisto

# physical neglect x Race
p10 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyNeg_RS, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ Physical Neglect by RACE",
       x = "RACE", y = "CTQ phy neglect") + gghisto

#emotional abuse x Race
p11 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoAbu_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
  labs(title = "CTQ Emotional Abuse by RACE",
       x = "RACE", y = "CTQ emo abuse") + gghisto

#emotional neglect x Race
p12 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoNeg_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "CTQ Emotional Neglect by RACE",
       x = "RACE", y = "CTQ emo neglect") + 
  theme(legend.position = "none") + gghisto


#bullying x Race
p13 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_Bullying_Total, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "Bullying by Race", 
       x = " ", y = "Bullying Score") + 
  theme(legend.position = "none") + gghisto                                                                                                                                                         
                                                                                                                        ctq.race.plots_simple <- grid.arrange(p7, p8, p9, p10, p11, p12, p13, ncol = 3) 


#save as pdf
#ggsave("ctq.race.plots_simple.pdf", ctq.race.plots_simple, width = 12, height = 10)


ADI vs PDI

cor_results <- list(
  "ADI vs PDI" = cor.test(df$ADI_NatRank, df$ED_PDI_RS, use="complete.obs", method = "spearman"),
  "ADI vs CTQ" = cor.test(df$ADI_NatRank, df$CTQ_Total, use="complete.obs", method = "spearman"),
  "PDI vs CTQ" = cor.test(df$ED_PDI_RS, df$CTQ_Total, use="complete.obs", method = "spearman"))

#print the number of complete cases for each of ADI, PDI, and CTQ
sum(complete.cases(df$ADI_NatRank))
[1] 2400
sum(complete.cases(df$ED_PDI_RS))
[1] 2341
sum(complete.cases(df$CTQ_Total))
[1] 2480
#print the number of complete cases overlapping between PDI and ADI
sum(complete.cases(df$ADI_NatRank, df$ED_PDI_RS))
[1] 2263
data.frame(
  Comparison = names(cor_results),
  rho = sapply(cor_results, function(x) round(x$estimate, 6)),
  p_value = sapply(cor_results, function(x) round(x$p.value, 6)))
#plot of PDI x CTQ
p1 <- ggplot(df, aes(x = ED_PDI_RS, y = CTQ_Total)) +
  geom_point(alpha = 0.3, size=3) +  
  geom_smooth(method = "lm", se = FALSE, color = "magenta3", lwd=3) +  
  stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) + 
  labs(title = "Scatter Plot of PDI vs CTQ Total",
       x = "PDI", y = "CTQ Total") + gghisto

#Plot of ADI x CTQ
p2 <- ggplot(df, aes(x = ADI_NatRank, y = CTQ_Total)) +
  geom_point(alpha = 0.3, size=3) +  
  geom_smooth(method = "lm", se = FALSE, color = "purple3", lwd=3) +  
  stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) + 
  labs(title = "Scatter Plot of ADI vs CTQ Total",
       x = "ADI", y = "CTQ Total") + gghisto

grid.arrange(p1,p2)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'


Sup. Table 1: Compared Pain Classes

table1_class_features <- c("Pain_Class","ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank","Site_New", "ED_Event_BroadClass", "ED_PDI_RS","PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")

# Create a named list to specify the type of each variable
type_list <- list(
  WK2_CTQSF_PhyAbu_RS = "continuous",
  WK2_CTQSF_EmoAbu_RS = "continuous",
  WK2_CTQSF_SexAbu_RS = "continuous",
  WK2_CTQSF_PhyNeg_RS = "continuous",
  WK2_CTQSF_EmoNeg_RS = "continuous",
  WK2_Bullying_Total = "continuous")

# Generate the summary table and compare across class and add p

suppressMessages(suppressWarnings(
  df %>%
    select(all_of(table1_class_features)) %>%
    tbl_summary(
      by = Pain_Class,
      statistic = all_continuous() ~ "{mean} ({sd})",
      digits = all_continuous() ~ 2,
      type = type_list) %>% add_p)) 
Characteristic low, N = 1,0701 high, N = 3521 moderate, N = 5901 moderate recovery, N = 4681 p-value2
ED_GenderBirthCert



<0.001
    Female 605 (57%) 237 (67%) 409 (69%) 303 (65%)
    Male 465 (43%) 115 (33%) 181 (31%) 165 (35%)
ED_Age 33.36 (12.52) 40.89 (13.19) 37.61 (13.12) 36.81 (14.00) <0.001
ED_RaceEthCode



<0.001
    1 122 (11%) 37 (11%) 77 (13%) 42 (9.0%)
    2 387 (36%) 93 (26%) 201 (34%) 210 (45%)
    3 521 (49%) 210 (60%) 285 (49%) 197 (42%)
    4 39 (3.6%) 12 (3.4%) 23 (3.9%) 17 (3.6%)
    Unknown 1 0 4 2
BMI 29.17 (7.95) 32.15 (9.44) 30.95 (8.28) 29.75 (7.93) <0.001
    Unknown 78 21 38 21
ADI_NatRank 62.03 (28.04) 70.74 (26.08) 66.28 (26.76) 60.04 (27.52) <0.001
    Unknown 41 8 14 17
Site_New



0.3
    Midwest 432 (40%) 139 (39%) 216 (37%) 173 (37%)
    NorthEast 441 (41%) 136 (39%) 267 (45%) 209 (45%)
    SouthEast 197 (18%) 77 (22%) 107 (18%) 86 (18%)
ED_Event_BroadClass



0.12
    MVC/Non-motor Collision 814 (76%) 268 (76%) 469 (79%) 358 (76%)
    Other 152 (14%) 40 (11%) 76 (13%) 72 (15%)
    Physical/Sexual Abuse 104 (9.7%) 44 (13%) 45 (7.6%) 38 (8.1%)
ED_PDI_RS 12.49 (7.09) 15.81 (7.33) 15.00 (7.07) 14.49 (7.13) <0.001
    Unknown 43 27 42 27
PRE_Pain_MdSv 189 (18%) 214 (61%) 251 (43%) 123 (27%) <0.001
    Unknown 2 1 3 4
CTQ_Any 805 (75%) 301 (86%) 490 (83%) 381 (81%) <0.001
CTQ_Total -0.15 (0.92) 0.26 (1.09) 0.13 (1.06) -0.01 (0.97) <0.001
PhyAbu_Any 397 (37%) 193 (55%) 298 (51%) 201 (43%) <0.001
WK2_CTQSF_PhyAbu_RS -0.14 (0.90) 0.32 (1.19) 0.13 (1.05) -0.08 (0.93) <0.001
EmoAbu_Any 623 (58%) 260 (74%) 416 (71%) 328 (70%) <0.001
WK2_CTQSF_EmoAbu_RS -0.18 (0.93) 0.32 (1.11) 0.15 (1.03) 0.00 (0.95) <0.001
SexAbu_Any 297 (28%) 148 (42%) 238 (40%) 184 (39%) <0.001
WK2_CTQSF_SexAbu_RS -0.15 (0.88) 0.21 (1.16) 0.13 (1.09) 0.02 (0.97) <0.001
PhyNeg_Any 460 (43%) 177 (50%) 297 (50%) 212 (45%) 0.012
WK2_CTQSF_PhyNeg_RS -0.03 (1.03) 0.07 (1.01) 0.04 (0.97) -0.04 (0.95) 0.028
EmoNeg_Any 525 (49%) 189 (54%) 327 (55%) 243 (52%) 0.078
WK2_CTQSF_EmoNeg_RS -0.05 (1.00) 0.06 (1.03) 0.03 (0.96) 0.03 (1.01) 0.080
Bullying_Any 814 (76%) 290 (82%) 476 (81%) 397 (85%) <0.001
WK2_Bullying_Total -0.15 (0.95) 0.28 (1.11) 0.09 (1.02) 0.04 (0.95) <0.001
1 n (%); Mean (SD)
2 Pearson’s Chi-squared test; Kruskal-Wallis rank sum test


Category distribution plots

p1 <- ggplot(df_traj %>% filter(!is.na(ED_Event_BroadClass)), aes(x = ED_Event_BroadClass, fill=ED_Event_BroadClass)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Trauma Type", y = "Count") + gghisto2

p2 <- ggplot(df_traj %>% filter(!is.na(ED_highestgrade)), aes(x = ED_highestgrade, fill=ED_highestgrade)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Education",
       y = "Count") + gghisto2

p3 <- ggplot(df_traj %>% filter(!is.na(ED_Marital)), aes(x = ED_Marital, fill=ED_Marital)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Marital Status",
       y = "Count")  + gghisto2

p4 <- ggplot(df_traj %>% filter(!is.na(Pain_Class)), aes(x = Pain_Class, fill=Pain_Class)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Pain Class",
       y = "Count") + gghisto2

grid.arrange(p1, p2, p3, p4, ncol = 2)


Chi Sq function

#run chi square test for all categorical variables
chi_sq_results <- function(df, features, group_var) {
  test_feature <- function(feature) {
    if (!feature %in% names(df)) {
      return(data.frame(
        Feature = feature,
        Chi_Square = NA,
        P_Value = NA
      ))}
    chi_sq_test <- tryCatch(
      chisq.test(table(df[[feature]], df[[group_var]])),
      error = function(e) {
        return(list(statistic = NA, p.value = NA))
      })
    chi_squared <- chi_sq_test$statistic
    p_value <- chi_sq_test$p.value
    
    return(data.frame(
      Feature = feature,
      Chi_Square = chi_squared,
      P_Value = p_value
    ))}
  
  results <- map_dfr(features, test_feature)
  return(results)
}

categorical_features <- c("ED_GenderBirthCert", "ED_RaceEthCode", "Site_New", "ED_Event_BroadClass", "PRE_Pain_MdSv", "CTQ_Any", "PhyAbu_Any", "EmoAbu_Any", "SexAbu_Any", "PhyNeg_Any", "EmoNeg_Any", "Bullying_Any")

chi.sq_table <- chi_sq_results(df, categorical_features, "Pain_Class")
chi.sq_table

#save chi sq table as csv
#write.csv(chi.sq_table, "chi_sq_table.csv")


KW function

kruskal_wallis_results <- function(df, features, group_var) {
  test_feature <- function(feature) {
    if (!feature %in% names(df)) {
      return(data.frame(
        Feature = feature,
        Chi_Square = NA,
        P_Value = NA
      ))}
    kw_test <- tryCatch(
      kruskal.test(df[[feature]] ~ df[[group_var]]),
      error = function(e) {
        return(list(statistic = NA, p.value = NA))
      })
    chi_squared <- kw_test$statistic
    p_value <- kw_test$p.value
    
    return(data.frame(
      Feature = feature,
      Chi_Square = chi_squared,
      P_Value = p_value
    ))}
  
  results <- map_dfr(features, test_feature)
  return(results)
}

continuous_features <- c("WK2_CTQSF_PhyAbu_RS", "WK2_CTQSF_EmoAbu_RS", "WK2_CTQSF_SexAbu_RS", 
                         "WK2_CTQSF_PhyNeg_RS", "WK2_CTQSF_EmoNeg_RS", "WK2_Bullying_Total", 
                         "ED_Age", "BMI", "ADI_NatRank", "ED_PDI_RS")

kw_table <- kruskal_wallis_results(df, continuous_features, "Pain_Class")

kw_table
#save kw table as csv
#write.csv(kw_table, "kw_table.csv")


Models

Functions to perform BH Correcations

#function to apply BH correction to list of models

apply_bh_correction <- function(models, model_names) {
  correct_model <- function(model) {
    model_results <- tidy(model, exponentiate = TRUE) %>%
      filter(term != "(Intercept)") %>%
      select(y.level,term, estimate, p.value)
    
    model_results <- model_results %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH")) %>%
      mutate(
        p.value = round(p.value, digits = 6),
        p_adj_bh = round(p_adj_bh, digits = 6)
      ) %>%
      mutate(
        p.value = format(p.value, scientific = FALSE),
        p_adj_bh = format(p_adj_bh, scientific = FALSE)
      )
    return(model_results)
  }
  
  corrected_models <- lapply(models, correct_model)
  names(corrected_models) <- model_names
  
  return(corrected_models)
}
#function to get adjusted p values from list of models

extract_p_values <- function(model_df, model_name) {
  model_df %>%
    mutate(model = model_name) %>%
    select(y.level, term, p.value,p_adj_bh, model)}

Models 33-34 Pre-pain logistic regression

# Create binary variables for high/low CTQ and Bullying
df <- df %>%
  mutate(
    CTQ_HighLow = ifelse(CTQ_Total > 6, "High", "Low"),
    Bullying_HighLow = ifelse(WK2_Bullying_Total > 3, "High", "Low"))

# Create binary variable for PRE_Pain_MdSv
df$PRE_Pain_MdSv2 <- ifelse(df$PRE_Pain_MdSv == "Yes", 1, 0)

# M33 Logistic regression using CTQ composite
m33 <- glm(PRE_Pain_MdSv2 ~ CTQ_Total, data = df, family = "binomial")

tidy(m33, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric)))

# M34 Logistic regression using Bullying composite
m34 <- glm(PRE_Pain_MdSv2 ~ WK2_Bullying_Total, data = df, family = "binomial")
tidy(m34, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric)))

# Chi-square test for CTQ_HighLow
chisq.test(table(df$CTQ_HighLow, df$PRE_Pain_MdSv))

    Chi-squared test for given probabilities

data:  table(df$CTQ_HighLow, df$PRE_Pain_MdSv)
X-squared = 339.7, df = 1, p-value < 2.2e-16
# Chi-square test for Bullying_HighLow
chisq.test(table(df$Bullying_HighLow, df$PRE_Pain_MdSv))

    Chi-squared test for given probabilities

data:  table(df$Bullying_HighLow, df$PRE_Pain_MdSv)
X-squared = 339.7, df = 1, p-value < 2.2e-16
# chi-square test for any CTQ
chisq.test(table(df$CTQ_Any, df$PRE_Pain_MdSv))

    Pearson's Chi-squared test with Yates' continuity correction

data:  table(df$CTQ_Any, df$PRE_Pain_MdSv)
X-squared = 23.917, df = 1, p-value = 1.006e-06
# chi-square test for any bullying
chisq.test(table(df$Bullying_Any, df$PRE_Pain_MdSv))

    Pearson's Chi-squared test with Yates' continuity correction

data:  table(df$Bullying_Any, df$PRE_Pain_MdSv)
X-squared = 4.2373, df = 1, p-value = 0.03955


Model 1: Base Model + VIF

Main multi-nominal regression model with CTQ as a predictor variable, including our standard covariates (same in linear models), and latent pain trajectories as the dependent variables. Model 2 is this with extra features.

#pain trajectory ~ age + sex + race + site + CTQ
m1 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total, data = df)
# weights:  40 (27 variable)
initial  value 3428.305955 
iter  10 value 3145.752220
iter  20 value 3098.312611
iter  30 value 3081.080649
final  value 3080.567070 
converged
#view output
tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))  %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH"))

#examine variance inflation factor to assess collinearity
vif(m1)
Warning in vif.default(m1) : No intercept: vifs may not be sensible.
                        GVIF Df GVIF^(1/(2*Df))
ED_Age             12.103945  1        3.479072
Site_New            3.792127  2        1.395471
ED_RaceEthCode     16.213444  3        1.590911
ED_GenderBirthCert  1.753029  1        1.324020
CTQ_Total           1.371539  1        1.171127

Variance Inflation Factor (VIF) is used to detect multicollinearity. VIF > 10: Indicates high multicollinearity that might be problematic. VIF between 5 and 10: Moderate multicollinearity that might need addressing. VIF < 5: Generally considered acceptable.


Model 2: Additional Features + VIF

m2 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2741.339524
iter  20 value 2550.555155
iter  30 value 2496.401666
iter  40 value 2482.971837
iter  50 value 2482.225214
final  value 2482.128451 
converged
tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))  %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH"))

vif(m2)
Warning in vif.default(m2) : No intercept: vifs may not be sensible.
                         GVIF Df GVIF^(1/(2*Df))
ED_Age              17.705104  1        4.207743
Site_New             4.738185  2        1.475377
ED_RaceEthCode      19.673930  3        1.643041
ED_GenderBirthCert   1.941991  1        1.393553
CTQ_Total            1.448485  1        1.203530
BMI                 18.363080  1        4.285216
PRE_Pain_MdSv        3.067227  1        1.751350
ED_Marital          10.289081  2        1.790994
ADI_NatRank         12.774021  1        3.574076
ED_Event_BroadClass  2.156180  2        1.211773
ED_PDI_RS            7.261578  1        2.694732


Model 15: M2 + CTQ*Sex interaction

m15 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_GenderBirthCert, data = df)
# weights:  76 (54 variable)
initial  value 2905.672981 
iter  10 value 2740.351683
iter  20 value 2546.525161
iter  30 value 2493.081747
iter  40 value 2480.739654
iter  50 value 2479.964436
iter  60 value 2479.938816
final  value 2479.937746 
converged
tidy(m15, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))


Model 14: M2 + CTQ*race interaction

m14 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_RaceEthCode, data = df)
# weights:  84 (60 variable)
initial  value 2905.672981 
iter  10 value 2738.781285
iter  20 value 2545.566498
iter  30 value 2491.153216
iter  40 value 2477.385899
iter  50 value 2476.228437
iter  60 value 2476.171183
final  value 2476.169885 
converged
tidy(m14, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))


Models 16-21: M2 + each CTQ subtype + bullying

#physical abuse m16
m16 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2739.152319
iter  20 value 2548.076833
iter  30 value 2486.719546
iter  40 value 2478.571787
iter  50 value 2477.992574
final  value 2477.950997 
converged
#physical neglect m17
m17 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.269534
iter  20 value 2580.210494
iter  30 value 2504.715204
iter  40 value 2492.497525
iter  50 value 2491.587041
final  value 2491.509002 
converged
#emotional abuse m18
m18 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2739.343013
iter  20 value 2549.575062
iter  30 value 2486.449855
iter  40 value 2475.071211
iter  50 value 2474.358717
final  value 2474.325475 
converged
#emotional neglect m19
m19 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.115876
iter  20 value 2591.191332
iter  30 value 2503.297991
iter  40 value 2491.076542
iter  50 value 2490.377414
final  value 2490.335809 
converged
#sexual abuse m20
m20 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_SexAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2742.336087
iter  20 value 2564.102062
iter  30 value 2497.718946
iter  40 value 2487.454342
iter  50 value 2486.822604
final  value 2486.781130 
converged
#bullying m21
m21 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_Bullying_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2742.146186
iter  20 value 2551.065763
iter  30 value 2488.904082
iter  40 value 2480.381656
iter  50 value 2479.548137
final  value 2479.495450 
converged
tidy(m16, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m17, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m18, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m19, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m20, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m21, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

BH Across M16-M21

perform BH corrections on the values in combined_p_values_m16_m21

#BH corrections for models with subtypes m16-m21

#make first set of adjusted within p vals
models_list_subtypes <- list(m16, m17, m18, m19, m20, m21)
model_names2 <- c("m16", "m17", "m18", "m19", "m20", "m21")
adjusted_models2 <- apply_bh_correction(models_list_subtypes, model_names2)

combined_p_values_m16_m21 <- map2_dfr(adjusted_models2, paste0("m", 16:21), extract_p_values)

double_corrected_p_values_16_m21 <- combined_p_values_m16_m21 %>%
  mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
  mutate(
    p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
    p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE)  )

double_corrected_p_values_16_m21


Models 26-31: M2 + BINARY INDICATORS

M26: M2 + PhyAbu ANY

#Model 2 but instead of physical abuse score, its a binary score of whether or not the pt had any physical abuse

m26 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.215465
iter  20 value 2538.670319
iter  30 value 2490.267312
iter  40 value 2481.883743
iter  50 value 2481.407674
final  value 2481.370933 
converged
tidy(m26, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))%>%
  mutate(p_adj_bh = p.adjust(p.value, method = "BH"))

Model 27: M2 + Emo_Abuse ANY

#Model 2 but instead of emotional abuse score, its a binary score of whether or not the pt had any emotional abuse

m27 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.545586
iter  20 value 2541.516221
iter  30 value 2489.192593
iter  40 value 2482.375221
iter  50 value 2482.194772
final  value 2482.178232 
converged
tidy(m27, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))

Model 28: M2 + Emo_Neglect ANY

#Model 2 but instead of emotional neglect score, its a binary score of whether or not the pt had any emotional neglect

m28 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.664578
iter  20 value 2558.737778
iter  30 value 2495.816626
iter  40 value 2490.341546
iter  50 value 2490.056292
final  value 2490.038111 
converged
tidy(m28, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))

Model 29: M2 + Phy_Neglect ANY

#Model 2 but instead of physical neglect score, its a binary score of whether or not the pt had any physical neglect

m29 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.702303
iter  20 value 2548.462391
iter  30 value 2494.585863
iter  40 value 2489.264715
iter  50 value 2489.000832
final  value 2488.979296 
converged
tidy(m29, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))

Model 30: M2 + Sexual Abuse ANY

#Model 2 but instead of sexual abuse score, its a binary score of whether or not the pt had any sexual abuse

m30 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + SexAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.380632
iter  20 value 2545.148484
iter  30 value 2492.757380
iter  40 value 2486.885581
iter  50 value 2486.521490
final  value 2486.501395 
converged
tidy(m30, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))

Model 31: M2 + Bullying ANY

#Model 2 but instead of bullying score, its a binary score of whether or not the pt had any bullying

m31 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + Bullying_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights:  72 (51 variable)
initial  value 2905.672981 
iter  10 value 2746.165693
iter  20 value 2562.600366
iter  30 value 2494.807104
iter  40 value 2488.444171
iter  50 value 2488.177859
final  value 2488.159649 
converged
tidy(m31, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))


BH Across M26-M31

#BH BETWEEN Corrections for models with binary indicators m26-m31

models_list_ANY <- list(m26, m27, m28, m29, m30, m31)
model_names3 <- c("m26", "m27", "m28", "m29", "m30", "m31")
adjusted_models3 <- apply_bh_correction(models_list_ANY, model_names3)

double_corrected_p_values_m26_m31 <- map2_dfr(adjusted_models3, paste0("m", 26:31), extract_p_values)

#perform BH corrections on the values in double_corrected_p_values_m26_m31
double_corrected_p_values_m26_m31 <- double_corrected_p_values_m26_m31 %>%
  mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
  mutate(
    p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
    p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE)  )

double_corrected_p_values_m26_m31


Model 13: M2 + CTQ_Triad

m13 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Triad + BMI + PRE_Pain_MdSv + ED_Concussion + ED_Event_BroadClass + ED_PDI_RS + ADI_NatRank, data = df)
# weights:  68 (48 variable)
initial  value 2915.377041 
iter  10 value 2748.168396
iter  20 value 2550.846915
iter  30 value 2496.762252
iter  40 value 2487.701025
iter  50 value 2487.230911
final  value 2487.224790 
converged
tidy(m13, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))  %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH"))


---
title: "AURORA ELA Analysis for Publication"
author: "Alice Woolard"
date: "September 2024"
output: html_notebook
---

This document contains the code for the AURORA ELA analysis published in the manuscript, *Early life adversity increases risk for chronic posttraumatic pain, data from humans and rodents* by McKibben et al. In this analysis, we  explore the relationship between childhood trauma (measured via CTQ), bullying, and pain trajectory in the AURORA dataset. 

For additional work by our research team, see [Linnstaedt Lab Website](https://www.med.unc.edu/anesthesiology/linnstaedtlab/)

### Set up

#### Import libraries

```{r, message=FALSE, warning=FALSE}
rm(list = ls())

libraries <- c("ggsignif","tidyverse", "rstatix", "corrplot", "nlme", "sjPlot", "readxl", "nnet", "chisq.posthoc.test","broom", "knitr","lmtest", "ggpubr","gt","forcats","car", "highcharter", "ggplot2", "corrr", "ggcorrplot", "gridExtra", "skimr", "pROC", "gtsummary", "dunn.test","tidyr", "purrr", "dplyr")

suppressMessages(suppressWarnings(
  invisible(lapply(libraries, library, character.only = TRUE))))

# Explicitly set select to refer to dplyr::select
select <- dplyr::select
```

<br>

#### Define plot elements

```{r, warning=FALSE, message=FALSE}
gghisto <- list(
  theme(axis.text.x = element_text(face="bold", color="cornflowerblue", size=14, angle= 20),
          axis.text.y = element_text(face="bold", color="royalblue4", 
          size=16, angle=25), axis.title=element_text(size=17,face="bold"),
          plot.title = element_text(size=14,face="bold.italic")))

gghisto2 <- list(
  theme(axis.text.x = element_text(face="bold", color="cornflowerblue", size=8, angle=6),
          axis.text.y = element_text(face="bold", color="royalblue4", 
          size=12), axis.title=element_text(size=14,face="bold"),
          plot.title = element_text(size=14,face="bold.italic")))

color_fill <- scale_fill_manual(values = c("1" = "violetred1", "2" = "steelblue1")) 
```

<br>

#### Import all freeze 4 datasets


```{r, warning=FALSE, message=FALSE}
#Read in the dataset
data_1 <- read.csv("AURORA_full_update02102022_New.csv",check.names=F)[-1]

#freeze 4 datasets
AURORA_Freeze_4_general_mod <- read_csv("AURORA_Freeze_4_general_mod.csv",na = ".")
AURORA_Freeze_4_demogr_mod <- read_csv("AURORA_Freeze_4_demogr_mod.csv",na = ".")
AURORA_Freeze_4_pain_mod <- read_csv("AURORA_Freeze_4_pain_mod.csv",na = ".")

AURORA_Freeze_4_ctq_mod <- read_csv("AURORA_Freeze_4_ctq_mod.csv",na = ".")
AURORA_F4_CTQ_item <- readxl::read_excel("AURORA_F4_CTQ_item.xlsx")
AURORA_Freeze_4_pdi_mod <- read_csv("AURORA_Freeze_4_pdi_mod.csv",na = ".")
SiteID <- read_excel("SiteID.xlsx")
```

```{r, warning=FALSE, message=FALSE}
FZ4_df <- AURORA_Freeze_4_demogr_mod %>% inner_join(AURORA_F4_CTQ_item,by="PID") %>% 
  inner_join(AURORA_Freeze_4_pain_mod,by="PID") %>% 
  inner_join(AURORA_Freeze_4_ctq_mod,by="PID") %>% 
  inner_join(AURORA_Freeze_4_pdi_mod,by="PID") %>% 
  inner_join(SiteID,by="PID") %>% 
  mutate(Site_New = case_when(SiteID %in% c("Baystate","BMC", "Beth Israel", "BWH", "Cooper","Einstein","Jefferson","MGH","Miriam","Penn","Rhode Island","St. John","St. Joseph","Temple","UMass") ~ "NorthEast",SiteID %in% c("Baylor", "Emory ED", "Jacksonville","UAB","UNC","UT Houston","UT Southwestern","Vanderbilt") ~ "SouthEast", SiteID %in% c("39","49","Beaumont Royal Oak","Beaumont Troy","Eskenazi","Henry Ford","Cincinnati","Indiana","WashU","WashU DP") ~ "Midwest")) %>% 
  convert_as_factor(Site_New,ED_GenderBirthCert,ED_GenderNow,ED_Marital,ED_RaceEthCode,ED_highestgrade,WK2_EmploymentCode,WK2_IncomeCode) %>% 
  mutate_at(vars(contains("WK2_Childhood")),as.numeric) %>% dplyr::select(-SiteID) %>% #2682
  mutate(WK2_Bullying_Total = WK2_ChildhoodBullying+WK2_ChildhoodHitOrHurt) %>% 
  inner_join(data_1 %>% dplyr::select(PID,WK8_Pain_C,M3_Pain_C,M6_Pain_C),by="PID") %>% 
  filter(is.na(WK2_CTQSF_SexAbu_RS) == F) %>% 
  filter(is.na(WK2_CTQSF_PhyAbu_RS)== F) %>% 
  filter(is.na(WK2_CTQSF_EmoAbu_RS)== F) %>% 
  filter(is.na(WK2_CTQSF_EmoNeg_RS) == F) %>% 
  filter(is.na(WK2_CTQSF_PhyNeg_RS) == F) %>% 
  filter(is.na(WK2_Bullying_Total) == F) %>% #2483
  mutate(log_WK2_CTQSF_Total_RS = log2(WK2_CTQSF_Total_RS+1),log_ED_NowPain=log2(ED_NowPain+1))
```

<br>

\vspace{-5truemm}

```{r, warning=FALSE, message=FALSE}
# Import datasets
pain_trajectory <- read_csv("pain_latent_class_4.csv")
PTSD_df <- read_excel("AURORA_Freeze_4_ptsd_mod.xlsx")
Trauma_df <- read_excel("AURORA_Freeze_4_trauma_mod.xlsx")
Injury_df <- read_excel("AURORA_Freeze_4_injury_mod.xlsx")
ADI_df <- read_excel("AURORA_Freeze_4_ses_mod.xlsx",na = ".")

# Combine datasets
df_traj <- FZ4_df %>%
  inner_join(Trauma_df %>% select(PID, ED_Event_BroadClass), by = "PID") %>%
  inner_join(Injury_df %>% select(PID, ED_Concussion), by = "PID") %>%
  inner_join(ADI_df %>% select(PID, ADI_NatRank), by = "PID") %>%
  inner_join(pain_trajectory %>% select(PID, pain_Class_MostLikely), by = "PID")
```

<br>

#### Cleaning: Relevel and refactor features

Refactor pain trajectories and Relevel to "low pain" as the reference group

```{r}
df_traj <- df_traj %>%
  rename(Pain_Class = pain_Class_MostLikely)

df_traj <- df_traj %>%
  mutate(Pain_Class = case_when(
    Pain_Class == "Class 1" ~ "moderate recovery",
    Pain_Class == "Class 2" ~ "moderate",
    Pain_Class == "Class 3" ~ "low",
    Pain_Class == "Class 4" ~ "high",
    TRUE ~ as.character(Pain_Class)  # This handles any unexpected values
  )) %>%
  mutate(Pain_Class = factor(Pain_Class)) %>%
  mutate(Pain_Class = relevel(Pain_Class, ref = "low"))
```

Factorize and rename categorical variables

```{r}
factor_vars <- c("ED_GenderBirthCert", "ED_Event_BroadClass", "ED_Marital", 
                 "WK2_EmploymentCode", "ED_highestgrade", "ED_RaceEthCode", "PRE_Pain_MdSv")
df_traj[factor_vars] <- lapply(df_traj[factor_vars], as.factor)

#in gender, change 1 to male and 2 to female
df_traj$ED_GenderBirthCert <- ifelse(df_traj$ED_GenderBirthCert == 1, "Male", "Female")

#in pre pain, change 1 to yes and 0 to no .. is this correct?
df_traj$PRE_Pain_MdSv <- ifelse(df_traj$PRE_Pain_MdSv == 1, "Yes", "No")

# Convert continuous variables to numeric
df_traj$ED_PDI_RS <- as.numeric(df_traj$ED_PDI_RS)
df_traj$ED_Concussion <- as.numeric(df_traj$ED_Concussion)

#rename WK2 CTQ total column to CTQ_Total
df_traj <- df_traj %>%
  rename(CTQ_Total = WK2_CTQSF_Total_RS)

#print column headers that contain the letters "ED"
colnames(df_traj)[grep("ED", colnames(df_traj))]

```

<br>

General clean up, renaming, removing negatives

```{r}
#Make any value less than 0 in the bullying total == NA
df_traj$WK2_Bullying_Total <- ifelse(df_traj$WK2_Bullying_Total < 0, NA, df_traj$WK2_Bullying_Total)

#Make any value less than 0 in ADI == NA
df_traj$ADI_NatRank <- ifelse(df_traj$ADI_NatRank < 0, NA, df_traj$ADI_NatRank)
```

<br>

Remove three bullying rows have negative numbers

```{r, warning=FALSE, message=FALSE}
# Remove the rows in WK2_Bullying_Total that contain negative values
df_traj <- df_traj %>% filter(WK2_Bullying_Total >= 0)
```

<br>

Relevel categories for trauma type

```{r}
df_traj <- df_traj %>%
  mutate(ED_Event_BroadClass = case_when(
    ED_Event_BroadClass == "1" ~ "MVC/Non-motor Collision",
    ED_Event_BroadClass == "6" ~ "MVC/Non-motor Collision",
    ED_Event_BroadClass == "2" ~ "Physical/Sexual Abuse",
    ED_Event_BroadClass == "3" ~ "Physical/Sexual Abuse",
    ED_Event_BroadClass == "4" ~ "Other",
    ED_Event_BroadClass == "5" ~ "Other",
    ED_Event_BroadClass == "7" ~ "Other",
    ED_Event_BroadClass == "8" ~ "Other",
    ED_Event_BroadClass == "9" ~ "Other",
    ED_Event_BroadClass == "10" ~ "Other",
    ED_Event_BroadClass == "11" ~ "Other",
    TRUE ~ as.character(ED_Event_BroadClass)  # Keeps any other values unchanged
  ))

# Convert ED_Event broad class to factor
df_traj$ED_Event_BroadClass <- factor(df_traj$ED_Event_BroadClass)

table(df_traj$ED_Event_BroadClass)
```

<br>

Relevel categories for education

```{r}
#relevel education category
# 1- did not finish HS (codes 0-12) 2- HS grad + some college (codes 13-15) 3- college grad (Bachelors or Associates) (codes 16-18) 4- post grad codes(19-21)
df_traj <- df_traj %>%
  mutate(ED_highestgrade = case_when(
    ED_highestgrade == "0" ~ "Did not finish HS",
    ED_highestgrade == "1" ~ "Did not finish HS",
    ED_highestgrade == "7" ~ "Did not finish HS",
    ED_highestgrade == "8" ~ "Did not finish HS",
    ED_highestgrade == "9" ~ "Did not finish HS",
    ED_highestgrade == "10" ~ "Did not finish HS",
    ED_highestgrade == "11" ~ "Did not finish HS",
    ED_highestgrade == "12" ~ "Did not finish HS",
    ED_highestgrade == "13" ~ "HS Grad/Some College",
    ED_highestgrade == "14" ~ "HS Grad/Some College",
    ED_highestgrade == "15" ~ "HS Grad/Some College",
    ED_highestgrade == "16" ~ "Bach or Assoc degree",
    ED_highestgrade == "17" ~ "Bach or Assoc degree",
    ED_highestgrade == "18" ~ "Bach or Assoc degree",
    ED_highestgrade == "19" ~ "Post-graduate education",
    ED_highestgrade == "20" ~ "Post-graduate education",
    ED_highestgrade == "21" ~ "Post-graduate education",
    ED_highestgrade == "-7" ~ NA,
    ED_highestgrade == "-5" ~ NA,
    TRUE ~ as.character(ED_highestgrade)  # Keeps any other values unchanged
  ))

# Convert to factor
df_traj$ED_highestgrade <- factor(df_traj$ED_highestgrade)

table(df_traj$ED_highestgrade)
```

<br>

Re-level marriage category

```{r}
#marital status 1-married 2-separated 3-divorced 4-annulled 5-widowed 6-single
#4. marriage - combine 2+3+4+5 as a single category

df_traj <- df_traj %>%
  mutate(ED_Marital = case_when(
    ED_Marital == "1" ~ "Married",
    ED_Marital == "2" ~ "Divorced/Widowed",
    ED_Marital == "3" ~ "Divorced/Widowed",
    ED_Marital == "4" ~ "Divorced/Widowed",
    ED_Marital == "5" ~ "Divorced/Widowed",
    ED_Marital == "6" ~ "Single",
    TRUE ~ as.character(ED_Marital)  # Keeps any other values unchanged
  ))

# Convert to factor
df_traj$ED_Marital <- factor(df_traj$ED_Marital)

table(df_traj$ED_Marital)
```

<br>

#### Subset to final df for analysis

Subset df_traj to our final df that will be used for model analysis, keeping only the features needed.

```{r}
df <- df_traj %>%
  select(PID, Pain_Class,ED_Age, Site_New, ED_RaceEthCode, ED_GenderBirthCert, ADI_NatRank, CTQ_Total, 
         BMI, PRE_Pain_MdSv, WK2_EmploymentCode, ED_Marital, ED_highestgrade, 
         ED_Concussion, ED_Event_BroadClass, ED_PDI_RS, WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_EmoAbu_RS, WK2_CTQSF_SexAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoNeg_RS, WK2_Bullying_Total)
```

<br>

Add the CTQ triad score that is a combination of bullying, physical abuse, and emotional abuse scores.

```{r}
#Add combination CTQ score
df$CTQ_Triad <- df$WK2_Bullying_Total + df$WK2_CTQSF_PhyAbu_RS + df$WK2_CTQSF_EmoAbu_RS

#Print the rows that have NA in the CTQ triad
df %>% filter(is.na(CTQ_Triad))
#Confirms that for the CTQ triad, if any of the additive features contain NA, then the final score is NA

#Add the two different types of bullying question
df <- df %>%
  left_join(FZ4_df %>% select(PID, BulliedSimple = 20, BulliedHitOrHurt = 21), by = "PID")

#Remove first column, PID, as this is unneeded 
df <- df %>% select(-PID)

#make all of df[,c(2,6:8)]) numeric
df[,c(2,6:8)] <- lapply(df[,c(2,6:8)], as.numeric)
```

<br>

Binary scores for later models

```{r}
df$Bullying_Any <- ifelse(df$WK2_Bullying_Total > 0, 1, 0)
df$PhyAbu_Any <- ifelse(df$WK2_CTQSF_PhyAbu_RS > 0, 1, 0)
df$EmoAbu_Any <- ifelse(df$WK2_CTQSF_EmoAbu_RS > 0, 1, 0)
df$SexAbu_Any <- ifelse(df$WK2_CTQSF_SexAbu_RS > 0, 1, 0)
df$PhyNeg_Any <- ifelse(df$WK2_CTQSF_PhyNeg_RS > 0, 1, 0)
df$EmoNeg_Any <- ifelse(df$WK2_CTQSF_EmoNeg_RS > 0, 1, 0)
df$CTQ_Any <- ifelse(df$CTQ_Total > 0, 1, 0)
```

<br>

#### Z-transform scaling

Z-scaling is a method of standardizing the data so that it has a mean of 0 and a standard deviation of 1. This is done to make the data more interpretable and to ensure that the variables are on the same scale. The `scale` function works by subtracting the mean of the data from each value and then dividing by the standard deviation.

```{r}
#scale
df[, c(7, 16:22)] <- scale(df[, c(7, 16:22)])
df[, c(7, 16:22)] <- lapply(df[, c(7, 16:22)], function(x) as.numeric(scale(x)))

summary(df)
```

```{r}
#print median of each ctq type and bullying
df %>% summarize(median(CTQ_Total), median(WK2_CTQSF_PhyAbu_RS), median(WK2_CTQSF_EmoAbu_RS), median(WK2_CTQSF_SexAbu_RS), median(WK2_CTQSF_PhyNeg_RS), median(WK2_CTQSF_EmoNeg_RS), median(WK2_Bullying_Total))

#make boxplots of each ctq type and bullying
#bullying has the highest median after z-score transforming, so it has the highest values
df %>% pivot_longer(cols = c(7, 17:22), names_to = "CTQ_Type", values_to = "CTQ_Score") %>%
  ggplot(aes(x = CTQ_Type, y = CTQ_Score)) +
  geom_boxplot() +
  labs(title = "Boxplot of CTQ Scores", x = "CTQ Type", y = "CTQ Score") + gghisto
```

<br>

<br>

### Preliminary Analysis

\vspace{-5truemm}

<br>

</br>

#### Table 1: Participant Characteristics

Table 1: Socio-demographic variables

```{r}
table1a_features <- c("ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank")

suppressMessages(suppressWarnings(
  df %>%
    select(all_of(table1a_features)) %>%
    tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", 
      digits = all_continuous() ~ 2)))
```

<br>

Table 1: ED/Trauma-related variable.names

```{r}
table1b_features <- c("Site_New", "ED_Event_BroadClass", "ED_PDI_RS")

suppressMessages(suppressWarnings(
df %>%
    select(all_of(table1b_features)) %>%
    tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", 
      digits = all_continuous() ~ 2)))
```

<br>

Table 1: Past pain/stress variables

```{r}
table1c_features <- c("PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")

# Create a named list to specify the type of each variable
type_list <- list(
  WK2_CTQSF_PhyAbu_RS = "continuous",
  WK2_CTQSF_EmoAbu_RS = "continuous",
  WK2_CTQSF_SexAbu_RS = "continuous",
  WK2_CTQSF_PhyNeg_RS = "continuous",
  WK2_CTQSF_EmoNeg_RS = "continuous",
  WK2_Bullying_Total = "continuous")

# Generate the summary table
suppressMessages(suppressWarnings(
  df %>%
    select(all_of(table1c_features)) %>%
    tbl_summary(
      statistic = all_continuous() ~ "{mean} ({sd})",
      digits = all_continuous() ~ 2,
      type = type_list)))
```

<br> <br>

#### Descriptive Stats

```{r}
#calculate the % of participants reported experiencing at least one instance of any type of ELA (i.e. either CTQ≥1 or childhood Bullying ≥1)  and the M (SD) for composite CTQ
df %>% 
  summarize(perc_any_ELA = mean(CTQ_Total >= 1 | WK2_Bullying_Total >= 1, na.rm = TRUE),
            mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
            sd_CTQ = sd(CTQ_Total, na.rm = TRUE))
```

How many people had at a score of 1 or more for at least two different CTQ subtypes?

```{r}
# Create a logical matrix where TRUE indicates a score of 1 or more
logical_matrix <- df %>%
  select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS, 
         WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
  mutate(across(everything(), ~ . >= 1))

# Count the number of TRUE values in each row
df$ctq_total_count <- rowSums(logical_matrix)

# Filter individuals with at least 2 CTQ subtypes with scores of 1 or more
sum(df$ctq_total_count >= 2)

#what is the most common ctq subtype with percentages out of 2480
df %>% 
  select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS, 
         WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
  pivot_longer(cols = everything(), names_to = "CTQ_Subtype", values_to = "Score") %>%
  filter(Score >= 1) %>%
  count(CTQ_Subtype) %>%
  arrange(desc(n)) %>%
  mutate(perc = n/2480)
```

```{r}
#how many cases in the df had either bullying score 1+ or total CTQ score 1+
df %>% filter(CTQ_Total >= 1 | WK2_Bullying_Total >= 1) %>% nrow()

#ttest comparing ctq in men vs women
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)

#caclulcate std in addition to means for sex
df %>% 
  group_by(ED_GenderBirthCert) %>% 
  summarize(mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
            sd_CTQ = sd(CTQ_Total, na.rm = TRUE))

#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
```

<br>

#### Correlations, Heatmap

```{r, warning=FALSE, message=FALSE}
# Function to calculate Spearman correlations, p-values, and number of complete cases
correlation_summary <- function(x, y) {
  cor_test <- cor.test(df[[x]], df[[y]], method = "spearman")
  
  tibble(
    Variable1 = x,
    Variable2 = y,
    Rho = cor_test$estimate,
    P_Value = cor_test$p.value,
  )
}

# Define the columns to correlate
columns <- colnames(df)[16:22]

# Create a summary table
all_ctq_correlations <- expand_grid(Variable1 = columns, Variable2 = columns) %>%
  filter(Variable1 != Variable2) %>%
  pmap_dfr(~ correlation_summary(..1, ..2))

#save CTQ correlations to a CSV
#write.csv(all_ctq_correlations, "all_ELA_correlations_for_Lauren.csv")
```

Heatmap of correlations

```{r, warning=FALSE, message=FALSE}
# Compute the correlation matrix
cor_matrix <- cor(df[,c(16:22)], use = "complete.obs", method="spearman")

# Plot the correlation matrix
# Supplementary Figure 1
ggcorrplot(cor_matrix, lab = TRUE)
```

<br>

#### Sup. Fig. 2: SEX x CTQ stats+plots

```{r}
#Look at the relationship between ED_GenderBirthCert and CTQ_Total
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
p1 <- ggplot(df, aes(x = ED_GenderBirthCert, y = CTQ_Total)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ TOTAL by GENDER",
       x = "GENDER",
       y = "CTQ TOTAL") + gghisto

# Sexual assault CTQ x Sex
t.test(WK2_CTQSF_SexAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
p2 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_SexAbu_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Sexual Abuse by GENDER",
       x = "GENDER",
       y = "CTQ Sexual Abuse") + gghisto

# physical abuse x Sex
t.test(WK2_CTQSF_PhyAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
p3 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyAbu_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Physical Abuse by GENDER",
       x = "GENDER",
       y = "CTQ phy Abuse") + gghisto

# physical neglect x sex
t.test(WK2_CTQSF_PhyNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
p4 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyNeg_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Physical Neglect by GENDER",
       x = "GENDER",
       y = "CTQ phy neglect") + gghisto

#emotional abuse x sex
t.test(WK2_CTQSF_EmoAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
p5 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoAbu_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Emotional Abuse by GENDER",
       x = "GENDER",
       y = "CTQ emo abuse") + gghisto

#emotional neglect x sex
t.test(WK2_CTQSF_EmoNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
p6 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoNeg_RS)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "CTQ Emotional Neglect by GENDER",
       x = "GENDER",
       y = "CTQ emo neglect") + gghisto


#bullying x sex
p7 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_Bullying_Total)) +
  geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
  labs(title = "Bullying by Gender",
       x = " ",
       y = "Bullying Score") + gghisto

ctq.sex.plots <- grid.arrange(p1, p2, p3, p4, p5, p6, p7, ncol = 3)

#save these plots to a pdf
#ggsave("ctq.sex.plots.pdf", ctq.sex.plots, width = 12, height = 10)


#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
```

```{r}
#ttest of ctq total by sex
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)
```

<br>

#### Sup. Fig. 3: RACE x CTQ stats+plots

```{r}
#Look at the relationship between RACE and each CTQ category
# List of CTQ categories and their corresponding columns
ctq_tests <- list(
  CTQ_Total = "CTQ_Total",
  Physical_Abuse = "WK2_CTQSF_PhyAbu_RS",
  Physical_Neglect = "WK2_CTQSF_PhyNeg_RS",
  Emotional_Abuse = "WK2_CTQSF_EmoAbu_RS",
  Emotional_Neglect = "WK2_CTQSF_EmoNeg_RS",
  Sexual_Abuse = "WK2_CTQSF_SexAbu_RS",
  Bullying = "WK2_Bullying_Total"
)

# Initialize a vector to store significant results
significant_tests <- c()

# Perform Kruskal-Wallis test and Dunn's test for each CTQ category
for (name in names(ctq_tests)) {
  cat("Testing:", name, "\n")
  kw_test <- kruskal.test(df[[ctq_tests[[name]]]] ~ df$ED_RaceEthCode)
  print(kw_test)  
  if (kw_test$p.value < 0.05) {
    cat("Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...\n")
    dunn_result <- dunn.test(df[[ctq_tests[[name]]]], df$ED_RaceEthCode, method = "bh") 
    print(dunn_result)
    significant_tests <- c(significant_tests, name)
  } else {
    cat("Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.\n")
  }
  cat("\n") 
}

# Print names of significant CTQ categories
if (length(significant_tests) > 0) {
  cat("CTQ categories with significant Kruskal-Wallis test and corresponding Dunn's test results:\n")
  cat(paste(significant_tests, collapse = "\n"))
} else {
  cat("No CTQ categories with significant Kruskal-Wallis test.\n")
}
```

```{r, warning=FALSE, message=FALSE}

df %>%
  group_by(ED_RaceEthCode) %>%
  summarize(
    Mean_CTQ_Tot = mean(CTQ_Total, na.rm = TRUE),
    SD_CTQ_Tot = sd(CTQ_Total, na.rm = TRUE),
    Mean_Phys_Abuse = mean(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
    SD_Phys_Abuse = sd(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
    Mean_Phys_Neglect = mean(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
    SD_Phys_Neglect = sd(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
    Mean_Emo_Abuse = mean(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
    SD_Emo_Abuse = sd(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
    Mean_Emo_Neglect = mean(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
    SD_Emo_Neglect = sd(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
    Mean_Sex_Abuse = mean(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
    SD_Sex_Abuse = sd(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
    Mean_Bullying = mean(WK2_Bullying_Total, na.rm = TRUE),
    SD_Bullying = sd(WK2_Bullying_Total, na.rm = TRUE)
  ) %>%
  gt() %>%
  tab_header(
    title = "Mean and Standard Deviation of CTQ Scores by Race"
  ) %>%
  fmt_number(
    columns = vars(starts_with("Mean"), starts_with("SD")),
    decimals = 2
  )
```

```{r}
# CTQ total x Race
p7 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = CTQ_Total, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ TOTAL by RACE",
       x = "RACE", y = "CTQ TOTAL") + gghisto

# Sexual assault CTQ x Race
p8 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_SexAbu_RS, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ Sexual Abuse by RACE",
       x = "RACE", y = "CTQ Sexual Abuse") + gghisto

# physical abuse x Race
p9 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyAbu_RS, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ Physical Abuse by RACE",
       x = "RACE", y = "CTQ phy Abuse") + gghisto

# physical neglect x Race
p10 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyNeg_RS, fill = ED_RaceEthCode)) +
  geom_boxplot(lwd = .9) +
  theme(legend.position = "none") +
  labs(title = "Boxplot of CTQ Physical Neglect by RACE",
       x = "RACE", y = "CTQ phy neglect") + gghisto

#emotional abuse x Race
p11 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoAbu_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
  labs(title = "CTQ Emotional Abuse by RACE",
       x = "RACE", y = "CTQ emo abuse") + gghisto

#emotional neglect x Race
p12 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoNeg_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "CTQ Emotional Neglect by RACE",
       x = "RACE", y = "CTQ emo neglect") + 
  theme(legend.position = "none") + gghisto


#bullying x Race
p13 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_Bullying_Total, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "Bullying by Race", 
       x = " ", y = "Bullying Score") + 
  theme(legend.position = "none") + gghisto                                                                                                                                                         
                                                                                                                        ctq.race.plots_simple <- grid.arrange(p7, p8, p9, p10, p11, p12, p13, ncol = 3) 

#save as pdf
#ggsave("ctq.race.plots_simple.pdf", ctq.race.plots_simple, width = 12, height = 10)
```

<br>

#### ADI vs PDI

```{r, warning=FALSE, message=FALSE}
cor_results <- list(
  "ADI vs PDI" = cor.test(df$ADI_NatRank, df$ED_PDI_RS, use="complete.obs", method = "spearman"),
  "ADI vs CTQ" = cor.test(df$ADI_NatRank, df$CTQ_Total, use="complete.obs", method = "spearman"),
  "PDI vs CTQ" = cor.test(df$ED_PDI_RS, df$CTQ_Total, use="complete.obs", method = "spearman"))

#print the number of complete cases for each of ADI, PDI, and CTQ
sum(complete.cases(df$ADI_NatRank))
sum(complete.cases(df$ED_PDI_RS))
sum(complete.cases(df$CTQ_Total))

#print the number of complete cases overlapping between PDI and ADI
sum(complete.cases(df$ADI_NatRank, df$ED_PDI_RS))

data.frame(
  Comparison = names(cor_results),
  rho = sapply(cor_results, function(x) round(x$estimate, 6)),
  p_value = sapply(cor_results, function(x) round(x$p.value, 6)))
```

```{r, warning=FALSE, message=FALSE}
#plot of PDI x CTQ
p1 <- ggplot(df, aes(x = ED_PDI_RS, y = CTQ_Total)) +
  geom_point(alpha = 0.3, size=3) +  
  geom_smooth(method = "lm", se = FALSE, color = "magenta3", lwd=3) +  
  stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) + 
  labs(title = "Scatter Plot of PDI vs CTQ Total",
       x = "PDI", y = "CTQ Total") + gghisto

#Plot of ADI x CTQ
p2 <- ggplot(df, aes(x = ADI_NatRank, y = CTQ_Total)) +
  geom_point(alpha = 0.3, size=3) +  
  geom_smooth(method = "lm", se = FALSE, color = "purple3", lwd=3) +  
  stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) + 
  labs(title = "Scatter Plot of ADI vs CTQ Total",
       x = "ADI", y = "CTQ Total") + gghisto

grid.arrange(p1,p2)
```

<br>

#### Sup. Table 1: Compared Pain Classes

```{r}
table1_class_features <- c("Pain_Class","ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank","Site_New", "ED_Event_BroadClass", "ED_PDI_RS","PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")

# Create a named list to specify the type of each variable
type_list <- list(
  WK2_CTQSF_PhyAbu_RS = "continuous",
  WK2_CTQSF_EmoAbu_RS = "continuous",
  WK2_CTQSF_SexAbu_RS = "continuous",
  WK2_CTQSF_PhyNeg_RS = "continuous",
  WK2_CTQSF_EmoNeg_RS = "continuous",
  WK2_Bullying_Total = "continuous")

# Generate the summary table and compare across class and add p

suppressMessages(suppressWarnings(
  df %>%
    select(all_of(table1_class_features)) %>%
    tbl_summary(
      by = Pain_Class,
      statistic = all_continuous() ~ "{mean} ({sd})",
      digits = all_continuous() ~ 2,
      type = type_list) %>% add_p)) 
```


<br>

#### Category distribution plots

```{r}
p1 <- ggplot(df_traj %>% filter(!is.na(ED_Event_BroadClass)), aes(x = ED_Event_BroadClass, fill=ED_Event_BroadClass)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Trauma Type", y = "Count") + gghisto2

p2 <- ggplot(df_traj %>% filter(!is.na(ED_highestgrade)), aes(x = ED_highestgrade, fill=ED_highestgrade)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Education",
       y = "Count") + gghisto2

p3 <- ggplot(df_traj %>% filter(!is.na(ED_Marital)), aes(x = ED_Marital, fill=ED_Marital)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Marital Status",
       y = "Count")  + gghisto2

p4 <- ggplot(df_traj %>% filter(!is.na(Pain_Class)), aes(x = Pain_Class, fill=Pain_Class)) +
  geom_bar(color="black") + theme(legend.position = "none") +
    geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
  labs(title = "Counts of Pain Class",
       y = "Count") + gghisto2

grid.arrange(p1, p2, p3, p4, ncol = 2)
```
<br>

#### Chi Sq function

```{r}
#run chi square test for all categorical variables
chi_sq_results <- function(df, features, group_var) {
  test_feature <- function(feature) {
    if (!feature %in% names(df)) {
      return(data.frame(
        Feature = feature,
        Chi_Square = NA,
        P_Value = NA
      ))}
    chi_sq_test <- tryCatch(
      chisq.test(table(df[[feature]], df[[group_var]])),
      error = function(e) {
        return(list(statistic = NA, p.value = NA))
      })
    chi_squared <- chi_sq_test$statistic
    p_value <- chi_sq_test$p.value
    
    return(data.frame(
      Feature = feature,
      Chi_Square = chi_squared,
      P_Value = p_value
    ))}
  
  results <- map_dfr(features, test_feature)
  return(results)
}

categorical_features <- c("ED_GenderBirthCert", "ED_RaceEthCode", "Site_New", "ED_Event_BroadClass", "PRE_Pain_MdSv", "CTQ_Any", "PhyAbu_Any", "EmoAbu_Any", "SexAbu_Any", "PhyNeg_Any", "EmoNeg_Any", "Bullying_Any")

chi.sq_table <- chi_sq_results(df, categorical_features, "Pain_Class")
chi.sq_table

#save chi sq table as csv
#write.csv(chi.sq_table, "chi_sq_table.csv")
```

<br>

#### KW function

```{r}
kruskal_wallis_results <- function(df, features, group_var) {
  test_feature <- function(feature) {
    if (!feature %in% names(df)) {
      return(data.frame(
        Feature = feature,
        Chi_Square = NA,
        P_Value = NA
      ))}
    kw_test <- tryCatch(
      kruskal.test(df[[feature]] ~ df[[group_var]]),
      error = function(e) {
        return(list(statistic = NA, p.value = NA))
      })
    chi_squared <- kw_test$statistic
    p_value <- kw_test$p.value
    
    return(data.frame(
      Feature = feature,
      Chi_Square = chi_squared,
      P_Value = p_value
    ))}
  
  results <- map_dfr(features, test_feature)
  return(results)
}

continuous_features <- c("WK2_CTQSF_PhyAbu_RS", "WK2_CTQSF_EmoAbu_RS", "WK2_CTQSF_SexAbu_RS", 
                         "WK2_CTQSF_PhyNeg_RS", "WK2_CTQSF_EmoNeg_RS", "WK2_Bullying_Total", 
                         "ED_Age", "BMI", "ADI_NatRank", "ED_PDI_RS")

kw_table <- kruskal_wallis_results(df, continuous_features, "Pain_Class")

kw_table
#save kw table as csv
#write.csv(kw_table, "kw_table.csv")
```

<br>

### Models

#### Functions to perform BH Correcations

```{r}
#function to apply BH correction to list of models

apply_bh_correction <- function(models, model_names) {
  correct_model <- function(model) {
    model_results <- tidy(model, exponentiate = TRUE) %>%
      filter(term != "(Intercept)") %>%
      select(y.level,term, estimate, p.value)
    
    model_results <- model_results %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH")) %>%
      mutate(
        p.value = round(p.value, digits = 6),
        p_adj_bh = round(p_adj_bh, digits = 6)
      ) %>%
      mutate(
        p.value = format(p.value, scientific = FALSE),
        p_adj_bh = format(p_adj_bh, scientific = FALSE)
      )
    return(model_results)
  }
  
  corrected_models <- lapply(models, correct_model)
  names(corrected_models) <- model_names
  
  return(corrected_models)
}
```

```{r}
#function to get adjusted p values from list of models

extract_p_values <- function(model_df, model_name) {
  model_df %>%
    mutate(model = model_name) %>%
    select(y.level, term, p.value,p_adj_bh, model)}
```


#### Models 33-34 Pre-pain logistic regression

```{r}
# Create binary variables for high/low CTQ and Bullying
df <- df %>%
  mutate(
    CTQ_HighLow = ifelse(CTQ_Total > 6, "High", "Low"),
    Bullying_HighLow = ifelse(WK2_Bullying_Total > 3, "High", "Low"))

# Create binary variable for PRE_Pain_MdSv
df$PRE_Pain_MdSv2 <- ifelse(df$PRE_Pain_MdSv == "Yes", 1, 0)

# M33 Logistic regression using CTQ composite
m33 <- glm(PRE_Pain_MdSv2 ~ CTQ_Total, data = df, family = "binomial")

tidy(m33, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric)))

# M34 Logistic regression using Bullying composite
m34 <- glm(PRE_Pain_MdSv2 ~ WK2_Bullying_Total, data = df, family = "binomial")
tidy(m34, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric)))

# Chi-square test for CTQ_HighLow
chisq.test(table(df$CTQ_HighLow, df$PRE_Pain_MdSv))

# Chi-square test for Bullying_HighLow
chisq.test(table(df$Bullying_HighLow, df$PRE_Pain_MdSv))

# chi-square test for any CTQ
chisq.test(table(df$CTQ_Any, df$PRE_Pain_MdSv))

# chi-square test for any bullying
chisq.test(table(df$Bullying_Any, df$PRE_Pain_MdSv))
```

<br>

#### Model 1: Base Model + VIF

Main multi-nominal regression model with CTQ as a predictor variable, including our standard covariates (same in linear models), and latent pain trajectories as the dependent variables. Model 2 is this with extra features.

```{r}
#pain trajectory ~ age + sex + race + site + CTQ
m1 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total, data = df)

#view output
tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))  %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH"))

#examine variance inflation factor to assess collinearity
vif(m1)
```

Variance Inflation Factor (VIF) is used to detect multicollinearity. VIF \> 10: Indicates high multicollinearity that might be problematic. VIF between 5 and 10: Moderate multicollinearity that might need addressing. VIF \< 5: Generally considered acceptable.

<br>

#### Model 2: Additional Features + VIF

```{r}
m2 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))  %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH"))

vif(m2)
```

<br>

#### Model 15: M2 + CTQ\*Sex interaction

```{r}
m15 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_GenderBirthCert, data = df)

tidy(m15, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))
```

<br>

#### Model 14: M2 + CTQ\*race interaction

```{r}
m14 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_RaceEthCode, data = df)

tidy(m14, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))
```

<br>

#### Models 16-21: M2 + each CTQ subtype + bullying

```{r}
#physical abuse m16
m16 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

#physical neglect m17
m17 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

#emotional abuse m18
m18 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

#emotional neglect m19
m19 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

#sexual abuse m20
m20 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_SexAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

#bullying m21
m21 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_Bullying_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m16, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m17, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m18, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m19, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m20, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 

tidy(m21, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5))) 
```

#### BH Across M16-M21

perform BH corrections on the values in combined_p_values_m16_m21
```{r}
#BH corrections for models with subtypes m16-m21

#make first set of adjusted within p vals
models_list_subtypes <- list(m16, m17, m18, m19, m20, m21)
model_names2 <- c("m16", "m17", "m18", "m19", "m20", "m21")
adjusted_models2 <- apply_bh_correction(models_list_subtypes, model_names2)

combined_p_values_m16_m21 <- map2_dfr(adjusted_models2, paste0("m", 16:21), extract_p_values)

double_corrected_p_values_16_m21 <- combined_p_values_m16_m21 %>%
  mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
  mutate(
    p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
    p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE)  )

double_corrected_p_values_16_m21
```

<br>

#### Models 26-31: M2 + BINARY INDICATORS

M26: M2 + PhyAbu ANY

```{r}
#Model 2 but instead of physical abuse score, its a binary score of whether or not the pt had any physical abuse

m26 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m26, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))%>%
  mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
```


Model 27: M2 + Emo_Abuse ANY

```{r}
#Model 2 but instead of emotional abuse score, its a binary score of whether or not the pt had any emotional abuse

m27 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m27, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))
```

Model 28: M2 + Emo_Neglect ANY

```{r}
#Model 2 but instead of emotional neglect score, its a binary score of whether or not the pt had any emotional neglect

m28 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m28, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))
```

Model 29: M2 + Phy_Neglect ANY

```{r}
#Model 2 but instead of physical neglect score, its a binary score of whether or not the pt had any physical neglect

m29 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m29, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))
```

Model 30: M2 + Sexual Abuse ANY

```{r}
#Model 2 but instead of sexual abuse score, its a binary score of whether or not the pt had any sexual abuse

m30 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + SexAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m30, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))
```

Model 31: M2 + Bullying ANY

```{r}
#Model 2 but instead of bullying score, its a binary score of whether or not the pt had any bullying

m31 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + Bullying_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)

tidy(m31, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))
```

<br>

#### BH Across M26-M31

```{r}
#BH BETWEEN Corrections for models with binary indicators m26-m31

models_list_ANY <- list(m26, m27, m28, m29, m30, m31)
model_names3 <- c("m26", "m27", "m28", "m29", "m30", "m31")
adjusted_models3 <- apply_bh_correction(models_list_ANY, model_names3)

double_corrected_p_values_m26_m31 <- map2_dfr(adjusted_models3, paste0("m", 26:31), extract_p_values)

#perform BH corrections on the values in double_corrected_p_values_m26_m31
double_corrected_p_values_m26_m31 <- double_corrected_p_values_m26_m31 %>%
  mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
  mutate(
    p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
    p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE)  )

double_corrected_p_values_m26_m31
```

<br>

#### Model 13: M2 + CTQ_Triad

```{r}
m13 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Triad + BMI + PRE_Pain_MdSv + ED_Concussion + ED_Event_BroadClass + ED_PDI_RS + ADI_NatRank, data = df)

tidy(m13, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(., 5)))  %>%
      mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
```


<br>


