This document contains the code for the AURORA ELA analysis. In this analysis, we will explore the relationship between childhood trauma (CTQ) and pain outcomes in the AURORA dataset. We will perform correlation analysis, linear models, repeated measures models, and assess the relationship between CTQ and pain outcome trajectories. This is a preliminary draft of the analysis that will be completed over the next 2 weeks.
rm(list=ls())
libraries <- c("tidyverse", "rstatix", "MASS","corrplot", "nlme", "sjPlot", "readxl", "nnet", "broom", "lmtest", "forcats","car", "highcharter", "ggplot2", "corrr", "ggcorrplot", "gridExtra", "skimr", "pROC", "gtsummary", "dunn.test", "tidyr", "purrr", "dplyr")
suppressMessages(suppressWarnings(
invisible(lapply(libraries, library, character.only = TRUE))))
Note: For the purpose of this draft code file, Lauren has already
combined all of the freeze 4 datasets into one file. This pre-combined
file, called ELA_full_data_raw.csv below, will be used for
the initial analysis. The full file import code will be rendered and
applied in the final script
#Read in the dataset
data_1 <- read.csv("AURORA_full_update02102022_New.csv",check.names=F)[-1]
#freeze 4 datasets
AURORA_Freeze_4_general_mod <- read_csv("AURORA_Freeze_4_general_mod.csv",na = ".")
AURORA_Freeze_4_demogr_mod <- read_csv("AURORA_Freeze_4_demogr_mod.csv",na = ".")
AURORA_Freeze_4_pain_mod <- read_csv("AURORA_Freeze_4_pain_mod.csv",na = ".")
AURORA_Freeze_4_ctq_mod <- read_csv("AURORA_Freeze_4_ctq_mod.csv",na = ".")
AURORA_F4_CTQ_item <- readxl::read_excel("AURORA_F4_CTQ_item.xlsx")
#AURORA_Freeze_4_pdi_mod <- read_csv("AURORA_Freeze_4_pdi_mod.csv",na = ".")
SiteID <- read_excel("SiteID.xlsx")
FZ4_df <- AURORA_Freeze_4_demogr_mod %>% inner_join(AURORA_F4_CTQ_item,by="PID") %>%
inner_join(AURORA_Freeze_4_pain_mod,by="PID") %>%
inner_join(AURORA_Freeze_4_ctq_mod,by="PID") %>%
#inner_join(AURORA_Freeze_4_pdi_mod,by="PID") %>%
inner_join(SiteID,by="PID") %>%
mutate(Site_New = case_when(SiteID %in% c("Baystate","BMC", "Beth Israel", "BWH", "Cooper","Einstein","Jefferson","MGH","Miriam","Penn","Rhode Island","St. John","St. Joseph","Temple","UMass") ~ "NorthEast",SiteID %in% c("Baylor", "Emory ED", "Jacksonville","UAB","UNC","UT Houston","UT Southwestern","Vanderbilt") ~ "SouthEast", SiteID %in% c("39","49","Beaumont Royal Oak","Beaumont Troy","Eskenazi","Henry Ford","Cincinnati","Indiana","WashU","WashU DP") ~ "Midwest")) %>%
convert_as_factor(Site_New,ED_GenderBirthCert,ED_GenderNow,ED_Marital,ED_RaceEthCode,ED_highestgrade,WK2_EmploymentCode,WK2_IncomeCode) %>%
mutate_at(vars(contains("WK2_Childhood")),as.numeric) %>% dplyr::select(-SiteID) %>% #2682
mutate(WK2_Bullying_Total = WK2_ChildhoodBullying+WK2_ChildhoodHitOrHurt) %>%
inner_join(data_1 %>% dplyr::select(PID,WK8_Pain_C,M3_Pain_C,M6_Pain_C),by="PID") %>%
filter(is.na(WK2_CTQSF_SexAbu_RS) == F) %>%
filter(is.na(WK2_CTQSF_PhyAbu_RS)== F) %>%
filter(is.na(WK2_CTQSF_EmoAbu_RS)== F) %>%
filter(is.na(WK2_CTQSF_EmoNeg_RS) == F) %>%
filter(is.na(WK2_CTQSF_PhyNeg_RS) == F) %>%
filter(is.na(WK2_Bullying_Total) == F) %>% #2483
mutate(log_WK2_CTQSF_Total_RS = log2(WK2_CTQSF_Total_RS+1),log_ED_NowPain=log2(ED_NowPain+1))
Below: Read in the completed dataframe with each of the separate AURORA files. Lauren did this on her end for ease of use in this draft code analysis. Latent class trajectories also imported.
Troubleshooting why the R environment is all fucked up and nothing works
#rm(list = ls())
#install.packages("dplyr")
library(readxl)
library(dplyr)
Trauma_df <- read_excel("AURORA_Freeze_4_trauma_mod.xlsx")
# Explicitly set select to refer to dplyr::select
select <- dplyr::select
rename <- dplyr::rename
# Import datasets
FZ4_df <- read_csv("ELA_full_data_raw.csv")
New names:
• `` -> `...1`
Rows: 2483 Columns: 78
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Site_New
dbl (77): ...1, PID, ED_GenderBirthCert, ED_GenderNow, ED_Age, ED_Marital, ED_RaceEthCode, ED_highestgrade, BMI, WK2_Employment...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pain_trajectory <- read_csv("pain_latent_class_4.csv")
Rows: 2943 Columns: 6
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): pain_Class_MostLikely
dbl (5): PID, Probability_Class1, Probability_Class2, Probability_Class3, Probability_Class4
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PTSD_df <- read_excel("AURORA_Freeze_4_ptsd_mod.xlsx")
Trauma_df <- read_excel("AURORA_Freeze_4_trauma_mod.xlsx")
Injury_df <- read_excel("AURORA_Freeze_4_injury_mod.xlsx")
ADI_df <- read_excel("AURORA_Freeze_4_ses_mod.xlsx",na = ".")
# Combine datasets
df_traj <- FZ4_df %>%
inner_join(Trauma_df %>% select(PID, ED_Event_BroadClass), by = "PID") %>%
inner_join(Injury_df %>% select(PID, ED_Concussion), by = "PID") %>%
inner_join(ADI_df %>% select(PID, ADI_NatRank), by = "PID") %>%
inner_join(pain_trajectory %>% select(PID, pain_Class_MostLikely), by = "PID")
#export <- df_traj[,c(2,82,70)]
#write.csv(export, "ELA_full_data.csv")
#import xiady df AURORA_Construct_Score_FS_Frz4_Data.csv
df_xiaodi <- read_csv("AURORA_Construct_Score_FS_Frz4_Data.csv")
Rows: 2887 Columns: 141
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (140): Pain_1, Pain_9, Pain_21, Pain_31, Pain_43, Pain_53, Pain_67, Pain_77, Pain_105, Pain_147, Pain_196, Pain_238, Pain_2...
dbl (1): PID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#get rid of everything but the variables that start with Pain
df_xiaodi2 <- df_xiaodi %>% select(PID, starts_with("Pain")) %>% as.data.frame()
# bind it by PID to our data set and send me a simpler CSV that is just PID, Pain Trajectory, and Pain Scores for all the time points?
pain_scores_df <- df_traj[,c(2,82)]
#join pain_scores_df and df_xiaodi2 by PID
xiaodi_df_for_Lauren <- pain_scores_df %>% inner_join(df_xiaodi2, by = "PID")
#write csv of xiaodi_df_for_Lauren
write.csv(xiaodi_df_for_Lauren, "xiaodi_df_for_Lauren.csv")
Refactor pain trajectories and Relevel to “low pain” as the reference group
#df_traj$pain_Class_MostLikely
#df_traj <- df_traj %>% rename(Pain_Class = pain_Class_MostLikely)
names(df_traj)[names(df_traj) == "pain_Class_MostLikely"] <- "Pain_Class"
df_traj <- df_traj %>%
mutate(Pain_Class = case_when(
Pain_Class == "Class 1" ~ "moderate recovery",
Pain_Class == "Class 2" ~ "moderate",
Pain_Class == "Class 3" ~ "low",
Pain_Class == "Class 4" ~ "high",
TRUE ~ as.character(Pain_Class) # This handles any unexpected values
)) %>%
mutate(Pain_Class = factor(Pain_Class)) %>%
mutate(Pain_Class = relevel(Pain_Class, ref = "low"))
Factorize and rename categorical variables
factor_vars <- c("ED_GenderBirthCert", "ED_Event_BroadClass", "ED_Marital",
"WK2_EmploymentCode", "ED_highestgrade", "ED_RaceEthCode", "PRE_Pain_MdSv")
df_traj[factor_vars] <- lapply(df_traj[factor_vars], as.factor)
#in gender, change 1 to male and 2 to female
df_traj$ED_GenderBirthCert <- ifelse(df_traj$ED_GenderBirthCert == 1, "Male", "Female")
#in pre pain, change 1 to yes and 0 to no .. is this correct?
df_traj$PRE_Pain_MdSv <- ifelse(df_traj$PRE_Pain_MdSv == 1, "Yes", "No")
# Convert continuous variables to numeric
df_traj$ED_PDI_RS <- as.numeric(df_traj$ED_PDI_RS)
df_traj$ED_Concussion <- as.numeric(df_traj$ED_Concussion)
Warning: NAs introduced by coercion
#rename WK2 CTQ total column to CTQ_Total
df_traj <- df_traj %>%
rename(CTQ_Total = WK2_CTQSF_Total_RS)
general clean up, renaming, removing negatives
#Make any value less than 0 in the bullying total == NA
df_traj$WK2_Bullying_Total <- ifelse(df_traj$WK2_Bullying_Total < 0, NA, df_traj$WK2_Bullying_Total)
#Make any value less than 0 in ADI == NA
df_traj$ADI_NatRank <- ifelse(df_traj$ADI_NatRank < 0, NA, df_traj$ADI_NatRank)
#3 bullying rows have negative numbers
#remove the rows in WK2_Bullying_Total that contain negative values
df_traj <- df_traj %>% filter(WK2_Bullying_Total >= 0)
relevel categories for trauma type
df_traj <- df_traj %>%
mutate(ED_Event_BroadClass = case_when(
ED_Event_BroadClass == "1" ~ "MVC/Non-motor Collision",
ED_Event_BroadClass == "6" ~ "MVC/Non-motor Collision",
ED_Event_BroadClass == "2" ~ "Physical/Sexual Abuse",
ED_Event_BroadClass == "3" ~ "Physical/Sexual Abuse",
ED_Event_BroadClass == "4" ~ "Other",
ED_Event_BroadClass == "5" ~ "Other",
ED_Event_BroadClass == "7" ~ "Other",
ED_Event_BroadClass == "8" ~ "Other",
ED_Event_BroadClass == "9" ~ "Other",
ED_Event_BroadClass == "10" ~ "Other",
ED_Event_BroadClass == "11" ~ "Other",
TRUE ~ as.character(ED_Event_BroadClass) # Keeps any other values unchanged
))
# Convert ED_Event broad class to factor
df_traj$ED_Event_BroadClass <- factor(df_traj$ED_Event_BroadClass)
table(df_traj$ED_Event_BroadClass)
MVC/Non-motor Collision Other Physical/Sexual Abuse
1909 340 231
Relevel categories for education
#relevel education category
# 1- did not finish HS (codes 0-12) 2- HS grad + some college (codes 13-15) 3- college grad (Bachelors or Associates) (codes 16-18) 4- post grad codes(19-21)
df_traj <- df_traj %>%
mutate(ED_highestgrade = case_when(
ED_highestgrade == "0" ~ "Did not finish HS",
ED_highestgrade == "1" ~ "Did not finish HS",
ED_highestgrade == "7" ~ "Did not finish HS",
ED_highestgrade == "8" ~ "Did not finish HS",
ED_highestgrade == "9" ~ "Did not finish HS",
ED_highestgrade == "10" ~ "Did not finish HS",
ED_highestgrade == "11" ~ "Did not finish HS",
ED_highestgrade == "12" ~ "Did not finish HS",
ED_highestgrade == "13" ~ "HS Grad/Some College",
ED_highestgrade == "14" ~ "HS Grad/Some College",
ED_highestgrade == "15" ~ "HS Grad/Some College",
ED_highestgrade == "16" ~ "Bach or Assoc degree",
ED_highestgrade == "17" ~ "Bach or Assoc degree",
ED_highestgrade == "18" ~ "Bach or Assoc degree",
ED_highestgrade == "19" ~ "Post-graduate education",
ED_highestgrade == "20" ~ "Post-graduate education",
ED_highestgrade == "21" ~ "Post-graduate education",
ED_highestgrade == "-7" ~ NA,
ED_highestgrade == "-5" ~ NA,
TRUE ~ as.character(ED_highestgrade) # Keeps any other values unchanged
))
# Convert to factor
df_traj$ED_highestgrade <- factor(df_traj$ED_highestgrade)
table(df_traj$ED_highestgrade)
Bach or Assoc degree Did not finish HS HS Grad/Some College Post-graduate education
695 267 1321 189
Re-level marriage category
#relevel marrital status category
#marital status 1-married 2-separated 3-divorced 4-annulled 5-widowed 6-single
#4. marriage - combine 2+3+4+5 as a single category
df_traj <- df_traj %>%
mutate(ED_Marital = case_when(
ED_Marital == "1" ~ "Married",
ED_Marital == "2" ~ "Divorced/Widowed",
ED_Marital == "3" ~ "Divorced/Widowed",
ED_Marital == "4" ~ "Divorced/Widowed",
ED_Marital == "5" ~ "Divorced/Widowed",
ED_Marital == "6" ~ "Single",
TRUE ~ as.character(ED_Marital) # Keeps any other values unchanged
))
# Convert to factor
df_traj$ED_Marital <- factor(df_traj$ED_Marital)
table(df_traj$ED_Marital)
Divorced/Widowed Married Single
446 539 1483
#relevel ED_RaceEthCode
#1 = Hispanuic, 2 = NH White, 3 = NH Black, 4 = Other
df_traj <- df_traj %>%
mutate(ED_RaceEthCode = case_when(
ED_RaceEthCode == "1" ~ "Hispanic",
ED_RaceEthCode == "2" ~ "White",
ED_RaceEthCode == "3" ~ "Black",
ED_RaceEthCode == "4" ~ "Other",
TRUE ~ as.character(ED_RaceEthCode) # Keeps any other values unchanged
))
#convert to factor
df_traj$ED_RaceEthCode <- factor(df_traj$ED_RaceEthCode)
Subset df_traj to our final df that will be used for model analysis, keep only the features needed
df <- df_traj %>%
select(Pain_Class,ED_Age, Site_New, ED_RaceEthCode, ED_GenderBirthCert, ADI_NatRank, CTQ_Total,
BMI, PRE_Pain_MdSv, WK2_EmploymentCode, ED_Marital, ED_highestgrade,
ED_Concussion, ED_Event_BroadClass, ED_PDI_RS, WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_EmoAbu_RS, WK2_CTQSF_SexAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoNeg_RS, WK2_Bullying_Total)
Add the CTQ triad score that is a combination of bullying, physical abuse, and emotional abuse scores
#add combination CTQ score
df$CTQ_Triad <- df$WK2_Bullying_Total + df$WK2_CTQSF_PhyAbu_RS + df$WK2_CTQSF_EmoAbu_RS
#print the rows that have NA in the CTQ triad
df %>% filter(is.na(CTQ_Triad))
#Confirms that for the CTQ triad, if any of the additive features contain NA, then the final score is NA
#make all of df[,c(2,6:8)]) numeric
df[,c(2,6:8)] <- lapply(df[,c(2,6:8)], as.numeric)
Binary scores for later models
df$Bullying_Any <- ifelse(df$WK2_Bullying_Total > 0, 1, 0)
df$PhyAbu_Any <- ifelse(df$WK2_CTQSF_PhyAbu_RS > 0, 1, 0)
df$EmoAbu_Any <- ifelse(df$WK2_CTQSF_EmoAbu_RS > 0, 1, 0)
df$SexAbu_Any <- ifelse(df$WK2_CTQSF_SexAbu_RS > 0, 1, 0)
df$PhyNeg_Any <- ifelse(df$WK2_CTQSF_PhyNeg_RS > 0, 1, 0)
df$EmoNeg_Any <- ifelse(df$WK2_CTQSF_EmoNeg_RS > 0, 1, 0)
df$CTQ_Any <- ifelse(df$CTQ_Total > 0, 1, 0)
#scale
df_z <- df %>% mutate(across(where(is.numeric), ~ scale(.)))
#summary(df_z)
suppressMessages(suppressWarnings(
df %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2) ))
| Characteristic | N = 2,4801 |
|---|---|
| Pain_Class | |
| low | 1,070 (43%) |
| high | 352 (14%) |
| moderate | 590 (24%) |
| moderate recovery | 468 (19%) |
| ED_Age | 36.09 (13.31) |
| Site_New | |
| Midwest | 960 (39%) |
| NorthEast | 1,053 (42%) |
| SouthEast | 467 (19%) |
| ED_RaceEthCode | |
| Black | 1,213 (49%) |
| Hispanic | 278 (11%) |
| Other | 91 (3.7%) |
| White | 891 (36%) |
| Unknown | 7 |
| ED_GenderBirthCert | |
| Female | 1,554 (63%) |
| Male | 926 (37%) |
| ADI_NatRank | 63.93 (27.57) |
| Unknown | 80 |
| CTQ_Total | 9.50 (9.79) |
| BMI | 30.13 (8.32) |
| Unknown | 158 |
| PRE_Pain_MdSv | 777 (31%) |
| Unknown | 10 |
| WK2_EmploymentCode | |
| 1 | 1,816 (73%) |
| 2 | 65 (2.6%) |
| 3 | 55 (2.2%) |
| 4 | 96 (3.9%) |
| 5 | 446 (18%) |
| Unknown | 2 |
| ED_Marital | |
| Divorced/Widowed | 446 (18%) |
| Married | 539 (22%) |
| Single | 1,483 (60%) |
| Unknown | 12 |
| ED_highestgrade | |
| Bach or Assoc degree | 695 (28%) |
| Did not finish HS | 267 (11%) |
| HS Grad/Some College | 1,321 (53%) |
| Post-graduate education | 189 (7.6%) |
| Unknown | 8 |
| ED_Concussion | 112 (4.5%) |
| Unknown | 1 |
| ED_Event_BroadClass | |
| MVC/Non-motor Collision | 1,909 (77%) |
| Other | 340 (14%) |
| Physical/Sexual Abuse | 231 (9.3%) |
| ED_PDI_RS | 13.92 (7.24) |
| Unknown | 139 |
| WK2_CTQSF_PhyAbu_RS | |
| 0 | 1,391 (56%) |
| 1 | 219 (8.8%) |
| 2 | 250 (10%) |
| 3 | 128 (5.2%) |
| 4 | 194 (7.8%) |
| 5 | 66 (2.7%) |
| 6 | 83 (3.3%) |
| 7 | 29 (1.2%) |
| 8 | 120 (4.8%) |
| WK2_CTQSF_EmoAbu_RS | |
| 0 | 853 (34%) |
| 1 | 288 (12%) |
| 2 | 323 (13%) |
| 3 | 177 (7.1%) |
| 4 | 249 (10%) |
| 5 | 143 (5.8%) |
| 6 | 152 (6.1%) |
| 7 | 110 (4.4%) |
| 8 | 185 (7.5%) |
| WK2_CTQSF_SexAbu_RS | 1.93 (3.35) |
| WK2_CTQSF_PhyNeg_RS | |
| 0 | 1,334 (54%) |
| 1 | 231 (9.3%) |
| 2 | 277 (11%) |
| 3 | 181 (7.3%) |
| 4 | 170 (6.9%) |
| 5 | 105 (4.2%) |
| 6 | 79 (3.2%) |
| 7 | 36 (1.5%) |
| 8 | 67 (2.7%) |
| WK2_CTQSF_EmoNeg_RS | |
| 0 | 1,196 (48%) |
| 1 | 167 (6.7%) |
| 2 | 301 (12%) |
| 3 | 149 (6.0%) |
| 4 | 269 (11%) |
| 5 | 128 (5.2%) |
| 6 | 141 (5.7%) |
| 7 | 41 (1.7%) |
| 8 | 88 (3.5%) |
| WK2_Bullying_Total | |
| 0 | 503 (20%) |
| 1 | 273 (11%) |
| 2 | 421 (17%) |
| 3 | 305 (12%) |
| 4 | 377 (15%) |
| 5 | 185 (7.5%) |
| 6 | 165 (6.7%) |
| 7 | 106 (4.3%) |
| 8 | 145 (5.8%) |
| CTQ_Triad | 7.08 (6.29) |
| Bullying_Any | 1,977 (80%) |
| PhyAbu_Any | 1,089 (44%) |
| EmoAbu_Any | 1,627 (66%) |
| SexAbu_Any | 867 (35%) |
| PhyNeg_Any | 1,146 (46%) |
| EmoNeg_Any | 1,284 (52%) |
| CTQ_Any | 1,977 (80%) |
| 1 n (%); Mean (SD) | |
# Ensure columns 16 to 22 are numeric
df[16:22] <- lapply(df[16:22], as.numeric)
# Create the summary table
suppressMessages(suppressWarnings(
df %>%
tbl_summary(
by = Pain_Class,
statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2
) %>% add_p() ))
| Characteristic | low, N = 1,0701 | high, N = 3521 | moderate, N = 5901 | moderate recovery, N = 4681 | p-value2 |
|---|---|---|---|---|---|
| ED_Age | 33.36 (12.52) | 40.89 (13.19) | 37.61 (13.12) | 36.81 (14.00) | <0.001 |
| Site_New | 0.3 | ||||
| Midwest | 432 (40%) | 139 (39%) | 216 (37%) | 173 (37%) | |
| NorthEast | 441 (41%) | 136 (39%) | 267 (45%) | 209 (45%) | |
| SouthEast | 197 (18%) | 77 (22%) | 107 (18%) | 86 (18%) | |
| ED_RaceEthCode | <0.001 | ||||
| Black | 521 (49%) | 210 (60%) | 285 (49%) | 197 (42%) | |
| Hispanic | 122 (11%) | 37 (11%) | 77 (13%) | 42 (9.0%) | |
| Other | 39 (3.6%) | 12 (3.4%) | 23 (3.9%) | 17 (3.6%) | |
| White | 387 (36%) | 93 (26%) | 201 (34%) | 210 (45%) | |
| Unknown | 1 | 0 | 4 | 2 | |
| ED_GenderBirthCert | <0.001 | ||||
| Female | 605 (57%) | 237 (67%) | 409 (69%) | 303 (65%) | |
| Male | 465 (43%) | 115 (33%) | 181 (31%) | 165 (35%) | |
| ADI_NatRank | 62.03 (28.04) | 70.74 (26.08) | 66.28 (26.76) | 60.04 (27.52) | <0.001 |
| Unknown | 41 | 8 | 14 | 17 | |
| CTQ_Total | 8.02 (9.02) | 12.09 (10.63) | 10.75 (10.33) | 9.37 (9.54) | <0.001 |
| BMI | 29.17 (7.95) | 32.15 (9.44) | 30.95 (8.28) | 29.75 (7.93) | <0.001 |
| Unknown | 78 | 21 | 38 | 21 | |
| PRE_Pain_MdSv | 189 (18%) | 214 (61%) | 251 (43%) | 123 (27%) | <0.001 |
| Unknown | 2 | 1 | 3 | 4 | |
| WK2_EmploymentCode | <0.001 | ||||
| 1 | 861 (81%) | 203 (58%) | 409 (69%) | 343 (73%) | |
| 2 | 14 (1.3%) | 19 (5.4%) | 17 (2.9%) | 15 (3.2%) | |
| 3 | 15 (1.4%) | 10 (2.8%) | 17 (2.9%) | 13 (2.8%) | |
| 4 | 44 (4.1%) | 9 (2.6%) | 13 (2.2%) | 30 (6.4%) | |
| 5 | 135 (13%) | 111 (32%) | 134 (23%) | 66 (14%) | |
| Unknown | 1 | 0 | 0 | 1 | |
| ED_Marital | <0.001 | ||||
| Divorced/Widowed | 169 (16%) | 83 (24%) | 122 (21%) | 72 (16%) | |
| Married | 215 (20%) | 72 (20%) | 134 (23%) | 118 (25%) | |
| Single | 683 (64%) | 197 (56%) | 329 (56%) | 274 (59%) | |
| Unknown | 3 | 0 | 5 | 4 | |
| ED_highestgrade | <0.001 | ||||
| Bach or Assoc degree | 283 (26%) | 95 (27%) | 173 (29%) | 144 (31%) | |
| Did not finish HS | 89 (8.3%) | 56 (16%) | 78 (13%) | 44 (9.4%) | |
| HS Grad/Some College | 609 (57%) | 183 (52%) | 307 (52%) | 222 (48%) | |
| Post-graduate education | 89 (8.3%) | 15 (4.3%) | 29 (4.9%) | 56 (12%) | |
| Unknown | 0 | 3 | 3 | 2 | |
| ED_Concussion | 46 (4.3%) | 12 (3.4%) | 29 (4.9%) | 25 (5.3%) | 0.6 |
| Unknown | 0 | 0 | 1 | 0 | |
| ED_Event_BroadClass | 0.12 | ||||
| MVC/Non-motor Collision | 814 (76%) | 268 (76%) | 469 (79%) | 358 (76%) | |
| Other | 152 (14%) | 40 (11%) | 76 (13%) | 72 (15%) | |
| Physical/Sexual Abuse | 104 (9.7%) | 44 (13%) | 45 (7.6%) | 38 (8.1%) | |
| ED_PDI_RS | 12.49 (7.09) | 15.81 (7.33) | 15.00 (7.07) | 14.49 (7.13) | <0.001 |
| Unknown | 43 | 27 | 42 | 27 | |
| WK2_CTQSF_PhyAbu_RS | |||||
| 0 | 673 (63%) | 159 (45%) | 292 (49%) | 267 (57%) | |
| 1 | 92 (8.6%) | 20 (5.7%) | 54 (9.2%) | 53 (11%) | |
| 2 | 94 (8.8%) | 44 (13%) | 63 (11%) | 49 (10%) | |
| 3 | 50 (4.7%) | 22 (6.3%) | 36 (6.1%) | 20 (4.3%) | |
| 4 | 66 (6.2%) | 37 (11%) | 57 (9.7%) | 34 (7.3%) | |
| 5 | 23 (2.1%) | 13 (3.7%) | 21 (3.6%) | 9 (1.9%) | |
| 6 | 27 (2.5%) | 14 (4.0%) | 28 (4.7%) | 14 (3.0%) | |
| 7 | 11 (1.0%) | 4 (1.1%) | 8 (1.4%) | 6 (1.3%) | |
| 8 | 34 (3.2%) | 39 (11%) | 31 (5.3%) | 16 (3.4%) | |
| WK2_CTQSF_EmoAbu_RS | <0.001 | ||||
| 0 | 447 (42%) | 92 (26%) | 174 (29%) | 140 (30%) | |
| 1 | 136 (13%) | 30 (8.5%) | 53 (9.0%) | 69 (15%) | |
| 2 | 132 (12%) | 42 (12%) | 81 (14%) | 68 (15%) | |
| 3 | 71 (6.6%) | 29 (8.2%) | 48 (8.1%) | 29 (6.2%) | |
| 4 | 100 (9.3%) | 30 (8.5%) | 62 (11%) | 57 (12%) | |
| 5 | 46 (4.3%) | 25 (7.1%) | 42 (7.1%) | 30 (6.4%) | |
| 6 | 49 (4.6%) | 30 (8.5%) | 43 (7.3%) | 30 (6.4%) | |
| 7 | 36 (3.4%) | 21 (6.0%) | 37 (6.3%) | 16 (3.4%) | |
| 8 | 53 (5.0%) | 53 (15%) | 50 (8.5%) | 29 (6.2%) | |
| WK2_CTQSF_SexAbu_RS | 1.43 (2.94) | 2.63 (3.87) | 2.37 (3.65) | 2.00 (3.24) | <0.001 |
| WK2_CTQSF_PhyNeg_RS | 0.11 | ||||
| 0 | 610 (57%) | 175 (50%) | 293 (50%) | 256 (55%) | |
| 1 | 95 (8.9%) | 33 (9.4%) | 58 (9.8%) | 45 (9.6%) | |
| 2 | 110 (10%) | 38 (11%) | 78 (13%) | 51 (11%) | |
| 3 | 67 (6.3%) | 32 (9.1%) | 48 (8.1%) | 34 (7.3%) | |
| 4 | 63 (5.9%) | 31 (8.8%) | 41 (6.9%) | 35 (7.5%) | |
| 5 | 35 (3.3%) | 15 (4.3%) | 33 (5.6%) | 22 (4.7%) | |
| 6 | 35 (3.3%) | 14 (4.0%) | 19 (3.2%) | 11 (2.4%) | |
| 7 | 15 (1.4%) | 6 (1.7%) | 11 (1.9%) | 4 (0.9%) | |
| 8 | 40 (3.7%) | 8 (2.3%) | 9 (1.5%) | 10 (2.1%) | |
| WK2_CTQSF_EmoNeg_RS | 0.13 | ||||
| 0 | 545 (51%) | 163 (46%) | 263 (45%) | 225 (48%) | |
| 1 | 72 (6.7%) | 24 (6.8%) | 47 (8.0%) | 24 (5.1%) | |
| 2 | 133 (12%) | 37 (11%) | 76 (13%) | 55 (12%) | |
| 3 | 68 (6.4%) | 20 (5.7%) | 32 (5.4%) | 29 (6.2%) | |
| 4 | 91 (8.5%) | 47 (13%) | 78 (13%) | 53 (11%) | |
| 5 | 45 (4.2%) | 18 (5.1%) | 37 (6.3%) | 28 (6.0%) | |
| 6 | 55 (5.1%) | 23 (6.5%) | 35 (5.9%) | 28 (6.0%) | |
| 7 | 15 (1.4%) | 6 (1.7%) | 8 (1.4%) | 12 (2.6%) | |
| 8 | 46 (4.3%) | 14 (4.0%) | 14 (2.4%) | 14 (3.0%) | |
| WK2_Bullying_Total | <0.001 | ||||
| 0 | 256 (24%) | 62 (18%) | 114 (19%) | 71 (15%) | |
| 1 | 135 (13%) | 29 (8.2%) | 54 (9.2%) | 55 (12%) | |
| 2 | 194 (18%) | 42 (12%) | 91 (15%) | 94 (20%) | |
| 3 | 137 (13%) | 37 (11%) | 71 (12%) | 60 (13%) | |
| 4 | 151 (14%) | 55 (16%) | 93 (16%) | 78 (17%) | |
| 5 | 56 (5.2%) | 36 (10%) | 57 (9.7%) | 36 (7.7%) | |
| 6 | 63 (5.9%) | 26 (7.4%) | 44 (7.5%) | 32 (6.8%) | |
| 7 | 39 (3.6%) | 24 (6.8%) | 27 (4.6%) | 16 (3.4%) | |
| 8 | 39 (3.6%) | 41 (12%) | 39 (6.6%) | 26 (5.6%) | |
| CTQ_Triad | 5.90 (5.84) | 9.33 (7.09) | 7.96 (6.47) | 6.98 (5.78) | <0.001 |
| Bullying_Any | 814 (76%) | 290 (82%) | 476 (81%) | 397 (85%) | <0.001 |
| PhyAbu_Any | 397 (37%) | 193 (55%) | 298 (51%) | 201 (43%) | <0.001 |
| EmoAbu_Any | 623 (58%) | 260 (74%) | 416 (71%) | 328 (70%) | <0.001 |
| SexAbu_Any | 297 (28%) | 148 (42%) | 238 (40%) | 184 (39%) | <0.001 |
| PhyNeg_Any | 460 (43%) | 177 (50%) | 297 (50%) | 212 (45%) | 0.012 |
| EmoNeg_Any | 525 (49%) | 189 (54%) | 327 (55%) | 243 (52%) | 0.078 |
| CTQ_Any | 805 (75%) | 301 (86%) | 490 (83%) | 381 (81%) | <0.001 |
| 1 Mean (SD); n (%) | |||||
| 2 Kruskal-Wallis rank sum test; Pearson’s Chi-squared test | |||||
p1 <- ggplot(df_traj %>% filter(!is.na(ED_Event_BroadClass)), aes(x = ED_Event_BroadClass, fill=ED_Event_BroadClass)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Trauma Type", y = "Count") + gghisto2
p2 <- ggplot(df_traj %>% filter(!is.na(ED_highestgrade)), aes(x = ED_highestgrade, fill=ED_highestgrade)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Education",
y = "Count") + gghisto2
p3 <- ggplot(df_traj %>% filter(!is.na(ED_Marital)), aes(x = ED_Marital, fill=ED_Marital)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Marital Status",
y = "Count") + gghisto2
p4 <- ggplot(df_traj %>% filter(!is.na(Pain_Class)), aes(x = Pain_Class, fill=Pain_Class)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Pain Class",
y = "Count") + gghisto2
grid.arrange(p1, p2, p3, p4, ncol = 2)
#Look at the relationship between ED_GenderBirthCert and CTQ_Total
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.535354 3.036842
sample estimates:
mean in group Female mean in group Male
10.355212 8.069114
p1 <- ggplot(df, aes(x = ED_GenderBirthCert, y = CTQ_Total)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "CTQ Composite by Gender",
x = " ",
y = "CTQ Composite") + gghisto
# Sexual assault CTQ x Sex
t.test(WK2_CTQSF_SexAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_SexAbu_RS by ED_GenderBirthCert
t = 10.874, df = 2462.5, p-value < 2.2e-16
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.095160 1.577066
sample estimates:
mean in group Female mean in group Male
2.431145 1.095032
p2 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_SexAbu_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "Sexual Abuse by Gender",
x = " ",
y = "Sexual Abuse Score") + gghisto
# physical abuse x Sex
t.test(WK2_CTQSF_PhyAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_PhyAbu_RS by ED_GenderBirthCert
t = 3.8607, df = 2199.9, p-value = 0.0001163
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
0.1738492 0.5327784
sample estimates:
mean in group Female mean in group Male
1.692407 1.339093
p3 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyAbu_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "Physical Abuse by Gender",
x = " ",
y = "Physical Abuse Score") + gghisto
# physical neglect x sex
t.test(WK2_CTQSF_PhyNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_PhyNeg_RS by ED_GenderBirthCert
t = -2.0304, df = 1876.9, p-value = 0.04246
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.359130499 -0.006219423
sample estimates:
mean in group Female mean in group Male
1.462033 1.644708
p4 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyNeg_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "Physical Neglect by Gender",
x = " ",
y = "Physical Neglect Score") + gghisto
#emotional abuse x sex
t.test(WK2_CTQSF_EmoAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_EmoAbu_RS by ED_GenderBirthCert
t = 8.046, df = 2232.8, p-value = 1.375e-15
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
0.6276099 1.0321326
sample estimates:
mean in group Female mean in group Male
2.865508 2.035637
p5 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoAbu_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "Emotional Abuse by Gender",
x = " ",
y = "Emotional Abuse Score") + gghisto
#emotional neglect x sex
t.test(WK2_CTQSF_EmoNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_EmoNeg_RS by ED_GenderBirthCert
t = -0.51873, df = 1971.3, p-value = 0.604
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.2415469 0.1404965
sample estimates:
mean in group Female mean in group Male
1.904118 1.954644
p6 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoNeg_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "Emotional Neglect by Gender",
x = " ",
y = "Emotional Neglect Score") + gghisto
#bullying x sex
p7 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_Bullying_Total)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "Bullying by Gender",
x = " ",
y = "Bullying Score") + gghisto
ctq.sex.plots <- grid.arrange(p1, p2, p3, p4, p5, p6, p7, ncol = 3)
#save these plots to a pdf
#ggsave("ctq.sex.plots.pdf", ctq.sex.plots, width = 12, height = 10)
#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_Bullying_Total by ED_GenderBirthCert
t = 0.093303, df = 2028.3, p-value = 0.9257
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.1815477 0.1996852
sample estimates:
mean in group Female mean in group Male
2.969112 2.960043
#Look at the relationship between RACE and each CTQ category
# List of CTQ categories and their corresponding columns
ctq_tests <- list(
CTQ_Total = "CTQ_Total",
Physical_Abuse = "WK2_CTQSF_PhyAbu_RS",
Physical_Neglect = "WK2_CTQSF_PhyNeg_RS",
Emotional_Abuse = "WK2_CTQSF_EmoAbu_RS",
Emotional_Neglect = "WK2_CTQSF_EmoNeg_RS",
Sexual_Abuse = "WK2_CTQSF_SexAbu_RS",
Bullying = "WK2_Bullying_Total"
)
# Initialize a vector to store significant results
significant_tests <- c()
# Perform Kruskal-Wallis test and Dunn's test for each CTQ category
for (name in names(ctq_tests)) {
cat("Testing:", name, "\n")
kw_test <- kruskal.test(df[[ctq_tests[[name]]]] ~ df$ED_RaceEthCode)
print(kw_test)
if (kw_test$p.value < 0.05) {
cat("Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...\n")
dunn_result <- dunn.test(df[[ctq_tests[[name]]]], df$ED_RaceEthCode, method = "bh")
print(dunn_result)
significant_tests <- c(significant_tests, name)
} else {
cat("Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.\n")
}
cat("\n")
}
Testing: CTQ_Total
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 7.7918, df = 3, p-value = 0.05052
Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.
Testing: Physical_Abuse
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 7.5855, df = 3, p-value = 0.0554
Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.
Testing: Physical_Neglect
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 16.152, df = 3, p-value = 0.001056
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 16.1517, df = 3, p-value = 0
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | Black Hispanic Other
---------+---------------------------------
Hispanic | -2.334541
| 0.0196*
|
Other | -0.005352 1.280521
| 0.4979 0.1503
|
White | 2.476521 3.850210 0.998172
| 0.0199* 0.0004* 0.1909
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 16.1517
$Z
[1] -2.33454133 -0.00535235 1.28052161 2.47652115 3.85021063 0.99817227
$P
[1] 9.783697e-03 4.978647e-01 1.001809e-01 6.633488e-03 5.900814e-05 1.590979e-01
$P.adjusted
[1] 0.0195673948 0.4978647316 0.1502713109 0.0199004649 0.0003540489 0.1909174969
$comparisons
[1] "Black - Hispanic" "Black - Other" "Hispanic - Other" "Black - White" "Hispanic - White" "Other - White"
Testing: Emotional_Abuse
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 21.301, df = 3, p-value = 9.116e-05
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 21.3011, df = 3, p-value = 0
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | Black Hispanic Other
---------+---------------------------------
Hispanic | -2.330242
| 0.0297
|
Other | -0.628512 0.717342
| 0.3178 0.3549
|
White | -4.472250 -0.616834 -1.172279
| 0.0000* 0.2687 0.2411
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 21.30109
$Z
[1] -2.3302421 -0.6285124 0.7173426 -4.4722501 -0.6168349 -1.1722795
$P
[1] 9.896680e-03 2.648341e-01 2.365814e-01 3.870042e-06 2.686718e-01 1.205424e-01
$P.adjusted
[1] 2.969004e-02 3.178010e-01 3.548720e-01 2.322025e-05 2.686718e-01 2.410849e-01
$comparisons
[1] "Black - Hispanic" "Black - Other" "Hispanic - Other" "Black - White" "Hispanic - White" "Other - White"
Testing: Emotional_Neglect
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 6.728, df = 3, p-value = 0.08109
Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.
Testing: Sexual_Abuse
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 9.7111, df = 3, p-value = 0.02119
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 9.7111, df = 3, p-value = 0.02
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | Black Hispanic Other
---------+---------------------------------
Hispanic | -0.965636
| 0.3342
|
Other | 0.415519 0.905601
| 0.3389 0.2739
|
White | 2.570674 2.585689 0.620256
| 0.0152* 0.0292* 0.3211
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 9.711097
$Z
[1] -0.9656370 0.4155199 0.9056014 2.5706744 2.5856895 0.6202568
$P
[1] 0.167112936 0.338880672 0.182573441 0.005075035 0.004859222 0.267544376
$P.adjusted
[1] 0.33422587 0.33888067 0.27386016 0.01522510 0.02915533 0.32105325
$comparisons
[1] "Black - Hispanic" "Black - Other" "Hispanic - Other" "Black - White" "Hispanic - White" "Other - White"
Testing: Bullying
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 14.311, df = 3, p-value = 0.002511
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 14.3111, df = 3, p-value = 0
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | Black Hispanic Other
---------+---------------------------------
Hispanic | -1.381831
| 0.1670
|
Other | 0.107357 0.857417
| 0.4573 0.2347
|
White | -3.684295 -1.028753 -1.583135
| 0.0007* 0.2277 0.1701
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 14.31115
$Z
[1] -1.3818314 0.1073575 0.8574171 -3.6842953 -1.0287538 -1.5831355
$P
[1] 0.0835117408 0.4572526697 0.1956072094 0.0001146681 0.1517976943 0.0566952918
$P.adjusted
[1] 0.1670234816 0.4572526697 0.2347286513 0.0006880087 0.2276965414 0.1700858754
$comparisons
[1] "Black - Hispanic" "Black - Other" "Hispanic - Other" "Black - White" "Hispanic - White" "Other - White"
# Print names of significant CTQ categories
if (length(significant_tests) > 0) {
cat("CTQ categories with significant Kruskal-Wallis test and corresponding Dunn's test results:\n")
cat(paste(significant_tests, collapse = "\n"))
} else {
cat("No CTQ categories with significant Kruskal-Wallis test.\n")
}
CTQ categories with significant Kruskal-Wallis test and corresponding Dunn's test results:
Physical_Neglect
Emotional_Abuse
Sexual_Abuse
Bullying
library(knitr)
library(gt)
df %>%
group_by(ED_RaceEthCode) %>%
summarize(
Mean_CTQ_Tot = mean(CTQ_Total, na.rm = TRUE),
SD_CTQ_Tot = sd(CTQ_Total, na.rm = TRUE),
Mean_Phys_Abuse = mean(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
SD_Phys_Abuse = sd(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
Mean_Phys_Neglect = mean(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
SD_Phys_Neglect = sd(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
Mean_Emo_Abuse = mean(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
SD_Emo_Abuse = sd(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
Mean_Emo_Neglect = mean(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
SD_Emo_Neglect = sd(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
Mean_Sex_Abuse = mean(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
SD_Sex_Abuse = sd(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
Mean_Bullying = mean(WK2_Bullying_Total, na.rm = TRUE),
SD_Bullying = sd(WK2_Bullying_Total, na.rm = TRUE)
) %>%
gt() %>%
tab_header(
title = "Mean and Standard Deviation of CTQ Scores by Race"
) %>%
fmt_number(
columns = vars(starts_with("Mean"), starts_with("SD")),
decimals = 2
)
| Mean and Standard Deviation of CTQ Scores by Race | ||||||||||||||
| ED_RaceEthCode | Mean_CTQ_Tot | SD_CTQ_Tot | Mean_Phys_Abuse | SD_Phys_Abuse | Mean_Phys_Neglect | SD_Phys_Neglect | Mean_Emo_Abuse | SD_Emo_Abuse | Mean_Emo_Neglect | SD_Emo_Neglect | Mean_Sex_Abuse | SD_Sex_Abuse | Mean_Bullying | SD_Bullying |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Black | 9.62 | 10.03 | 1.54 | 2.30 | 1.65 | 2.31 | 2.33 | 2.57 | 1.98 | 2.48 | 2.13 | 3.56 | 2.83 | 2.43 |
| Hispanic | 10.91 | 10.37 | 1.90 | 2.48 | 1.82 | 2.16 | 2.75 | 2.72 | 2.18 | 2.34 | 2.26 | 3.62 | 3.04 | 2.44 |
| Other | 9.44 | 9.15 | 1.68 | 2.35 | 1.62 | 2.24 | 2.48 | 2.59 | 1.91 | 2.37 | 1.75 | 2.86 | 2.77 | 2.34 |
| White | 8.93 | 9.32 | 1.48 | 2.25 | 1.27 | 1.86 | 2.81 | 2.68 | 1.78 | 2.18 | 1.58 | 2.97 | 3.16 | 2.26 |
| NA | 6.71 | 7.61 | 0.57 | 1.13 | 1.00 | 1.15 | 2.29 | 3.09 | 1.29 | 1.80 | 1.57 | 3.36 | 2.29 | 3.15 |
library(ggpubr)
cor_results <- list(
"ADI vs PDI" = cor.test(df$ADI_NatRank, df$ED_PDI_RS, use="complete.obs", method = "spearman"),
"ADI vs CTQ" = cor.test(df$ADI_NatRank, df$CTQ_Total, use="complete.obs", method = "spearman"),
"PDI vs CTQ" = cor.test(df$ED_PDI_RS, df$CTQ_Total, use="complete.obs", method = "spearman"))
#print the number of complete cases for each of ADI, PDI, and CTQ
sum(complete.cases(df$ADI_NatRank))
[1] 2400
sum(complete.cases(df$ED_PDI_RS))
[1] 2341
sum(complete.cases(df$CTQ_Total))
[1] 2480
#print the number of complete cases overlapping between PDI and ADI
sum(complete.cases(df$ADI_NatRank, df$ED_PDI_RS))
[1] 2263
data.frame(
Comparison = names(cor_results),
rho = sapply(cor_results, function(x) round(x$estimate, 6)),
p_value = sapply(cor_results, function(x) round(x$p.value, 6)))
#plot of PDI x CTQ
p1 <- ggplot(df, aes(x = ED_PDI_RS, y = CTQ_Total)) +
geom_point(alpha = 0.3, size=3) +
geom_smooth(method = "lm", se = FALSE, color = "magenta3", lwd=3) +
stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) +
labs(title = "Scatter Plot of PDI vs CTQ Total",
x = "PDI", y = "CTQ Total") + gghisto
#Plot of ADI x CTQ
p2 <- ggplot(df, aes(x = ADI_NatRank, y = CTQ_Total)) +
geom_point(alpha = 0.3, size=3) +
geom_smooth(method = "lm", se = FALSE, color = "purple3", lwd=3) +
stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) +
labs(title = "Scatter Plot of ADI vs CTQ Total",
x = "ADI", y = "CTQ Total") + gghisto
grid.arrange(p1,p2)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
p7 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = CTQ_Total, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Composite CTQ by Race",
x = " ", y = "CTQ Composite Score") + gghisto
# Sexual assault CTQ x race
p8 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_SexAbu_RS, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Sexual Abuse by Race",
x = " ", y = "Sexual Abuse Score") + gghisto
# physical abuse x race
p9 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyAbu_RS, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Physical Abuse by Race",
x = " ", y = "Physical Abuse Score") + gghisto
# physical neglect x race
p10 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyNeg_RS, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Physical Neglect by Race",
x = " ", y = "Physical Neglect Score") + gghisto
#emotional abuse x race
p11 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoAbu_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Emotional Abuse by Race",
x = " ", y = "Emotional Abuse Score") + gghisto
#emotional neglect x race
p12 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoNeg_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "Emotional Neglect by Race",
x = " ", y = "Emotional Neglect Score") +
theme(legend.position = "none") + gghisto
#bullying x race
p13 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_Bullying_Total, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "Bullying by Race",
x = " ", y = "Bullying Score") +
theme(legend.position = "none") + gghisto
ctq.race.plots_simple <- grid.arrange(p7, p8, p9, p10, p11, p12, p13, ncol = 3)
#save as pdf
#ggsave("ctq.race.plots_simple.pdf", ctq.race.plots_simple, width = 12, height = 10)
library(ggsignif)
ggplot(df, aes(x = ED_GenderBirthCert, y = CTQ_Total, fill = ED_GenderBirthCert)) +
geom_boxplot() +
labs(title = "Boxplot of CTQ Total by Sex",
x = "Sex",
y = "CTQ Total") +
scale_fill_manual(values = c("pink", "skyblue")) + gghisto
#ttest of ctq total by sex
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)
Welch Two Sample t-test
data: CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.535354 3.036842
sample estimates:
mean in group Female mean in group Male
10.355212 8.069114
#calculate the % of participants reported experiencing at least one instance of any type of ELA (i.e. either CTQ≥1 or childhood Bullying ≥1) and the M (SD) for composite CTQ
df %>%
summarize(perc_any_ELA = mean(CTQ_Total >= 1 | WK2_Bullying_Total >= 1, na.rm = TRUE),
mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
sd_CTQ = sd(CTQ_Total, na.rm = TRUE))
how many people had at a score of 1 or more for at least two different CTQ subtypes
# Create a logical matrix where TRUE indicates a score of 1 or more
logical_matrix <- df %>%
select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS,
WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
mutate(across(everything(), ~ . >= 1))
# Count the number of TRUE values in each row
df$ctq_total_count <- rowSums(logical_matrix)
# Filter individuals with at least 2 CTQ subtypes with scores of 1 or more
sum(df$ctq_total_count >= 2)
[1] 1906
#what is the most common ctq subtype with percentages out of 2480
df %>%
select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS,
WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
pivot_longer(cols = everything(), names_to = "CTQ_Subtype", values_to = "Score") %>%
filter(Score >= 1) %>%
count(CTQ_Subtype) %>%
arrange(desc(n)) %>%
mutate(perc = n/2480)
#how many cases in the df had either bullying score 1+ or total CTQ score 1+
df %>% filter(CTQ_Total >= 1 | WK2_Bullying_Total >= 1) %>% nrow()
[1] 2260
#ttest comparing ctq in men vs women
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)
Welch Two Sample t-test
data: CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.535354 3.036842
sample estimates:
mean in group Female mean in group Male
10.355212 8.069114
#caclulcate std in addition to means for sex
df %>%
group_by(ED_GenderBirthCert) %>%
summarize(mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
sd_CTQ = sd(CTQ_Total, na.rm = TRUE))
#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_Bullying_Total by ED_GenderBirthCert
t = 0.093303, df = 2028.3, p-value = 0.9257
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.1815477 0.1996852
sample estimates:
mean in group Female mean in group Male
2.969112 2.960043
# Define the columns to correlate
columns <- colnames(df)[16:22]
#spearman correlation between bullying and each CTQ type
cor.test(df$WK2_Bullying_Total, df$WK2_CTQSF_PhyAbu_RS, method = "spearman")
Spearman's rank correlation rho
data: df$WK2_Bullying_Total and df$WK2_CTQSF_PhyAbu_RS
S = 1315924699, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.4823606
cor.test(df$WK2_Bullying_Total, df$WK2_CTQSF_EmoAbu_RS, method = "spearman")
Spearman's rank correlation rho
data: df$WK2_Bullying_Total and df$WK2_CTQSF_EmoAbu_RS
S = 1041136106, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.590453
cor.test(df$WK2_Bullying_Total, df$WK2_CTQSF_SexAbu_RS, method = "spearman")
Spearman's rank correlation rho
data: df$WK2_Bullying_Total and df$WK2_CTQSF_SexAbu_RS
S = 1629551991, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.3589905
cor.test(df$WK2_Bullying_Total, df$WK2_CTQSF_PhyNeg_RS, method = "spearman")
Spearman's rank correlation rho
data: df$WK2_Bullying_Total and df$WK2_CTQSF_PhyNeg_RS
S = 2093431990, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1765161
cor.test(df$WK2_Bullying_Total, df$WK2_CTQSF_EmoNeg_RS, method = "spearman")
Spearman's rank correlation rho
data: df$WK2_Bullying_Total and df$WK2_CTQSF_EmoNeg_RS
S = 1937158406, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.2379887
# Function to calculate Spearman correlations, p-values, and number of complete cases
correlation_summary <- function(x, y) {
cor_test <- cor.test(df[[x]], df[[y]], method = "spearman")
tibble(
Variable1 = x,
Variable2 = y,
Rho = cor_test$estimate,
P_Value = cor_test$p.value,
)
}
# Create a summary table
all_ctq_correlations <- expand_grid(Variable1 = columns, Variable2 = columns) %>%
filter(Variable1 != Variable2) %>%
pmap_dfr(~ correlation_summary(..1, ..2))
#save ctq correlations to a csv
write.csv(all_ctq_correlations, "all_ELA_correlations_for_Lauren.csv")
# Compute the correlation matrix
cor_matrix <- cor(df[,c(16:22)], use = "complete.obs", method="spearman")
#make a table that gives the correlation rho, p-value, and sample size for each spearman correlation
cor_table <- cor.mtest(df[,c(16:22)], conf.level = 0.95, method = "spearman")
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
Warning in cor.test.default(x = mat[, i], y = mat[, j], ...) :
Cannot compute exact p-value with ties
#save cor_matrix as a csv
write.csv(cor_matrix, "ELA_cor_matrix_for_Lauren.csv")
# Plot the correlation matrix
ggcorrplot(cor_matrix, lab = TRUE)
corrplot(cor_matrix, method = "color", type = "upper",
tl.col = "black", tl.srt = 45, addCoef.col = "black",
col = colorRampPalette(c("blue", "white", "red"))(200))
Examine data distributions
Main multi-nominal regression model with CTQ as a predictor variable, including our standard covariates (same in linear models), and latent pain trajectories as the dependent variables. Model 2 is this with extra features. extract marginal means from base model for making graphs
#pain trajectory ~ age + sex + race + site + CTQ
m1 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total, data = df)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3121.158684
iter 20 value 3081.890408
iter 30 value 3080.574683
final value 3080.567070
converged
#view output
tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#examine variance inflation factor to assess collinearity
vif(m1)
Warning in vif.default(m1) : No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 12.103902 1 3.479066
Site_New 3.792135 2 1.395471
ED_RaceEthCode 3.031107 3 1.203003
ED_GenderBirthCert 1.753030 1 1.324021
CTQ_Total 2.846424 1 1.687135
Variance Inflation Factor (VIF) is used to detect multicollinearity. VIF > 10: Indicates high multicollinearity that might be problematic. VIF between 5 and 10: Moderate multicollinearity that might need addressing. VIF < 5: Generally considered acceptable.
Repeat this but with the z transformed data
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3121.158684
iter 20 value 3081.890408
iter 30 value 3080.574683
final value 3080.567070
converged
m2 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2728.643818
iter 20 value 2569.897008
iter 30 value 2493.320633
iter 40 value 2482.429441
iter 50 value 2482.132928
final value 2482.128451
converged
tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
vif(m2)
Warning in vif.default(m2) : No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 17.705104 1 4.207743
Site_New 4.738180 2 1.475377
ED_RaceEthCode 4.058971 3 1.262998
ED_GenderBirthCert 1.941989 1 1.393553
CTQ_Total 3.011481 1 1.735362
BMI 18.363059 1 4.285214
PRE_Pain_MdSv 3.067245 1 1.751355
ED_Marital 10.289081 2 1.790994
ADI_NatRank 12.774041 1 3.574079
ED_Event_BroadClass 2.156177 2 1.211772
ED_PDI_RS 7.261573 1 2.694731
# Create a list of categorical variables to test
categorical_vars <- c("ED_GenderBirthCert", "ED_Marital", "ED_RaceEthCode", "ED_Event_BroadClass", "Pain_Class")
# Generate all combinations of categorical variables
combinations <- combn(categorical_vars, 2, simplify = FALSE)
# Function to perform chi-square test and return results
perform_chisq_test <- function(var1, var2, data) {
table_data <- table(data[[var1]], data[[var2]])
test_result <- chisq.test(table_data)
result <- list(
variables = paste(var1, "vs", var2),
chi2 = test_result$statistic,
p_value = test_result$p.value
)
return(result)
}
# Perform chi-square tests on all combinations
results <- lapply(combinations, function(vars) perform_chisq_test(vars[1], vars[2], df))
# Print results
for (result in results) {
cat("Chi-Square Test for", result$variables, "\n")
cat("Chi-Square Statistic:", result$chi2, "\n")
cat("P-Value:", result$p_value, "\n\n")
}
Chi-Square Test for ED_GenderBirthCert vs ED_Marital
Chi-Square Statistic: 3.114948
P-Value: 0.2106676
Chi-Square Test for ED_GenderBirthCert vs ED_RaceEthCode
Chi-Square Statistic: 6.604558
P-Value: 0.08562894
Chi-Square Test for ED_GenderBirthCert vs ED_Event_BroadClass
Chi-Square Statistic: 40.6724
P-Value: 1.47265e-09
Chi-Square Test for ED_GenderBirthCert vs Pain_Class
Chi-Square Statistic: 32.45815
P-Value: 4.19001e-07
Chi-Square Test for ED_Marital vs ED_RaceEthCode
Chi-Square Statistic: 98.24693
P-Value: 5.82338e-19
Chi-Square Test for ED_Marital vs ED_Event_BroadClass
Chi-Square Statistic: 43.78164
P-Value: 7.121953e-09
Chi-Square Test for ED_Marital vs Pain_Class
Chi-Square Statistic: 23.03797
P-Value: 0.0007838599
Chi-Square Test for ED_RaceEthCode vs ED_Event_BroadClass
Chi-Square Statistic: 64.35096
P-Value: 5.852822e-12
Chi-Square Test for ED_RaceEthCode vs Pain_Class
Chi-Square Statistic: 36.85666
P-Value: 2.791426e-05
Chi-Square Test for ED_Event_BroadClass vs Pain_Class
Chi-Square Statistic: 10.04633
P-Value: 0.1227143
library(chisq.posthoc.test)
chisq.posthoc.test(table(df$ED_RaceEthCode, df$ED_GenderBirthCert), method = "bonferroni")
ggplot(df %>% drop_na(ED_RaceEthCode, ED_Marital), aes(x = ED_RaceEthCode, fill = ED_RaceEthCode)) +
geom_bar() + facet_wrap(~ ED_Marital) +
scale_x_discrete(labels = c("1" = "Hispanic",
"2" = "NH White",
"3" = "NH Black",
"4" = "Other"))+
labs(title = "Counts of Race Ethnicity by Marital Status",
x = "Race Ethnicity", y = "Count", fill = "Race Ethnicity") + gghisto
ggplot(df %>% drop_na(ED_RaceEthCode, ED_GenderBirthCert), aes(x = ED_RaceEthCode, fill = ED_RaceEthCode)) +
geom_bar() + facet_wrap(~ ED_GenderBirthCert) +
scale_x_discrete(labels = c("1" = "Hispanic",
"2" = "NH White",
"3" = "NH Black",
"4" = "Other"))+
labs(title = "Counts of Race Ethnicity by Sex",
x = "Race Ethnicity", y = "Count", fill = "Race Ethnicity") + gghisto
ggplot(df %>% drop_na(ED_RaceEthCode, ED_highestgrade), aes(x = ED_RaceEthCode, fill = ED_RaceEthCode)) +
geom_bar() + facet_wrap(~ ED_highestgrade) +
scale_x_discrete(labels = c("1" = "Hispanic",
"2" = "NH White",
"3" = "NH Black",
"4" = "Other"))+
labs(title = "Counts of Race Ethnicity by Education",
x = "Race Ethnicity", y = "Count", fill = "Race Ethnicity") + gghisto
#3rd model is same as M2 but with sex*CTQ interaction
m3 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Concussion + ED_PDI_RS + ED_GenderBirthCert*CTQ_Total, data = df_traj)
# weights: 60 (42 variable)
initial value 3013.803941
iter 10 value 2870.075351
iter 20 value 2714.636983
iter 30 value 2589.651923
iter 40 value 2585.457059
iter 50 value 2585.072939
iter 50 value 2585.072937
iter 50 value 2585.072937
final value 2585.072937
converged
tidy(m3, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#4th model is same as m0 but only week 2 emotional abuse scale
m4 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3132.788294
iter 20 value 3070.120682
iter 30 value 3066.510436
final value 3066.318761
converged
tidy(m4, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#5th model is physical abuse
m5 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3145.463616
iter 20 value 3084.213076
iter 30 value 3080.832860
final value 3080.817912
converged
tidy(m5, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))%>%
filter(term == "CTQ_Total")
#physical abuse*sex interaction term added
m5b <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS + WK2_CTQSF_PhyAbu_RS*ED_GenderBirthCert, data = df_traj)
# weights: 44 (30 variable)
initial value 3428.305955
iter 10 value 3143.496277
iter 20 value 3082.081866
iter 30 value 3076.082550
final value 3075.806127
converged
tidy(m5b, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#6th model is sexual abuse
m6 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_SexAbu_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3146.612453
iter 20 value 3093.939191
iter 30 value 3092.521772
final value 3092.509370
converged
tidy(m6, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#7th model is emotional neglect
m7 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoNeg_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3132.722284
iter 20 value 3107.694318
iter 30 value 3106.839218
final value 3106.761270
converged
tidy(m7, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#8th model is physical neglect
m8 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyNeg_RS, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3152.638071
iter 20 value 3109.332828
iter 30 value 3106.398970
final value 3106.263598
converged
tidy(m8, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m9 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS*WK2_CTQSF_PhyAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 80 (57 variable)
initial value 2905.672981
iter 10 value 2727.290452
iter 20 value 2611.579846
iter 30 value 2508.943598
iter 40 value 2474.114807
iter 50 value 2469.785722
iter 60 value 2469.681778
final value 2469.679010
converged
#10th model is bullying
m10 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_Bullying_Total, data = df_traj)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3138.794431
iter 20 value 3079.793835
iter 30 value 3076.555381
final value 3076.498578
converged
tidy(m10, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m11 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS + WK2_Bullying_Total + WK2_CTQSF_PhyAbu_RS , data = df)
# weights: 48 (33 variable)
initial value 3428.305955
iter 10 value 3168.445796
iter 20 value 3089.370072
iter 30 value 3058.198215
final value 3057.237154
converged
tidy(m11, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m12 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Triad, data = df)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3112.030561
iter 20 value 3063.137502
iter 30 value 3062.108928
final value 3062.073356
converged
tidy(m12, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
vif(m12)
Warning in vif.default(m12) : No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 12.129317 1 3.482717
Site_New 3.808734 2 1.396996
ED_RaceEthCode 3.076682 3 1.205999
ED_GenderBirthCert 1.749248 1 1.322591
CTQ_Triad 3.496597 1 1.869919
m13 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Triad + BMI + PRE_Pain_MdSv + ED_Concussion + ED_Event_BroadClass + ED_PDI_RS + ADI_NatRank, data = df)
# weights: 68 (48 variable)
initial value 2915.377041
iter 10 value 2739.210619
iter 20 value 2565.857079
iter 30 value 2498.108035
iter 40 value 2488.122117
iter 50 value 2487.250228
final value 2487.224804
converged
tidy(m13, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m14 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_RaceEthCode, data = df)
# weights: 84 (60 variable)
initial value 2905.672981
iter 10 value 2722.907873
iter 20 value 2616.833003
iter 30 value 2546.832934
iter 40 value 2481.950826
iter 50 value 2476.768761
iter 60 value 2476.190099
final value 2476.169849
converged
tidy(m14, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m15 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_GenderBirthCert, data = df)
# weights: 76 (54 variable)
initial value 2905.672981
iter 10 value 2726.539466
iter 20 value 2582.710939
iter 30 value 2500.600668
iter 40 value 2481.028857
iter 50 value 2479.968836
iter 60 value 2479.937970
final value 2479.937758
converged
tidy(m15, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#physical abuse m16
m16 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2733.545607
iter 20 value 2587.983556
iter 30 value 2490.024136
iter 40 value 2478.329919
iter 50 value 2477.954484
final value 2477.951022
converged
#physical neglect m17
m17 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.903857
iter 20 value 2584.668068
iter 30 value 2501.712727
iter 40 value 2491.840790
iter 50 value 2491.513528
final value 2491.509037
converged
#emotional abuse m18
m18 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2734.160534
iter 20 value 2554.474284
iter 30 value 2483.807215
iter 40 value 2474.618814
iter 50 value 2474.329602
final value 2474.325487
converged
#emotional neglect m19
m19 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.219498
iter 20 value 2566.620944
iter 30 value 2499.532281
iter 40 value 2490.809627
iter 50 value 2490.346968
iter 60 value 2490.335809
iter 60 value 2490.335809
iter 60 value 2490.335809
final value 2490.335809
converged
#sexual abuse m20
m20 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_SexAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2735.598816
iter 20 value 2574.704703
iter 30 value 2497.418532
iter 40 value 2487.061070
iter 50 value 2486.784705
final value 2486.781308
converged
#bullying m21
m21 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_Bullying_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2743.100075
iter 20 value 2578.785565
iter 30 value 2490.684674
iter 40 value 2479.988106
iter 50 value 2479.503675
iter 60 value 2479.495449
iter 60 value 2479.495449
iter 60 value 2479.495449
final value 2479.495449
converged
tidy(m16, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m17, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m18, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m19, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m20, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m21, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m22 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + ED_GenderBirthCert*CTQ_Total, data = df)
# weights: 44 (30 variable)
initial value 3428.305955
iter 10 value 3209.314694
iter 20 value 3081.328543
iter 30 value 3076.161084
final value 3075.998418
converged
tidy(m22, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m23 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + ED_RaceEthCode*CTQ_Total, data = df)
# weights: 52 (36 variable)
initial value 3428.305955
iter 10 value 3235.013363
iter 20 value 3129.374452
iter 30 value 3075.511681
iter 40 value 3073.881816
final value 3073.873786
converged
tidy(m23, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
df_fem <- df %>% filter(ED_GenderBirthCert == "Female")
m24 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data=df_fem)
# weights: 68 (48 variable)
initial value 1821.590791
iter 10 value 1749.478705
iter 20 value 1646.945986
iter 30 value 1598.150436
iter 40 value 1595.869432
iter 50 value 1595.840267
final value 1595.840098
converged
tidy(m24, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
df_male <- df %>% filter(ED_GenderBirthCert == "Male")
m25 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data=df_male)
# weights: 68 (48 variable)
initial value 1084.082190
iter 10 value 961.847223
iter 20 value 902.498097
iter 30 value 870.184813
iter 40 value 868.729204
final value 868.722781
converged
tidy(m25, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#Model 2 but instead of physical abuse score, its a binary score of whether or not the pt had any physical abuse
m26 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.488587
iter 20 value 2541.484969
iter 30 value 2487.267864
iter 40 value 2481.676257
iter 50 value 2481.372709
final value 2481.370969
converged
tidy(m26, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
Model 27: M2 + Emo_Abuse ANY
#Model 2 but instead of emotional abuse score, its a binary score of whether or not the pt had any emotional abuse
m27 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.813403
iter 20 value 2538.022720
iter 30 value 2486.831808
iter 40 value 2482.419118
iter 50 value 2482.179920
final value 2482.179663
converged
tidy(m27, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 28: M2 + Emo_Neglect ANY
#Model 2 but instead of emotional neglect score, its a binary score of whether or not the pt had any emotional neglect
m28 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.932187
iter 20 value 2547.032244
iter 30 value 2494.450942
iter 40 value 2490.221890
iter 50 value 2490.039676
final value 2490.038322
converged
tidy(m28, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 29: M2 + Phy_Neglect ANY
#Model 2 but instead of physical neglect score, its a binary score of whether or not the pt had any physical neglect
m29 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.969951
iter 20 value 2547.879003
iter 30 value 2493.444387
iter 40 value 2489.112052
iter 50 value 2488.981415
final value 2488.979319
converged
tidy(m29, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 30: M2 + Sexual Abuse ANY
#Model 2 but instead of sexual abuse score, its a binary score of whether or not the pt had any sexual abuse
m30 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + SexAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.651133
iter 20 value 2542.048381
iter 30 value 2490.932778
iter 40 value 2486.717375
iter 50 value 2486.504995
final value 2486.501431
converged
tidy(m30, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#Model 2 but instead of bullying score, its a binary score of whether or not the pt had any bullying
m31 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + Bullying_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.435587
iter 20 value 2546.950367
iter 30 value 2494.299098
iter 40 value 2488.287607
iter 50 value 2488.161517
final value 2488.159906
converged
tidy(m31, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#write a model that does multinomial stepwise selection to predict pain_class from the best features from df
#rewrite stepwise code to remove missing values
invisible(capture.output(
stepwise_selection <- stepAIC(
multinom(Pain_Class ~ ., data = drop_na(df)),
direction = "both",
trace = 0)
))
Warning in stepAIC(multinom(Pain_Class ~ ., data = drop_na(df)), direction = "both", :
0 df terms are changing AIC
# Initial features (excluding the response variable 'Pain_Class')
initial_features <- names(df)[names(df) != "Pain_Class"]
included_features <- names(coef(stepwise_selection))
excluded_features <- setdiff(initial_features, included_features)
# Print included and excluded features
cat("Included features:\n")
Included features:
print(included_features)
NULL
cat("\nExcluded features:\n")
Excluded features:
print(excluded_features)
[1] "ED_Age" "Site_New" "ED_RaceEthCode" "ED_GenderBirthCert" "ADI_NatRank"
[6] "CTQ_Total" "BMI" "PRE_Pain_MdSv" "WK2_EmploymentCode" "ED_Marital"
[11] "ED_highestgrade" "ED_Concussion" "ED_Event_BroadClass" "ED_PDI_RS" "WK2_CTQSF_PhyAbu_RS"
[16] "WK2_CTQSF_EmoAbu_RS" "WK2_CTQSF_SexAbu_RS" "WK2_CTQSF_PhyNeg_RS" "WK2_CTQSF_EmoNeg_RS" "WK2_Bullying_Total"
[21] "CTQ_Triad" "Bullying_Any" "PhyAbu_Any" "EmoAbu_Any" "SexAbu_Any"
[26] "PhyNeg_Any" "EmoNeg_Any" "CTQ_Any" "ctq_total_count"
We wanted to make sure to include an analysis that closely mirrors our first animal figure (prior to SPS/TSE exposure, animals with ELA have increase pain-like behavior). I do not think we have done anything like this yet. Can you run an analysis to test whether there is a difference in PrePain rates/odds based on ELA?
I think this would either be a logistic regression using the CTQ composite and (separately) the Bullying composite, or a chi-square/z-proportion test and we could group CTQ and Bullying into high and low based on their median score (CTQ>6 and Bullying>3) or just use the “any CTQ” and “any Bullying variables”.
# Create binary variables for high/low CTQ and Bullying
df <- df %>%
mutate(
CTQ_HighLow = ifelse(CTQ_Total > 6, "High", "Low"),
Bullying_HighLow = ifelse(WK2_Bullying_Total > 3, "High", "Low"))
# Create binary variable for PRE_Pain_MdSv
df$PRE_Pain_MdSv2 <- ifelse(df$PRE_Pain_MdSv == "Yes", 1, 0)
# M33 Logistic regression using CTQ composite
m33 <- glm(PRE_Pain_MdSv2 ~ CTQ_Total, data = df, family = "binomial")
tidy(m33, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric)))
# M34 Logistic regression using Bullying composite
m34 <- glm(PRE_Pain_MdSv2 ~ WK2_Bullying_Total, data = df, family = "binomial")
tidy(m34, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric)))
# Chi-square test for CTQ_HighLow
chisq.test(table(df$CTQ_HighLow, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$CTQ_HighLow, df$PRE_Pain_MdSv)
X-squared = 33.911, df = 1, p-value = 5.77e-09
# Chi-square test for Bullying_HighLow
chisq.test(table(df$Bullying_HighLow, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$Bullying_HighLow, df$PRE_Pain_MdSv)
X-squared = 17.926, df = 1, p-value = 2.297e-05
# chi-square test for any CTQ
chisq.test(table(df$CTQ_Any, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$CTQ_Any, df$PRE_Pain_MdSv)
X-squared = 23.917, df = 1, p-value = 1.006e-06
# chi-square test for any bullying
chisq.test(table(df$Bullying_Any, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$Bullying_Any, df$PRE_Pain_MdSv)
X-squared = 4.2373, df = 1, p-value = 0.03955
#ANOVA on race vs CTQ
#ANOVA on ED_RaceEthCode versus CTQ
anova_result <- aov(CTQ_Total ~ ED_RaceEthCode, data = df)
summary(anova_result)
Df Sum Sq Mean Sq F value Pr(>F)
ED_RaceEthCode 3 862 287.37 3 0.0295 *
Residuals 2469 236484 95.78
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
7 observations deleted due to missingness
TukeyHSD(anova_result)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = CTQ_Total ~ ED_RaceEthCode, data = df)
$ED_RaceEthCode
diff lwr upr p adj
Hispanic-Black 1.2832267 -0.3897563 2.9562097 0.1989380
Other-Black -0.1836877 -2.9182839 2.5509085 0.9981705
White-Black -0.6973222 -1.8074146 0.4127702 0.3703250
Other-Hispanic -1.4669144 -4.5055288 1.5717001 0.6007039
White-Hispanic -1.9805489 -3.7089778 -0.2521200 0.0171293
White-Other -0.5136345 -3.2824990 2.2552300 0.9641946
#ANOVA on pain trajectory by CTQ
result <- aov(CTQ_Total ~ Pain_Class, data = df)
summary(result)
Df Sum Sq Mean Sq F value Pr(>F)
Pain_Class 3 5647 1882.4 20.08 7.39e-13 ***
Residuals 2476 232101 93.7
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
TukeyHSD(result)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = CTQ_Total ~ Pain_Class, data = df)
$Pain_Class
diff lwr upr p adj
high-low 4.074087 2.54471191 5.6034614 0.0000000
moderate-low 2.734025 1.45769353 4.0103565 0.0000002
moderate recovery-low 1.357109 -0.02229229 2.7365107 0.0557627
moderate-high -1.340062 -3.01637594 0.3362527 0.1683568
moderate recovery-high -2.716977 -4.47304012 -0.9609148 0.0004163
moderate recovery-moderate -1.376916 -2.91762703 0.1637954 0.0988564
m1 #base model m2 #base model + additional features m12 #base model + CTQ_Triad m13 #m2 + CTQ_T
m16 #m2 + physical abuse m17 #m2 + physical neglect m18 #m2 + emotional abuse m19 #m2 + emotional neglect m20 #m2 + sexual abuse m21 #m2 + bullying
# List of all models
model_names <- paste0("m", 1:31)
models <- lapply(model_names, get)
#remove m25 from list of models
model_names2 <- model_names[-25]
models2 <- lapply(model_names2, get)
# Function to get AIC and BIC
get_aic_bic <- function(model) c(AIC = AIC(model), BIC = BIC(model))
# Apply the function to all models, sort results
compared_m <- as.data.frame(t(sapply(models, get_aic_bic)))
compared_m <- cbind(model = model_names, compared_m)
compared_m <- compared_m[order(compared_m$BIC), ]
# Print the results
print(compared_m)
#m18 is m2 + emotional abuse
#m13 is m2 + triad
#m16 is m2 + physical abuse
#m21 is m2 + bullying
#m2 is our main model
#m20 is m2 + sexual abuse
#m19 is m2 + emotional neglect
Trying this with m1, base model
# Load required library
library(pROC)
# Predict probabilities for m25
probs1 <- predict(m25, df, type = "probs")
# Create one-vs-rest ROC curves
roc_list <- list()
auroc_values <- numeric(length(colnames(probs1))) # Initialize vector for AUROC values
for (class in colnames(probs1)) {
roc_list[[class]] <- roc(as.numeric(df$Pain_Class == class), probs1[,class])
auroc_values[class] <- auc(roc_list[[class]]) # Calculate AUROC
}
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
# Plot ROC curves
plot(roc_list[[1]], main = "ROC Curves for Each Class", col = 1, lwd = 2)
for (i in 2:length(roc_list)) {
plot(roc_list[[i]], add = TRUE, col = i, lwd = 2)
}
# Add AUROC values as text directly on the plot
# Use a fixed position or adjust as needed
text_positions <- c(0.8, 0.7, 0.6, 0.5, 0.4, 0.3) # Adjust as necessary for your plot
for (i in 1:length(roc_list)) {
text(x = 0.6, y = text_positions[i],
labels = paste0(names(roc_list)[i], ": AUROC = ", round(auroc_values[i], 3)),
pos = 4, cex = 0.8, col = i)
}
# Add legend to the plot
legend("bottomright", legend = names(roc_list), col = 1:length(roc_list), lwd = 2, bty = "n")
library(caret)
Loading required package: lattice
Attaching package: ‘caret’
The following object is masked from ‘package:purrr’:
lift
# Function to compute confusion matrix with model names
get_confusion_matrix <- function(model, data, model_name) {
predictions <- predict(model, newdata = data, type = "class")
cm <- confusionMatrix(predictions, data$Pain_Class)
return(list(model = model_name, confusion_matrix = cm))}
# Compute confusion matrices with model names
model_names <- c("m2", "m25")
models <- lapply(model_names, get)
conf_matrices <- mapply(get_confusion_matrix, models, MoreArgs = list(data = df), model_name = model_names, SIMPLIFY = FALSE)
# Print the confusion matrices with model names
for (cm in conf_matrices) {
cat("Model:", cm$model, "\n")
print(cm$confusion_matrix)
cat("\n")}
Model: m2
Confusion Matrix and Statistics
Reference
Prediction low high moderate moderate recovery
low 798 121 294 304
high 27 80 58 30
moderate 77 89 128 55
moderate recovery 8 5 9 13
Overall Statistics
Accuracy : 0.4862
95% CI : (0.4646, 0.5078)
No Information Rate : 0.4342
P-Value [Acc > NIR] : 9.46e-07
Kappa : 0.1852
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: low Class: high Class: moderate Class: moderate recovery
Sensitivity 0.8769 0.27119 0.26176 0.032338
Specificity 0.3938 0.93615 0.86248 0.987013
Pos Pred Value 0.5260 0.41026 0.36676 0.371429
Neg Pred Value 0.8066 0.88690 0.79336 0.811257
Prevalence 0.4342 0.14074 0.23330 0.191794
Detection Rate 0.3807 0.03817 0.06107 0.006202
Detection Prevalence 0.7238 0.09303 0.16651 0.016698
Balanced Accuracy 0.6353 0.60367 0.56212 0.509676
Model: m25
Confusion Matrix and Statistics
Reference
Prediction low high moderate moderate recovery
low 787 124 301 300
high 34 77 60 42
moderate 64 81 102 44
moderate recovery 25 13 26 16
Overall Statistics
Accuracy : 0.4685
95% CI : (0.447, 0.4901)
No Information Rate : 0.4342
P-Value [Acc > NIR] : 0.0008348
Kappa : 0.1601
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: low Class: high Class: moderate Class: moderate recovery
Sensitivity 0.8648 0.26102 0.20859 0.039801
Specificity 0.3887 0.92449 0.88239 0.962220
Pos Pred Value 0.5205 0.36150 0.35052 0.200000
Neg Pred Value 0.7894 0.88423 0.78560 0.808532
Prevalence 0.4342 0.14074 0.23330 0.191794
Detection Rate 0.3755 0.03674 0.04866 0.007634
Detection Prevalence 0.7214 0.10162 0.13884 0.038168
Balanced Accuracy 0.6268 0.59275 0.54549 0.501010
for m1, m2, m2 + each subtype, m2 + each subtype ANY likelihood ratio test m1 vs m2
apply_bh_correction <- function(models, model_names) {
correct_model <- function(model) {
model_results <- tidy(model, exponentiate = TRUE) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)
model_results <- model_results %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH")) %>%
mutate(
p.value = round(p.value, digits = 6),
p_adj_bh = round(p_adj_bh, digits = 6)
) %>%
mutate(
p.value = format(p.value, scientific = FALSE),
p_adj_bh = format(p_adj_bh, scientific = FALSE)
)
return(model_results)
}
corrected_models <- lapply(models, correct_model)
names(corrected_models) <- model_names
return(corrected_models)
}
models_list_bases <- list(m1,m2)
model_names <- c("m1", "m2")
adjusted_models <- apply_bh_correction(models_list_bases, model_names)
cat("MODEL 1 BH CORRECTED")
MODEL 1 BH CORRECTED
tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
cat("\n \n MODEL 2 BH CORRECTED")
MODEL 2 BH CORRECTED
tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
Within
cat("MODEL 13 BH CORRECTED")
MODEL 13 BH CORRECTED
tidy(m13, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
#function to get adjusted p values from list of models
extract_p_values <- function(model_df, model_name) {
model_df %>%
mutate(model = model_name) %>%
select(y.level, term, p.value,p_adj_bh, model)}
perform BH corrections on the values in combined_p_values_m16_m21
#BH corrections for models with subtypes m16-m21
#make first set of adjusted within p vals
models_list_subtypes <- list(m16, m17, m18, m19, m20, m21)
model_names2 <- c("m16", "m17", "m18", "m19", "m20", "m21")
adjusted_models2 <- apply_bh_correction(models_list_subtypes, model_names2)
combined_p_values_m16_m21 <- map2_dfr(adjusted_models2, paste0("m", 16:21), extract_p_values)
double_corrected_p_values_16_m21 <- combined_p_values_m16_m21 %>%
mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
mutate(
p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE) )
double_corrected_p_values_16_m21
#BH BETWEEN Corrections for models with binary indicators m26-m31
models_list_ANY <- list(m26, m27, m28, m29, m30, m31)
model_names3 <- c("m26", "m27", "m28", "m29", "m30", "m31")
adjusted_models3 <- apply_bh_correction(models_list_ANY, model_names3)
double_corrected_p_values_m26_m31 <- map2_dfr(adjusted_models3, paste0("m", 26:31), extract_p_values)
#perform BH corrections on the values in double_corrected_p_values_m26_m31
double_corrected_p_values_m26_m31 <- double_corrected_p_values_m26_m31 %>%
mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
mutate(
p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE) )
double_corrected_p_values_m26_m31
Table 1a: Socio-demographic variables
table1a_features <- c("ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank")
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1a_features)) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2)))
| Characteristic | N = 2,4801 |
|---|---|
| ED_GenderBirthCert | |
| Female | 1,554 (63%) |
| Male | 926 (37%) |
| ED_Age | 36.09 (13.31) |
| ED_RaceEthCode | |
| Black | 1,213 (49%) |
| Hispanic | 278 (11%) |
| Other | 91 (3.7%) |
| White | 891 (36%) |
| Unknown | 7 |
| BMI | 30.13 (8.32) |
| Unknown | 158 |
| ADI_NatRank | 63.93 (27.57) |
| Unknown | 80 |
| 1 n (%); Mean (SD) | |
Table 1b: ED/Trauma-related variable.names
table1b_features <- c("Site_New", "ED_Event_BroadClass", "ED_PDI_RS")
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1b_features)) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2)))
| Characteristic | N = 2,4801 |
|---|---|
| Site_New | |
| Midwest | 960 (39%) |
| NorthEast | 1,053 (42%) |
| SouthEast | 467 (19%) |
| ED_Event_BroadClass | |
| MVC/Non-motor Collision | 1,909 (77%) |
| Other | 340 (14%) |
| Physical/Sexual Abuse | 231 (9.3%) |
| ED_PDI_RS | 13.92 (7.24) |
| Unknown | 139 |
| 1 n (%); Mean (SD) | |
Table 1c: Past pain/stress variables
table1c_features <- c("PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")
# Create a named list to specify the type of each variable
type_list <- list(
WK2_CTQSF_PhyAbu_RS = "continuous",
WK2_CTQSF_EmoAbu_RS = "continuous",
WK2_CTQSF_SexAbu_RS = "continuous",
WK2_CTQSF_PhyNeg_RS = "continuous",
WK2_CTQSF_EmoNeg_RS = "continuous",
WK2_Bullying_Total = "continuous")
# Generate the summary table
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1c_features)) %>%
tbl_summary(
statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2,
type = type_list)))
| Characteristic | N = 2,4801 |
|---|---|
| PRE_Pain_MdSv | 777 (31%) |
| Unknown | 10 |
| CTQ_Any | 1,977 (80%) |
| CTQ_Total | 9.50 (9.79) |
| PhyAbu_Any | 1,089 (44%) |
| WK2_CTQSF_PhyAbu_RS | 1.56 (2.31) |
| EmoAbu_Any | 1,627 (66%) |
| WK2_CTQSF_EmoAbu_RS | 2.56 (2.64) |
| SexAbu_Any | 867 (35%) |
| WK2_CTQSF_SexAbu_RS | 1.93 (3.35) |
| PhyNeg_Any | 1,146 (46%) |
| WK2_CTQSF_PhyNeg_RS | 1.53 (2.14) |
| EmoNeg_Any | 1,284 (52%) |
| WK2_CTQSF_EmoNeg_RS | 1.92 (2.36) |
| Bullying_Any | 1,977 (80%) |
| WK2_Bullying_Total | 2.97 (2.37) |
| 1 n (%); Mean (SD) | |
table1_class_features <- c("Pain_Class","ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank","Site_New", "ED_Event_BroadClass", "ED_PDI_RS","PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")
# Create a named list to specify the type of each variable
type_list <- list(
WK2_CTQSF_PhyAbu_RS = "continuous",
WK2_CTQSF_EmoAbu_RS = "continuous",
WK2_CTQSF_SexAbu_RS = "continuous",
WK2_CTQSF_PhyNeg_RS = "continuous",
WK2_CTQSF_EmoNeg_RS = "continuous",
WK2_Bullying_Total = "continuous")
# Generate the summary table and compare across class and add p
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1_class_features)) %>%
tbl_summary(
by = Pain_Class,
statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2,
type = type_list) %>% add_p))
| Characteristic | low, N = 1,0701 | high, N = 3521 | moderate, N = 5901 | moderate recovery, N = 4681 | p-value2 |
|---|---|---|---|---|---|
| ED_GenderBirthCert | <0.001 | ||||
| Female | 605 (57%) | 237 (67%) | 409 (69%) | 303 (65%) | |
| Male | 465 (43%) | 115 (33%) | 181 (31%) | 165 (35%) | |
| ED_Age | 33.36 (12.52) | 40.89 (13.19) | 37.61 (13.12) | 36.81 (14.00) | <0.001 |
| ED_RaceEthCode | <0.001 | ||||
| Black | 521 (49%) | 210 (60%) | 285 (49%) | 197 (42%) | |
| Hispanic | 122 (11%) | 37 (11%) | 77 (13%) | 42 (9.0%) | |
| Other | 39 (3.6%) | 12 (3.4%) | 23 (3.9%) | 17 (3.6%) | |
| White | 387 (36%) | 93 (26%) | 201 (34%) | 210 (45%) | |
| Unknown | 1 | 0 | 4 | 2 | |
| BMI | 29.17 (7.95) | 32.15 (9.44) | 30.95 (8.28) | 29.75 (7.93) | <0.001 |
| Unknown | 78 | 21 | 38 | 21 | |
| ADI_NatRank | 62.03 (28.04) | 70.74 (26.08) | 66.28 (26.76) | 60.04 (27.52) | <0.001 |
| Unknown | 41 | 8 | 14 | 17 | |
| Site_New | 0.3 | ||||
| Midwest | 432 (40%) | 139 (39%) | 216 (37%) | 173 (37%) | |
| NorthEast | 441 (41%) | 136 (39%) | 267 (45%) | 209 (45%) | |
| SouthEast | 197 (18%) | 77 (22%) | 107 (18%) | 86 (18%) | |
| ED_Event_BroadClass | 0.12 | ||||
| MVC/Non-motor Collision | 814 (76%) | 268 (76%) | 469 (79%) | 358 (76%) | |
| Other | 152 (14%) | 40 (11%) | 76 (13%) | 72 (15%) | |
| Physical/Sexual Abuse | 104 (9.7%) | 44 (13%) | 45 (7.6%) | 38 (8.1%) | |
| ED_PDI_RS | 12.49 (7.09) | 15.81 (7.33) | 15.00 (7.07) | 14.49 (7.13) | <0.001 |
| Unknown | 43 | 27 | 42 | 27 | |
| PRE_Pain_MdSv | 189 (18%) | 214 (61%) | 251 (43%) | 123 (27%) | <0.001 |
| Unknown | 2 | 1 | 3 | 4 | |
| CTQ_Any | 805 (75%) | 301 (86%) | 490 (83%) | 381 (81%) | <0.001 |
| CTQ_Total | 8.02 (9.02) | 12.09 (10.63) | 10.75 (10.33) | 9.37 (9.54) | <0.001 |
| PhyAbu_Any | 397 (37%) | 193 (55%) | 298 (51%) | 201 (43%) | <0.001 |
| WK2_CTQSF_PhyAbu_RS | 1.23 (2.08) | 2.30 (2.74) | 1.85 (2.41) | 1.38 (2.13) | <0.001 |
| EmoAbu_Any | 623 (58%) | 260 (74%) | 416 (71%) | 328 (70%) | <0.001 |
| WK2_CTQSF_EmoAbu_RS | 2.07 (2.45) | 3.40 (2.92) | 2.94 (2.71) | 2.55 (2.51) | <0.001 |
| SexAbu_Any | 297 (28%) | 148 (42%) | 238 (40%) | 184 (39%) | <0.001 |
| WK2_CTQSF_SexAbu_RS | 1.43 (2.94) | 2.63 (3.87) | 2.37 (3.65) | 2.00 (3.24) | <0.001 |
| PhyNeg_Any | 460 (43%) | 177 (50%) | 297 (50%) | 212 (45%) | 0.012 |
| WK2_CTQSF_PhyNeg_RS | 1.47 (2.22) | 1.69 (2.17) | 1.61 (2.08) | 1.44 (2.03) | 0.028 |
| EmoNeg_Any | 525 (49%) | 189 (54%) | 327 (55%) | 243 (52%) | 0.078 |
| WK2_CTQSF_EmoNeg_RS | 1.81 (2.36) | 2.07 (2.43) | 1.98 (2.27) | 2.00 (2.39) | 0.080 |
| Bullying_Any | 814 (76%) | 290 (82%) | 476 (81%) | 397 (85%) | <0.001 |
| WK2_Bullying_Total | 2.60 (2.25) | 3.63 (2.63) | 3.17 (2.42) | 3.05 (2.25) | <0.001 |
| 1 n (%); Mean (SD) | |||||
| 2 Pearson’s Chi-squared test; Kruskal-Wallis rank sum test | |||||
# What % were bullied?
df %>%
summarize(
N = n(),
Bullying_Any = sum(Bullying_Any == 1),
Percent_Bullied = round(Bullying_Any / N * 100, 2))
#boxplot of each ctq type
df %>%
pivot_longer(
cols = starts_with("WK2_CTQSF"),
names_to = "CTQ_Type",
values_to = "Score") %>%
ggplot(aes(x = CTQ_Type, y = Score)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
kruskal_wallis_results <- function(df, features, group_var) {
test_feature <- function(feature) {
if (!feature %in% names(df)) {
return(data.frame(
Feature = feature,
Chi_Square = NA,
P_Value = NA
))}
kw_test <- tryCatch(
kruskal.test(df[[feature]] ~ df[[group_var]]),
error = function(e) {
return(list(statistic = NA, p.value = NA))
})
chi_squared <- kw_test$statistic
p_value <- kw_test$p.value
return(data.frame(
Feature = feature,
Chi_Square = chi_squared,
P_Value = p_value
))}
results <- map_dfr(features, test_feature)
return(results)
}
continuous_features <- c("WK2_CTQSF_PhyAbu_RS", "WK2_CTQSF_EmoAbu_RS", "WK2_CTQSF_SexAbu_RS",
"WK2_CTQSF_PhyNeg_RS", "WK2_CTQSF_EmoNeg_RS", "WK2_Bullying_Total",
"ED_Age", "BMI", "ADI_NatRank", "ED_PDI_RS")
kw_table <- kruskal_wallis_results(df, continuous_features, "Pain_Class")
kw_table
#save kw table as csv
write.csv(kw_table, "kw_table.csv")
#run chi square test for all categorical variables
chi_sq_results <- function(df, features, group_var) {
test_feature <- function(feature) {
if (!feature %in% names(df)) {
return(data.frame(
Feature = feature,
Chi_Square = NA,
P_Value = NA
))}
chi_sq_test <- tryCatch(
chisq.test(table(df[[feature]], df[[group_var]])),
error = function(e) {
return(list(statistic = NA, p.value = NA))
})
chi_squared <- chi_sq_test$statistic
p_value <- chi_sq_test$p.value
return(data.frame(
Feature = feature,
Chi_Square = chi_squared,
P_Value = p_value
))}
results <- map_dfr(features, test_feature)
return(results)
}
categorical_features <- c("ED_GenderBirthCert", "ED_RaceEthCode", "Site_New", "ED_Event_BroadClass", "PRE_Pain_MdSv", "CTQ_Any", "PhyAbu_Any", "EmoAbu_Any", "SexAbu_Any", "PhyNeg_Any", "EmoNeg_Any", "Bullying_Any")
chi.sq_table <- chi_sq_results(df, categorical_features, "Pain_Class")
chi.sq_table
#save chi sq table as csv
write.csv(chi.sq_table, "chi_sq_table.csv")
df %>%
group_by(Pain_Class) %>%
summarize(
Mean_Age = mean(ED_Age, na.rm = TRUE),
SD_Age = sd(ED_Age, na.rm = TRUE),
Median_Age = median(ED_Age, na.rm = TRUE),
IQR_Age = IQR(ED_Age, na.rm = TRUE),
N = n())
#print number of people who had moderate or severe pain
summary_df <- data.frame(
N = nrow(df),
PRE_Pain_MdSv = sum(df$PRE_Pain_MdSv == "Yes"),
Percent_Pain = round(sum(df$PRE_Pain_MdSv == "Yes") / nrow(df) * 100, 2))
t <- table(df$PRE_Pain_MdSv)
#make the table `t` percentages
t
No Yes
1693 777
prop.table(t) * 100
No Yes
68.54251 31.45749
df$PRE_Pain_MdSv <- as.factor(df$PRE_Pain_MdSv)
df$CTQ_Any <- as.factor(df$CTQ_Any)
df$Bullying_Any <- as.factor(df$Bullying_Any)
chisq.test(table(df$PRE_Pain_MdSv, df$Bullying_Any))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$PRE_Pain_MdSv, df$Bullying_Any)
X-squared = 4.2373, df = 1, p-value = 0.03955
chisq.test(table(df$PRE_Pain_MdSv, df$CTQ_Any))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$PRE_Pain_MdSv, df$CTQ_Any)
X-squared = 23.917, df = 1, p-value = 1.006e-06
m35 <- glm(PRE_Pain_MdSv ~ CTQ_Total, data = df, family = binomial)
tidy(m35, exponentiate = TRUE, conf.int = TRUE)
m36 <- glm(PRE_Pain_MdSv ~ WK2_Bullying_Total, data = df, family = binomial)
tidy(m36, exponentiate = TRUE, conf.int = TRUE)
ELA + previous pain z score normalized
df$CTQ_Total_z <- scale(df$CTQ_Total)
df$WK2_Bullying_Total_z <- scale(df$WK2_Bullying_Total)
model_ctq_z <- glm(PRE_Pain_MdSv ~ CTQ_Total_z, data = df, family = "binomial")
tidy(model_ctq_z, exponentiate = TRUE, conf.int = TRUE)
model_bullying_z <- glm(PRE_Pain_MdSv ~ WK2_Bullying_Total_z, data = df, family = "binomial")
tidy(model_bullying_z, exponentiate = TRUE, conf.int = TRUE)
#make a csv that contains PID, Pain_Class, CTQ_Total, and Bullying score
df_Lauren <- df_traj %>% select(PID, Pain_Class, CTQ_Total, WK2_Bullying_Total)
#remove NA values from df_lauren
df_Lauren <- df_Lauren %>% drop_na()
write.csv(df_Lauren, "df_2480_Freeze4_data_Lauren.csv")
check and clean each feature data (✓) re-code trauma types~ (✓) run multicollinearity test (3) for each ctq subtypes, ttest and plot for gender (✓) for each ctq subtype, tteset and plot for race (✓) ANOVA on race vs CTQ (✓) new CTQ combo variable model (✓) run multicollinearity on newly added features (✓) add income and re-cat education to m2 (✓) base model + CTQ*race (✓)