This document contains the code for the AURORA ELA analysis published in the manuscript, Early life adversity increases risk for chronic posttraumatic pain, data from humans and rodents by McKibben et al. In this analysis, we explore the relationship between childhood trauma (measured via CTQ), bullying, and pain trajectory in the AURORA dataset.
For additional work by our research team, see Linnstaedt Lab Website
rm(list = ls())
libraries <- c("ggsignif","tidyverse", "rstatix", "corrplot", "nlme", "sjPlot", "readxl", "nnet", "chisq.posthoc.test","broom", "knitr","lmtest", "ggpubr","gt","forcats","car", "highcharter", "ggplot2", "corrr", "ggcorrplot", "gridExtra", "skimr", "pROC", "gtsummary", "dunn.test","tidyr", "purrr", "dplyr")
# Install missing libraries
#installed <- libraries[!libraries %in% installed.packages()[, "Package"]]
#if (length(installed) > 0) install.packages(installed)
# Load libraries
#lapply(libraries, library, character.only = TRUE)
suppressMessages(suppressWarnings(
invisible(lapply(libraries, library, character.only = TRUE))))
#install all libraries in the list libaries
#install.packages(libraries, dependencies = TRUE)
# Explicitly set select to refer to dplyr::select
select <- dplyr::select
gghisto <- list(
theme(axis.text.x = element_text(face="bold", color="cornflowerblue", size=14, angle= 20),
axis.text.y = element_text(face="bold", color="royalblue4",
size=16, angle=25), axis.title=element_text(size=17,face="bold"),
plot.title = element_text(size=14,face="bold.italic")))
gghisto2 <- list(
theme(axis.text.x = element_text(face="bold", color="cornflowerblue", size=8, angle=6),
axis.text.y = element_text(face="bold", color="royalblue4",
size=12), axis.title=element_text(size=14,face="bold"),
plot.title = element_text(size=14,face="bold.italic")))
color_fill <- scale_fill_manual(values = c("1" = "violetred1", "2" = "steelblue1"))
#Read in the dataset
data_1 <- read.csv("AURORA_full_update02102022_New.csv",check.names=F)[-1]
#freeze 4 datasets
AURORA_Freeze_4_general_mod <- read_csv("AURORA_Freeze_4_general_mod.csv",na = ".")
AURORA_Freeze_4_demogr_mod <- read_csv("AURORA_Freeze_4_demogr_mod.csv",na = ".")
AURORA_Freeze_4_pain_mod <- read_csv("AURORA_Freeze_4_pain_mod.csv",na = ".")
AURORA_Freeze_4_ctq_mod <- read_csv("AURORA_Freeze_4_ctq_mod.csv",na = ".")
AURORA_F4_CTQ_item <- readxl::read_excel("AURORA_F4_CTQ_item.xlsx")
AURORA_Freeze_4_pdi_mod <- read_csv("AURORA_Freeze_4_pdi_mod.csv",na = ".")
SiteID <- read_excel("SiteID.xlsx")
FZ4_df <- AURORA_Freeze_4_demogr_mod %>% inner_join(AURORA_F4_CTQ_item,by="PID") %>%
inner_join(AURORA_Freeze_4_pain_mod,by="PID") %>%
inner_join(AURORA_Freeze_4_ctq_mod,by="PID") %>%
inner_join(AURORA_Freeze_4_pdi_mod,by="PID") %>%
inner_join(SiteID,by="PID") %>%
mutate(Site_New = case_when(SiteID %in% c("Baystate","BMC", "Beth Israel", "BWH", "Cooper","Einstein","Jefferson","MGH","Miriam","Penn","Rhode Island","St. John","St. Joseph","Temple","UMass") ~ "NorthEast",SiteID %in% c("Baylor", "Emory ED", "Jacksonville","UAB","UNC","UT Houston","UT Southwestern","Vanderbilt") ~ "SouthEast", SiteID %in% c("39","49","Beaumont Royal Oak","Beaumont Troy","Eskenazi","Henry Ford","Cincinnati","Indiana","WashU","WashU DP") ~ "Midwest")) %>%
convert_as_factor(Site_New,ED_GenderBirthCert,ED_GenderNow,ED_Marital,ED_RaceEthCode,ED_highestgrade,WK2_EmploymentCode,WK2_IncomeCode) %>%
mutate_at(vars(contains("WK2_Childhood")),as.numeric) %>% dplyr::select(-SiteID) %>% #2682
mutate(WK2_Bullying_Total = WK2_ChildhoodBullying+WK2_ChildhoodHitOrHurt) %>%
inner_join(data_1 %>% dplyr::select(PID,WK8_Pain_C,M3_Pain_C,M6_Pain_C),by="PID") %>%
filter(is.na(WK2_CTQSF_SexAbu_RS) == F) %>%
filter(is.na(WK2_CTQSF_PhyAbu_RS)== F) %>%
filter(is.na(WK2_CTQSF_EmoAbu_RS)== F) %>%
filter(is.na(WK2_CTQSF_EmoNeg_RS) == F) %>%
filter(is.na(WK2_CTQSF_PhyNeg_RS) == F) %>%
filter(is.na(WK2_Bullying_Total) == F) %>% #2483
mutate(log_WK2_CTQSF_Total_RS = log2(WK2_CTQSF_Total_RS+1),log_ED_NowPain=log2(ED_NowPain+1))
# Import datasets
pain_trajectory <- read_csv("pain_latent_class_4.csv")
opioid_df <- read_csv("AURORA_Opioid_med_FZ4.csv")
PTSD_df <- read_excel("AURORA_Freeze_4_ptsd_mod.xlsx")
Trauma_df <- read_excel("AURORA_Freeze_4_trauma_mod.xlsx")
Injury_df <- read_excel("AURORA_Freeze_4_injury_mod.xlsx")
ADI_df <- read_excel("AURORA_Freeze_4_ses_mod.xlsx",na = ".")
#table() for each individual column in 3:10 for opioid_df and include NA values
opioid_df %>% select(3:10) %>% map(table)
$pre_OPIOIDS
0 1
1083 152
$ed_OPIOIDS
0 1
1540 817
$edhm_OPIOIDS
0 1
1360 403
$wk2_OPIOIDS
0 1
1256 211
$wk8_OPIOIDS
0 1
1108 122
$m3_OPIOIDS
0 1
894 75
$m6_OPIOIDS
0 1
806 63
$m12_OPIOIDS
0 1
622 48
table_list <- lapply(opioid_df[, 3:10], function(column) {
table(column, useNA = "ifany") # Include NA values
})
names(table_list) <- names(opioid_df)[3:10]
table_list
$pre_OPIOIDS
column
0 1 <NA>
1083 152 1708
$ed_OPIOIDS
column
0 1 <NA>
1540 817 586
$edhm_OPIOIDS
column
0 1 <NA>
1360 403 1180
$wk2_OPIOIDS
column
0 1 <NA>
1256 211 1476
$wk8_OPIOIDS
column
0 1 <NA>
1108 122 1713
$m3_OPIOIDS
column
0 1 <NA>
894 75 1974
$m6_OPIOIDS
column
0 1 <NA>
806 63 2074
$m12_OPIOIDS
column
0 1 <NA>
622 48 2273
# Combine datasets
df_traj <- FZ4_df %>%
inner_join(opioid_df %>% select(PID, ed_OPIOIDS), by = "PID") %>%
inner_join(Trauma_df %>% select(PID, ED_Event_BroadClass), by = "PID") %>%
inner_join(Injury_df %>% select(PID, ED_Concussion), by = "PID") %>%
inner_join(ADI_df %>% select(PID, ADI_NatRank), by = "PID") %>%
inner_join(pain_trajectory %>% select(PID, pain_Class_MostLikely), by = "PID")
Refactor pain trajectories and Relevel to “low pain” as the reference group
df_traj <- df_traj %>%
rename(Pain_Class = pain_Class_MostLikely)
df_traj <- df_traj %>%
mutate(Pain_Class = case_when(
Pain_Class == "Class 1" ~ "moderate recovery",
Pain_Class == "Class 2" ~ "moderate",
Pain_Class == "Class 3" ~ "low",
Pain_Class == "Class 4" ~ "high",
TRUE ~ as.character(Pain_Class) # This handles any unexpected values
)) %>%
mutate(Pain_Class = factor(Pain_Class)) %>%
mutate(Pain_Class = relevel(Pain_Class, ref = "low"))
Factorize and rename categorical variables
factor_vars <- c("ED_GenderBirthCert", "ED_Event_BroadClass", "ED_Marital",
"WK2_EmploymentCode", "ED_highestgrade", "ED_RaceEthCode", "PRE_Pain_MdSv")
df_traj[factor_vars] <- lapply(df_traj[factor_vars], as.factor)
#in gender, change 1 to male and 2 to female
df_traj$ED_GenderBirthCert <- ifelse(df_traj$ED_GenderBirthCert == 1, "Male", "Female")
#in pre pain, change 1 to yes and 0 to no .. is this correct?
df_traj$PRE_Pain_MdSv <- ifelse(df_traj$PRE_Pain_MdSv == 1, "Yes", "No")
# Convert continuous variables to numeric
df_traj$ED_PDI_RS <- as.numeric(df_traj$ED_PDI_RS)
df_traj$ED_Concussion <- as.numeric(df_traj$ED_Concussion)
Warning: NAs introduced by coercion
#rename WK2 CTQ total column to CTQ_Total
df_traj <- df_traj %>%
rename(CTQ_Total = WK2_CTQSF_Total_RS)
#print column headers that contain the letters "ED"
colnames(df_traj)[grep("ED", colnames(df_traj))]
[1] "ED_GenderBirthCert" "ED_GenderNow" "ED_Age" "ED_Marital"
[5] "ED_RaceEthCode" "ED_highestgrade" "ED_NowPain_YN_COUNT" "ED_NowPain_MdSv"
[9] "ED_Pain_CSNWP_Count" "ED_NowPain" "ED_PainDiff_sum" "ED_PDI_RS"
[13] "log_ED_NowPain" "ED_Event_BroadClass" "ED_Concussion"
General clean up, renaming, removing negatives
#Make any value less than 0 in the bullying total == NA
df_traj$WK2_Bullying_Total <- ifelse(df_traj$WK2_Bullying_Total < 0, NA, df_traj$WK2_Bullying_Total)
#Make any value less than 0 in ADI == NA
df_traj$ADI_NatRank <- ifelse(df_traj$ADI_NatRank < 0, NA, df_traj$ADI_NatRank)
Remove three bullying rows have negative numbers
# Remove the rows in WK2_Bullying_Total that contain negative values
df_traj <- df_traj %>% filter(WK2_Bullying_Total >= 0)
Relevel categories for trauma type
df_traj <- df_traj %>%
mutate(ED_Event_BroadClass = case_when(
ED_Event_BroadClass == "1" ~ "MVC/Non-motor Collision",
ED_Event_BroadClass == "6" ~ "MVC/Non-motor Collision",
ED_Event_BroadClass == "2" ~ "Physical/Sexual Abuse",
ED_Event_BroadClass == "3" ~ "Physical/Sexual Abuse",
ED_Event_BroadClass == "4" ~ "Other",
ED_Event_BroadClass == "5" ~ "Other",
ED_Event_BroadClass == "7" ~ "Other",
ED_Event_BroadClass == "8" ~ "Other",
ED_Event_BroadClass == "9" ~ "Other",
ED_Event_BroadClass == "10" ~ "Other",
ED_Event_BroadClass == "11" ~ "Other",
TRUE ~ as.character(ED_Event_BroadClass) # Keeps any other values unchanged
))
# Convert ED_Event broad class to factor
df_traj$ED_Event_BroadClass <- factor(df_traj$ED_Event_BroadClass)
table(df_traj$ED_Event_BroadClass)
MVC/Non-motor Collision Other Physical/Sexual Abuse
1909 340 231
Relevel categories for education
#relevel education category
# 1- did not finish HS (codes 0-12) 2- HS grad + some college (codes 13-15) 3- college grad (Bachelors or Associates) (codes 16-18) 4- post grad codes(19-21)
df_traj <- df_traj %>%
mutate(ED_highestgrade = case_when(
ED_highestgrade == "0" ~ "Did not finish HS",
ED_highestgrade == "1" ~ "Did not finish HS",
ED_highestgrade == "7" ~ "Did not finish HS",
ED_highestgrade == "8" ~ "Did not finish HS",
ED_highestgrade == "9" ~ "Did not finish HS",
ED_highestgrade == "10" ~ "Did not finish HS",
ED_highestgrade == "11" ~ "Did not finish HS",
ED_highestgrade == "12" ~ "Did not finish HS",
ED_highestgrade == "13" ~ "HS Grad/Some College",
ED_highestgrade == "14" ~ "HS Grad/Some College",
ED_highestgrade == "15" ~ "HS Grad/Some College",
ED_highestgrade == "16" ~ "Bach or Assoc degree",
ED_highestgrade == "17" ~ "Bach or Assoc degree",
ED_highestgrade == "18" ~ "Bach or Assoc degree",
ED_highestgrade == "19" ~ "Post-graduate education",
ED_highestgrade == "20" ~ "Post-graduate education",
ED_highestgrade == "21" ~ "Post-graduate education",
ED_highestgrade == "-7" ~ NA,
ED_highestgrade == "-5" ~ NA,
TRUE ~ as.character(ED_highestgrade) # Keeps any other values unchanged
))
# Convert to factor
df_traj$ED_highestgrade <- factor(df_traj$ED_highestgrade)
table(df_traj$ED_highestgrade)
Bach or Assoc degree Did not finish HS HS Grad/Some College Post-graduate education
695 267 1321 189
Re-level marriage category
#marital status 1-married 2-separated 3-divorced 4-annulled 5-widowed 6-single
#4. marriage - combine 2+3+4+5 as a single category
df_traj <- df_traj %>%
mutate(ED_Marital = case_when(
ED_Marital == "1" ~ "Married",
ED_Marital == "2" ~ "Divorced/Widowed",
ED_Marital == "3" ~ "Divorced/Widowed",
ED_Marital == "4" ~ "Divorced/Widowed",
ED_Marital == "5" ~ "Divorced/Widowed",
ED_Marital == "6" ~ "Single",
TRUE ~ as.character(ED_Marital) # Keeps any other values unchanged
))
# Convert to factor
df_traj$ED_Marital <- factor(df_traj$ED_Marital)
table(df_traj$ED_Marital)
Divorced/Widowed Married Single
446 539 1483
Subset df_traj to our final df that will be used for model analysis, keeping only the features needed.
df <- df_traj %>%
select(PID, Pain_Class,ED_Age, Site_New, ED_RaceEthCode, ED_GenderBirthCert, ADI_NatRank, CTQ_Total,
BMI, PRE_Pain_MdSv, WK2_EmploymentCode, ED_Marital, ED_highestgrade,
ED_Concussion, ED_Event_BroadClass, ED_PDI_RS, WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_EmoAbu_RS, WK2_CTQSF_SexAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoNeg_RS, WK2_Bullying_Total, ed_OPIOIDS)
#save df as csv
#write.csv(df, "df_for_RR_PAF.csv")
Add the CTQ triad score that is a combination of bullying, physical abuse, and emotional abuse scores.
#Add combination CTQ score
df$CTQ_Triad <- df$WK2_Bullying_Total + df$WK2_CTQSF_PhyAbu_RS + df$WK2_CTQSF_EmoAbu_RS
#Print the rows that have NA in the CTQ triad
df %>% filter(is.na(CTQ_Triad))
#Confirms that for the CTQ triad, if any of the additive features contain NA, then the final score is NA
#Add the two different types of bullying question
df <- df %>%
left_join(FZ4_df %>% select(PID, BulliedSimple = 20, BulliedHitOrHurt = 21), by = "PID")
#Remove first column, PID, as this is unneeded
df <- df %>% select(-PID)
#make all of df[,c(2,6:8)]) numeric
df[,c(2,6:8)] <- lapply(df[,c(2,6:8)], as.numeric)
Binary scores for later models
df$Bullying_Any <- ifelse(df$WK2_Bullying_Total > 0, 1, 0)
df$PhyAbu_Any <- ifelse(df$WK2_CTQSF_PhyAbu_RS > 0, 1, 0)
df$EmoAbu_Any <- ifelse(df$WK2_CTQSF_EmoAbu_RS > 0, 1, 0)
df$SexAbu_Any <- ifelse(df$WK2_CTQSF_SexAbu_RS > 0, 1, 0)
df$PhyNeg_Any <- ifelse(df$WK2_CTQSF_PhyNeg_RS > 0, 1, 0)
df$EmoNeg_Any <- ifelse(df$WK2_CTQSF_EmoNeg_RS > 0, 1, 0)
df$CTQ_Any <- ifelse(df$CTQ_Total > 0, 1, 0)
Z-scaling is a method of standardizing the data so that it has a mean
of 0 and a standard deviation of 1. This is done to make the data more
interpretable and to ensure that the variables are on the same scale.
The scale function works by subtracting the mean of the
data from each value and then dividing by the standard deviation.
#scale
#df[, c(7, 16:22)] <- scale(df[, c(7, 16:22)])
#df[, c(7, 16:22)] <- lapply(df[, c(7, 16:22)], function(x) as.numeric(scale(x)))
summary(df)
Pain_Class ED_Age Site_New ED_RaceEthCode ED_GenderBirthCert
low :1070 Min. :18.00 Midwest : 960 1 : 278 Length:2480
high : 352 1st Qu.:25.00 NorthEast:1053 2 : 891 Class :character
moderate : 590 Median :33.00 SouthEast: 467 3 :1213 Mode :character
moderate recovery: 468 Mean :36.09 4 : 91
3rd Qu.:46.00 NA's: 7
Max. :74.00
ADI_NatRank CTQ_Total BMI PRE_Pain_MdSv WK2_EmploymentCode
Min. : 2.00 Min. : 0.000 Min. : 8.40 Length:2480 1 :1816
1st Qu.: 41.00 1st Qu.: 1.000 1st Qu.:24.10 Class :character 2 : 65
Median : 66.00 Median : 6.000 Median :28.40 Mode :character 3 : 55
Mean : 63.93 Mean : 9.502 Mean :30.13 4 : 96
3rd Qu.: 91.00 3rd Qu.:16.000 3rd Qu.:34.50 5 : 446
Max. :100.00 Max. :44.000 Max. :84.50 NA's: 2
NA's :80 NA's :158
ED_Marital ED_highestgrade ED_Concussion
Divorced/Widowed: 446 Bach or Assoc degree : 695 Min. :0.00000
Married : 539 Did not finish HS : 267 1st Qu.:0.00000
Single :1483 HS Grad/Some College :1321 Median :0.00000
NA's : 12 Post-graduate education: 189 Mean :0.04518
NA's : 8 3rd Qu.:0.00000
Max. :1.00000
NA's :1
ED_Event_BroadClass ED_PDI_RS WK2_CTQSF_PhyAbu_RS WK2_CTQSF_EmoAbu_RS
MVC/Non-motor Collision:1909 Min. : 0.00 Min. :0.00 Min. :0.000
Other : 340 1st Qu.: 8.00 1st Qu.:0.00 1st Qu.:0.000
Physical/Sexual Abuse : 231 Median :14.00 Median :0.00 Median :2.000
Mean :13.92 Mean :1.56 Mean :2.556
3rd Qu.:19.00 3rd Qu.:2.25 3rd Qu.:4.000
Max. :32.00 Max. :8.00 Max. :8.000
NA's :139
WK2_CTQSF_SexAbu_RS WK2_CTQSF_PhyNeg_RS WK2_CTQSF_EmoNeg_RS WK2_Bullying_Total ed_OPIOIDS
Min. : 0.000 Min. :0.00 Min. :0.000 Min. :0.000 Min. :0.0000
1st Qu.: 0.000 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.0000
Median : 0.000 Median :0.00 Median :1.000 Median :3.000 Median :0.0000
Mean : 1.932 Mean :1.53 Mean :1.923 Mean :2.966 Mean :0.3425
3rd Qu.: 3.000 3rd Qu.:3.00 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:1.0000
Max. :12.000 Max. :8.00 Max. :8.000 Max. :8.000 Max. :1.0000
NA's :489
CTQ_Triad BulliedSimple BulliedHitOrHurt Bullying_Any PhyAbu_Any
Min. : 0.000 Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.: 2.000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
Median : 5.000 Median :1.000 Median :0.0000 Median :1.0000 Median :0.0000
Mean : 7.082 Mean :1.187 Mean :0.8198 Mean :0.7972 Mean :0.4391
3rd Qu.:11.000 3rd Qu.:2.000 3rd Qu.:2.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
Max. :24.000 Max. :4.000 Max. :4.0000 Max. :1.0000 Max. :1.0000
EmoAbu_Any SexAbu_Any PhyNeg_Any EmoNeg_Any CTQ_Any
Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
Median :1.000 Median :0.0000 Median :0.0000 Median :1.0000 Median :1.0000
Mean :0.656 Mean :0.3496 Mean :0.4621 Mean :0.5177 Mean :0.7972
3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
#print median of each ctq type and bullying
df %>% summarize(median(CTQ_Total), median(WK2_CTQSF_PhyAbu_RS), median(WK2_CTQSF_EmoAbu_RS), median(WK2_CTQSF_SexAbu_RS), median(WK2_CTQSF_PhyNeg_RS), median(WK2_CTQSF_EmoNeg_RS), median(WK2_Bullying_Total))
#make boxplots of each ctq type and bullying
#bullying has the highest median after z-score transforming, so it has the highest values
df %>% pivot_longer(cols = c(7, 17:22), names_to = "CTQ_Type", values_to = "CTQ_Score") %>%
ggplot(aes(x = CTQ_Type, y = CTQ_Score)) +
geom_boxplot() +
labs(title = "Boxplot of CTQ Scores", x = "CTQ Type", y = "CTQ Score") + gghisto
Table 1: Socio-demographic variables
table1a_features <- c("ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank")
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1a_features)) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2)))
| Characteristic | N = 2,4801 |
|---|---|
| ED_GenderBirthCert | |
| Female | 1,554 (63%) |
| Male | 926 (37%) |
| ED_Age | 36.09 (13.31) |
| ED_RaceEthCode | |
| 1 | 278 (11%) |
| 2 | 891 (36%) |
| 3 | 1,213 (49%) |
| 4 | 91 (3.7%) |
| Unknown | 7 |
| BMI | 30.13 (8.32) |
| Unknown | 158 |
| ADI_NatRank | 63.93 (27.57) |
| Unknown | 80 |
| 1 n (%); Mean (SD) | |
Table 1: ED/Trauma-related variable.names
table1b_features <- c("Site_New", "ED_Event_BroadClass", "ED_PDI_RS")
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1b_features)) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2)))
| Characteristic | N = 2,4801 |
|---|---|
| Site_New | |
| Midwest | 960 (39%) |
| NorthEast | 1,053 (42%) |
| SouthEast | 467 (19%) |
| ED_Event_BroadClass | |
| MVC/Non-motor Collision | 1,909 (77%) |
| Other | 340 (14%) |
| Physical/Sexual Abuse | 231 (9.3%) |
| ED_PDI_RS | 13.92 (7.24) |
| Unknown | 139 |
| 1 n (%); Mean (SD) | |
Table 1: Past pain/stress variables
table1c_features <- c("PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")
# Create a named list to specify the type of each variable
type_list <- list(
WK2_CTQSF_PhyAbu_RS = "continuous",
WK2_CTQSF_EmoAbu_RS = "continuous",
WK2_CTQSF_SexAbu_RS = "continuous",
WK2_CTQSF_PhyNeg_RS = "continuous",
WK2_CTQSF_EmoNeg_RS = "continuous",
WK2_Bullying_Total = "continuous")
# Generate the summary table
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1c_features)) %>%
tbl_summary(
statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2,
type = type_list)))
| Characteristic | N = 2,4801 |
|---|---|
| PRE_Pain_MdSv | 777 (31%) |
| Unknown | 10 |
| CTQ_Any | 1,977 (80%) |
| CTQ_Total | 9.50 (9.79) |
| PhyAbu_Any | 1,089 (44%) |
| WK2_CTQSF_PhyAbu_RS | 1.56 (2.31) |
| EmoAbu_Any | 1,627 (66%) |
| WK2_CTQSF_EmoAbu_RS | 2.56 (2.64) |
| SexAbu_Any | 867 (35%) |
| WK2_CTQSF_SexAbu_RS | 1.93 (3.35) |
| PhyNeg_Any | 1,146 (46%) |
| WK2_CTQSF_PhyNeg_RS | 1.53 (2.14) |
| EmoNeg_Any | 1,284 (52%) |
| WK2_CTQSF_EmoNeg_RS | 1.92 (2.36) |
| Bullying_Any | 1,977 (80%) |
| WK2_Bullying_Total | 2.97 (2.37) |
| 1 n (%); Mean (SD) | |
#calculate the % of participants reported experiencing at least one instance of any type of ELA (i.e. either CTQ≥1 or childhood Bullying ≥1) and the M (SD) for composite CTQ
df %>%
summarize(perc_any_ELA = mean(CTQ_Total >= 1 | WK2_Bullying_Total >= 1, na.rm = TRUE),
mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
sd_CTQ = sd(CTQ_Total, na.rm = TRUE))
How many people had at a score of 1 or more for at least two different CTQ subtypes?
# Create a logical matrix where TRUE indicates a score of 1 or more
logical_matrix <- df %>%
select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS,
WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
mutate(across(everything(), ~ . >= 1))
# Count the number of TRUE values in each row
df$ctq_total_count <- rowSums(logical_matrix)
# Filter individuals with at least 2 CTQ subtypes with scores of 1 or more
sum(df$ctq_total_count >= 2)
[1] 1906
#what is the most common ctq subtype with percentages out of 2480
df %>%
select(WK2_CTQSF_PhyAbu_RS, WK2_CTQSF_PhyNeg_RS, WK2_CTQSF_EmoAbu_RS,
WK2_CTQSF_EmoNeg_RS, WK2_CTQSF_SexAbu_RS, WK2_Bullying_Total) %>%
pivot_longer(cols = everything(), names_to = "CTQ_Subtype", values_to = "Score") %>%
filter(Score >= 1) %>%
count(CTQ_Subtype) %>%
arrange(desc(n)) %>%
mutate(perc = n/2480)
#how many cases in the df had either bullying score 1+ or total CTQ score 1+
df %>% filter(CTQ_Total >= 1 | WK2_Bullying_Total >= 1) %>% nrow()
[1] 2260
#ttest comparing ctq in men vs women
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)
Welch Two Sample t-test
data: CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.535354 3.036842
sample estimates:
mean in group Female mean in group Male
10.355212 8.069114
#caclulcate std in addition to means for sex
df %>%
group_by(ED_GenderBirthCert) %>%
summarize(mean_CTQ = mean(CTQ_Total, na.rm = TRUE),
sd_CTQ = sd(CTQ_Total, na.rm = TRUE))
#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_Bullying_Total by ED_GenderBirthCert
t = 0.093303, df = 2028.3, p-value = 0.9257
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.1815477 0.1996852
sample estimates:
mean in group Female mean in group Male
2.969112 2.960043
#In participants with CTQ=0, what percentage and count of participants are in each pain trajectory group?
df %>%
filter(CTQ_Total == 0) %>%
count(Pain_Class) %>%
mutate(perc = n/sum(n))
# Function to calculate Spearman correlations, p-values, and number of complete cases
correlation_summary <- function(x, y) {
cor_test <- cor.test(df[[x]], df[[y]], method = "spearman")
tibble(
Variable1 = x,
Variable2 = y,
Rho = cor_test$estimate,
P_Value = cor_test$p.value,
)
}
# Define the columns to correlate
columns <- colnames(df)[16:22]
# Create a summary table
all_ctq_correlations <- expand_grid(Variable1 = columns, Variable2 = columns) %>%
filter(Variable1 != Variable2) %>%
pmap_dfr(~ correlation_summary(..1, ..2))
#save CTQ correlations to a CSV
#write.csv(all_ctq_correlations, "all_ELA_correlations_for_Lauren.csv")
Heatmap of correlations
# Compute the correlation matrix
cor_matrix <- cor(df[,c(16:22)], use = "complete.obs", method="spearman")
# Plot the correlation matrix
# Supplementary Figure 1
ggcorrplot(cor_matrix, lab = TRUE)
#Look at the relationship between ED_GenderBirthCert and CTQ_Total
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.535354 3.036842
sample estimates:
mean in group Female mean in group Male
10.355212 8.069114
p1 <- ggplot(df, aes(x = ED_GenderBirthCert, y = CTQ_Total)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "CTQ TOTAL by GENDER",
x = "GENDER",
y = "CTQ TOTAL") + gghisto
# Sexual assault CTQ x Sex
t.test(WK2_CTQSF_SexAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_SexAbu_RS by ED_GenderBirthCert
t = 10.874, df = 2462.5, p-value < 2.2e-16
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.095160 1.577066
sample estimates:
mean in group Female mean in group Male
2.431145 1.095032
p2 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_SexAbu_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "CTQ Sexual Abuse by GENDER",
x = "GENDER",
y = "CTQ Sexual Abuse") + gghisto
# physical abuse x Sex
t.test(WK2_CTQSF_PhyAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_PhyAbu_RS by ED_GenderBirthCert
t = 3.8607, df = 2199.9, p-value = 0.0001163
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
0.1738492 0.5327784
sample estimates:
mean in group Female mean in group Male
1.692407 1.339093
p3 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyAbu_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "CTQ Physical Abuse by GENDER",
x = "GENDER",
y = "CTQ phy Abuse") + gghisto
# physical neglect x sex
t.test(WK2_CTQSF_PhyNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_PhyNeg_RS by ED_GenderBirthCert
t = -2.0304, df = 1876.9, p-value = 0.04246
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.359130499 -0.006219423
sample estimates:
mean in group Female mean in group Male
1.462033 1.644708
p4 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_PhyNeg_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "CTQ Physical Neglect by GENDER",
x = "GENDER",
y = "CTQ phy neglect") + gghisto
#emotional abuse x sex
t.test(WK2_CTQSF_EmoAbu_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_EmoAbu_RS by ED_GenderBirthCert
t = 8.046, df = 2232.8, p-value = 1.375e-15
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
0.6276099 1.0321326
sample estimates:
mean in group Female mean in group Male
2.865508 2.035637
p5 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoAbu_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "CTQ Emotional Abuse by GENDER",
x = "GENDER",
y = "CTQ emo abuse") + gghisto
#emotional neglect x sex
t.test(WK2_CTQSF_EmoNeg_RS ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_CTQSF_EmoNeg_RS by ED_GenderBirthCert
t = -0.51873, df = 1971.3, p-value = 0.604
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.2415469 0.1404965
sample estimates:
mean in group Female mean in group Male
1.904118 1.954644
p6 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_CTQSF_EmoNeg_RS)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "CTQ Emotional Neglect by GENDER",
x = "GENDER",
y = "CTQ emo neglect") + gghisto
#bullying x sex
p7 <- ggplot(df, aes(x = ED_GenderBirthCert, y = WK2_Bullying_Total)) +
geom_boxplot(lwd = .9, fill=c("pink","skyblue")) +
labs(title = "Bullying by Gender",
x = " ",
y = "Bullying Score") + gghisto
ctq.sex.plots <- grid.arrange(p1, p2, p3, p4, p5, p6, p7, ncol = 3)
#save these plots to a pdf
#ggsave("ctq.sex.plots.pdf", ctq.sex.plots, width = 12, height = 10)
#welches t test for sex differences in bullying
t.test(WK2_Bullying_Total ~ ED_GenderBirthCert, data = df,var.equal = FALSE)
Welch Two Sample t-test
data: WK2_Bullying_Total by ED_GenderBirthCert
t = 0.093303, df = 2028.3, p-value = 0.9257
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.1815477 0.1996852
sample estimates:
mean in group Female mean in group Male
2.969112 2.960043
#ttest of ctq total by sex
t.test(CTQ_Total ~ ED_GenderBirthCert, data = df)
Welch Two Sample t-test
data: CTQ_Total by ED_GenderBirthCert
t = 5.9715, df = 2265.3, p-value = 2.722e-09
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
1.535354 3.036842
sample estimates:
mean in group Female mean in group Male
10.355212 8.069114
#Look at the relationship between RACE and each CTQ category
# List of CTQ categories and their corresponding columns
ctq_tests <- list(
CTQ_Total = "CTQ_Total",
Physical_Abuse = "WK2_CTQSF_PhyAbu_RS",
Physical_Neglect = "WK2_CTQSF_PhyNeg_RS",
Emotional_Abuse = "WK2_CTQSF_EmoAbu_RS",
Emotional_Neglect = "WK2_CTQSF_EmoNeg_RS",
Sexual_Abuse = "WK2_CTQSF_SexAbu_RS",
Bullying = "WK2_Bullying_Total"
)
# Initialize a vector to store significant results
significant_tests <- c()
# Perform Kruskal-Wallis test and Dunn's test for each CTQ category
for (name in names(ctq_tests)) {
cat("Testing:", name, "\n")
kw_test <- kruskal.test(df[[ctq_tests[[name]]]] ~ df$ED_RaceEthCode)
print(kw_test)
if (kw_test$p.value < 0.05) {
cat("Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...\n")
dunn_result <- dunn.test(df[[ctq_tests[[name]]]], df$ED_RaceEthCode, method = "bh")
print(dunn_result)
significant_tests <- c(significant_tests, name)
} else {
cat("Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.\n")
}
cat("\n")
}
Testing: CTQ_Total
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 7.7918, df = 3, p-value = 0.05052
Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.
Testing: Physical_Abuse
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 7.5855, df = 3, p-value = 0.0554
Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.
Testing: Physical_Neglect
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 16.152, df = 3, p-value = 0.001056
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 16.1517, df = 3, p-value = 0
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | 1 2 3
---------+---------------------------------
2 | 3.850210
| 0.0004*
|
3 | 2.334541 -2.476521
| 0.0196* 0.0199*
|
4 | 1.280521 -0.998172 -0.005352
| 0.1503 0.1909 0.4979
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 16.1517
$Z
[1] 3.85021063 2.33454133 -2.47652115 1.28052161 -0.99817227 -0.00535235
$P
[1] 5.900814e-05 9.783697e-03 6.633488e-03 1.001809e-01 1.590979e-01 4.978647e-01
$P.adjusted
[1] 0.0003540489 0.0195673948 0.0199004649 0.1502713109 0.1909174969 0.4978647316
$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"
Testing: Emotional_Abuse
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 21.301, df = 3, p-value = 9.116e-05
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 21.3011, df = 3, p-value = 0
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | 1 2 3
---------+---------------------------------
2 | -0.616834
| 0.2687
|
3 | 2.330242 4.472250
| 0.0297 0.0000*
|
4 | 0.717342 1.172279 -0.628512
| 0.3549 0.2411 0.3178
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 21.30109
$Z
[1] -0.6168349 2.3302421 4.4722501 0.7173426 1.1722795 -0.6285124
$P
[1] 2.686718e-01 9.896680e-03 3.870042e-06 2.365814e-01 1.205424e-01 2.648341e-01
$P.adjusted
[1] 2.686718e-01 2.969004e-02 2.322025e-05 3.548720e-01 2.410849e-01 3.178010e-01
$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"
Testing: Emotional_Neglect
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 6.728, df = 3, p-value = 0.08109
Kruskal-Wallis test not significant (p >= 0.05). Skipping Dunn's test.
Testing: Sexual_Abuse
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 9.7111, df = 3, p-value = 0.02119
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 9.7111, df = 3, p-value = 0.02
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | 1 2 3
---------+---------------------------------
2 | 2.585689
| 0.0292*
|
3 | 0.965636 -2.570674
| 0.3342 0.0152*
|
4 | 0.905601 -0.620256 0.415519
| 0.2739 0.3211 0.3389
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 9.711097
$Z
[1] 2.5856895 0.9656370 -2.5706744 0.9056014 -0.6202568 0.4155199
$P
[1] 0.004859222 0.167112936 0.005075035 0.182573441 0.267544376 0.338880672
$P.adjusted
[1] 0.02915533 0.33422587 0.01522510 0.27386016 0.32105325 0.33888067
$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"
Testing: Bullying
Kruskal-Wallis rank sum test
data: df[[ctq_tests[[name]]]] by df$ED_RaceEthCode
Kruskal-Wallis chi-squared = 14.311, df = 3, p-value = 0.002511
Kruskal-Wallis test significant (p < 0.05). Performing Dunn's test...
Kruskal-Wallis rank sum test
data: x and group
Kruskal-Wallis chi-squared = 14.3111, df = 3, p-value = 0
Comparison of x by group
(Benjamini-Hochberg)
Col Mean-|
Row Mean | 1 2 3
---------+---------------------------------
2 | -1.028753
| 0.2277
|
3 | 1.381831 3.684295
| 0.1670 0.0007*
|
4 | 0.857417 1.583135 0.107357
| 0.2347 0.1701 0.4573
alpha = 0.05
Reject Ho if p <= alpha/2
$chi2
[1] 14.31115
$Z
[1] -1.0287538 1.3818314 3.6842953 0.8574171 1.5831355 0.1073575
$P
[1] 0.1517976943 0.0835117408 0.0001146681 0.1956072094 0.0566952918 0.4572526697
$P.adjusted
[1] 0.2276965414 0.1670234816 0.0006880087 0.2347286513 0.1700858754 0.4572526697
$comparisons
[1] "1 - 2" "1 - 3" "2 - 3" "1 - 4" "2 - 4" "3 - 4"
# Print names of significant CTQ categories
if (length(significant_tests) > 0) {
cat("CTQ categories with significant Kruskal-Wallis test and corresponding Dunn's test results:\n")
cat(paste(significant_tests, collapse = "\n"))
} else {
cat("No CTQ categories with significant Kruskal-Wallis test.\n")
}
CTQ categories with significant Kruskal-Wallis test and corresponding Dunn's test results:
Physical_Neglect
Emotional_Abuse
Sexual_Abuse
Bullying
df %>%
group_by(ED_RaceEthCode) %>%
summarize(
Mean_CTQ_Tot = mean(CTQ_Total, na.rm = TRUE),
SD_CTQ_Tot = sd(CTQ_Total, na.rm = TRUE),
Mean_Phys_Abuse = mean(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
SD_Phys_Abuse = sd(WK2_CTQSF_PhyAbu_RS, na.rm = TRUE),
Mean_Phys_Neglect = mean(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
SD_Phys_Neglect = sd(WK2_CTQSF_PhyNeg_RS, na.rm = TRUE),
Mean_Emo_Abuse = mean(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
SD_Emo_Abuse = sd(WK2_CTQSF_EmoAbu_RS, na.rm = TRUE),
Mean_Emo_Neglect = mean(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
SD_Emo_Neglect = sd(WK2_CTQSF_EmoNeg_RS, na.rm = TRUE),
Mean_Sex_Abuse = mean(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
SD_Sex_Abuse = sd(WK2_CTQSF_SexAbu_RS, na.rm = TRUE),
Mean_Bullying = mean(WK2_Bullying_Total, na.rm = TRUE),
SD_Bullying = sd(WK2_Bullying_Total, na.rm = TRUE)
) %>%
gt() %>%
tab_header(
title = "Mean and Standard Deviation of CTQ Scores by Race"
) %>%
fmt_number(
columns = vars(starts_with("Mean"), starts_with("SD")),
decimals = 2
)
| Mean and Standard Deviation of CTQ Scores by Race | ||||||||||||||
| ED_RaceEthCode | Mean_CTQ_Tot | SD_CTQ_Tot | Mean_Phys_Abuse | SD_Phys_Abuse | Mean_Phys_Neglect | SD_Phys_Neglect | Mean_Emo_Abuse | SD_Emo_Abuse | Mean_Emo_Neglect | SD_Emo_Neglect | Mean_Sex_Abuse | SD_Sex_Abuse | Mean_Bullying | SD_Bullying |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 10.91 | 10.37 | 1.90 | 2.48 | 1.82 | 2.16 | 2.75 | 2.72 | 2.18 | 2.34 | 2.26 | 3.62 | 3.04 | 2.44 |
| 2 | 8.93 | 9.32 | 1.48 | 2.25 | 1.27 | 1.86 | 2.81 | 2.68 | 1.78 | 2.18 | 1.58 | 2.97 | 3.16 | 2.26 |
| 3 | 9.62 | 10.03 | 1.54 | 2.30 | 1.65 | 2.31 | 2.33 | 2.57 | 1.98 | 2.48 | 2.13 | 3.56 | 2.83 | 2.43 |
| 4 | 9.44 | 9.15 | 1.68 | 2.35 | 1.62 | 2.24 | 2.48 | 2.59 | 1.91 | 2.37 | 1.75 | 2.86 | 2.77 | 2.34 |
| NA | 6.71 | 7.61 | 0.57 | 1.13 | 1.00 | 1.15 | 2.29 | 3.09 | 1.29 | 1.80 | 1.57 | 3.36 | 2.29 | 3.15 |
# CTQ total x Race
p7 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = CTQ_Total, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Boxplot of CTQ TOTAL by RACE",
x = "RACE", y = "CTQ TOTAL") + gghisto
# Sexual assault CTQ x Race
p8 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_SexAbu_RS, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Boxplot of CTQ Sexual Abuse by RACE",
x = "RACE", y = "CTQ Sexual Abuse") + gghisto
# physical abuse x Race
p9 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyAbu_RS, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Boxplot of CTQ Physical Abuse by RACE",
x = "RACE", y = "CTQ phy Abuse") + gghisto
# physical neglect x Race
p10 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_PhyNeg_RS, fill = ED_RaceEthCode)) +
geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "Boxplot of CTQ Physical Neglect by RACE",
x = "RACE", y = "CTQ phy neglect") + gghisto
#emotional abuse x Race
p11 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoAbu_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) +
theme(legend.position = "none") +
labs(title = "CTQ Emotional Abuse by RACE",
x = "RACE", y = "CTQ emo abuse") + gghisto
#emotional neglect x Race
p12 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_CTQSF_EmoNeg_RS, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "CTQ Emotional Neglect by RACE",
x = "RACE", y = "CTQ emo neglect") +
theme(legend.position = "none") + gghisto
#bullying x Race
p13 <- ggplot(df %>% filter(!is.na(ED_RaceEthCode)), aes(x = ED_RaceEthCode, y = WK2_Bullying_Total, fill = ED_RaceEthCode)) + geom_boxplot(lwd = .9) + labs(title = "Bullying by Race",
x = " ", y = "Bullying Score") +
theme(legend.position = "none") + gghisto
ctq.race.plots_simple <- grid.arrange(p7, p8, p9, p10, p11, p12, p13, ncol = 3)
#save as pdf
#ggsave("ctq.race.plots_simple.pdf", ctq.race.plots_simple, width = 12, height = 10)
cor_results <- list(
"ADI vs PDI" = cor.test(df$ADI_NatRank, df$ED_PDI_RS, use="complete.obs", method = "spearman"),
"ADI vs CTQ" = cor.test(df$ADI_NatRank, df$CTQ_Total, use="complete.obs", method = "spearman"),
"PDI vs CTQ" = cor.test(df$ED_PDI_RS, df$CTQ_Total, use="complete.obs", method = "spearman"))
#print the number of complete cases for each of ADI, PDI, and CTQ
sum(complete.cases(df$ADI_NatRank))
[1] 2400
sum(complete.cases(df$ED_PDI_RS))
[1] 2341
sum(complete.cases(df$CTQ_Total))
[1] 2480
#print the number of complete cases overlapping between PDI and ADI
sum(complete.cases(df$ADI_NatRank, df$ED_PDI_RS))
[1] 2263
data.frame(
Comparison = names(cor_results),
rho = sapply(cor_results, function(x) round(x$estimate, 6)),
p_value = sapply(cor_results, function(x) round(x$p.value, 6)))
#plot of PDI x CTQ
p1 <- ggplot(df, aes(x = ED_PDI_RS, y = CTQ_Total)) +
geom_point(alpha = 0.3, size=3) +
geom_smooth(method = "lm", se = FALSE, color = "magenta3", lwd=3) +
stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) +
labs(title = "Scatter Plot of PDI vs CTQ Total",
x = "PDI", y = "CTQ Total") + gghisto
#Plot of ADI x CTQ
p2 <- ggplot(df, aes(x = ADI_NatRank, y = CTQ_Total)) +
geom_point(alpha = 0.3, size=3) +
geom_smooth(method = "lm", se = FALSE, color = "purple3", lwd=3) +
stat_cor(method = "spearman", label.x = 3, label.y = max(df$CTQ_Total)-2) +
labs(title = "Scatter Plot of ADI vs CTQ Total",
x = "ADI", y = "CTQ Total") + gghisto
grid.arrange(p1,p2)
table1_class_features <- c("Pain_Class","ED_GenderBirthCert", "ED_Age", "ED_RaceEthCode","BMI", "ADI_NatRank","Site_New", "ED_Event_BroadClass", "ED_PDI_RS","PRE_Pain_MdSv", "CTQ_Any","CTQ_Total","PhyAbu_Any", "WK2_CTQSF_PhyAbu_RS", "EmoAbu_Any","WK2_CTQSF_EmoAbu_RS", "SexAbu_Any", "WK2_CTQSF_SexAbu_RS", "PhyNeg_Any", "WK2_CTQSF_PhyNeg_RS", "EmoNeg_Any", "WK2_CTQSF_EmoNeg_RS", "Bullying_Any", "WK2_Bullying_Total")
# Create a named list to specify the type of each variable
type_list <- list(
WK2_CTQSF_PhyAbu_RS = "continuous",
WK2_CTQSF_EmoAbu_RS = "continuous",
WK2_CTQSF_SexAbu_RS = "continuous",
WK2_CTQSF_PhyNeg_RS = "continuous",
WK2_CTQSF_EmoNeg_RS = "continuous",
WK2_Bullying_Total = "continuous")
# Generate the summary table and compare across class and add p
suppressMessages(suppressWarnings(
df %>%
select(all_of(table1_class_features)) %>%
tbl_summary(
by = Pain_Class,
statistic = all_continuous() ~ "{mean} ({sd})",
digits = all_continuous() ~ 2,
type = type_list) %>% add_p))
| Characteristic | low N = 1,0701 |
high N = 3521 |
moderate N = 5901 |
moderate recovery N = 4681 |
p-value2 |
|---|---|---|---|---|---|
| ED_GenderBirthCert | <0.001 | ||||
| Female | 605 (57%) | 237 (67%) | 409 (69%) | 303 (65%) | |
| Male | 465 (43%) | 115 (33%) | 181 (31%) | 165 (35%) | |
| ED_Age | 33.36 (12.52) | 40.89 (13.19) | 37.61 (13.12) | 36.81 (14.00) | <0.001 |
| ED_RaceEthCode | <0.001 | ||||
| 1 | 122 (11%) | 37 (11%) | 77 (13%) | 42 (9.0%) | |
| 2 | 387 (36%) | 93 (26%) | 201 (34%) | 210 (45%) | |
| 3 | 521 (49%) | 210 (60%) | 285 (49%) | 197 (42%) | |
| 4 | 39 (3.6%) | 12 (3.4%) | 23 (3.9%) | 17 (3.6%) | |
| Unknown | 1 | 0 | 4 | 2 | |
| BMI | 29.17 (7.95) | 32.15 (9.44) | 30.95 (8.28) | 29.75 (7.93) | <0.001 |
| Unknown | 78 | 21 | 38 | 21 | |
| ADI_NatRank | 62.03 (28.04) | 70.74 (26.08) | 66.28 (26.76) | 60.04 (27.52) | <0.001 |
| Unknown | 41 | 8 | 14 | 17 | |
| Site_New | 0.3 | ||||
| Midwest | 432 (40%) | 139 (39%) | 216 (37%) | 173 (37%) | |
| NorthEast | 441 (41%) | 136 (39%) | 267 (45%) | 209 (45%) | |
| SouthEast | 197 (18%) | 77 (22%) | 107 (18%) | 86 (18%) | |
| ED_Event_BroadClass | 0.12 | ||||
| MVC/Non-motor Collision | 814 (76%) | 268 (76%) | 469 (79%) | 358 (76%) | |
| Other | 152 (14%) | 40 (11%) | 76 (13%) | 72 (15%) | |
| Physical/Sexual Abuse | 104 (9.7%) | 44 (13%) | 45 (7.6%) | 38 (8.1%) | |
| ED_PDI_RS | 12.49 (7.09) | 15.81 (7.33) | 15.00 (7.07) | 14.49 (7.13) | <0.001 |
| Unknown | 43 | 27 | 42 | 27 | |
| PRE_Pain_MdSv | 189 (18%) | 214 (61%) | 251 (43%) | 123 (27%) | <0.001 |
| Unknown | 2 | 1 | 3 | 4 | |
| CTQ_Any | 805 (75%) | 301 (86%) | 490 (83%) | 381 (81%) | <0.001 |
| CTQ_Total | 8.02 (9.02) | 12.09 (10.63) | 10.75 (10.33) | 9.37 (9.54) | <0.001 |
| PhyAbu_Any | 397 (37%) | 193 (55%) | 298 (51%) | 201 (43%) | <0.001 |
| WK2_CTQSF_PhyAbu_RS | 1.23 (2.08) | 2.30 (2.74) | 1.85 (2.41) | 1.38 (2.13) | <0.001 |
| EmoAbu_Any | 623 (58%) | 260 (74%) | 416 (71%) | 328 (70%) | <0.001 |
| WK2_CTQSF_EmoAbu_RS | 2.07 (2.45) | 3.40 (2.92) | 2.94 (2.71) | 2.55 (2.51) | <0.001 |
| SexAbu_Any | 297 (28%) | 148 (42%) | 238 (40%) | 184 (39%) | <0.001 |
| WK2_CTQSF_SexAbu_RS | 1.43 (2.94) | 2.63 (3.87) | 2.37 (3.65) | 2.00 (3.24) | <0.001 |
| PhyNeg_Any | 460 (43%) | 177 (50%) | 297 (50%) | 212 (45%) | 0.012 |
| WK2_CTQSF_PhyNeg_RS | 1.47 (2.22) | 1.69 (2.17) | 1.61 (2.08) | 1.44 (2.03) | 0.028 |
| EmoNeg_Any | 525 (49%) | 189 (54%) | 327 (55%) | 243 (52%) | 0.078 |
| WK2_CTQSF_EmoNeg_RS | 1.81 (2.36) | 2.07 (2.43) | 1.98 (2.27) | 2.00 (2.39) | 0.080 |
| Bullying_Any | 814 (76%) | 290 (82%) | 476 (81%) | 397 (85%) | <0.001 |
| WK2_Bullying_Total | 2.60 (2.25) | 3.63 (2.63) | 3.17 (2.42) | 3.05 (2.25) | <0.001 |
| 1 n (%); Mean (SD) | |||||
| 2 Pearson’s Chi-squared test; Kruskal-Wallis rank sum test | |||||
p1 <- ggplot(df_traj %>% filter(!is.na(ED_Event_BroadClass)), aes(x = ED_Event_BroadClass, fill=ED_Event_BroadClass)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Trauma Type", y = "Count") + gghisto2
p2 <- ggplot(df_traj %>% filter(!is.na(ED_highestgrade)), aes(x = ED_highestgrade, fill=ED_highestgrade)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Education",
y = "Count") + gghisto2
p3 <- ggplot(df_traj %>% filter(!is.na(ED_Marital)), aes(x = ED_Marital, fill=ED_Marital)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Marital Status",
y = "Count") + gghisto2
p4 <- ggplot(df_traj %>% filter(!is.na(Pain_Class)), aes(x = Pain_Class, fill=Pain_Class)) +
geom_bar(color="black") + theme(legend.position = "none") +
geom_text(stat = 'count', aes(label = ..count..), vjust = 1.5) +
labs(title = "Counts of Pain Class",
y = "Count") + gghisto2
grid.arrange(p1, p2, p3, p4, ncol = 2)
#run chi square test for all categorical variables
chi_sq_results <- function(df, features, group_var) {
test_feature <- function(feature) {
if (!feature %in% names(df)) {
return(data.frame(
Feature = feature,
Chi_Square = NA,
P_Value = NA
))}
chi_sq_test <- tryCatch(
chisq.test(table(df[[feature]], df[[group_var]])),
error = function(e) {
return(list(statistic = NA, p.value = NA))
})
chi_squared <- chi_sq_test$statistic
p_value <- chi_sq_test$p.value
return(data.frame(
Feature = feature,
Chi_Square = chi_squared,
P_Value = p_value
))}
results <- map_dfr(features, test_feature)
return(results)
}
categorical_features <- c("ED_GenderBirthCert", "ED_RaceEthCode", "Site_New", "ED_Event_BroadClass", "PRE_Pain_MdSv", "CTQ_Any", "PhyAbu_Any", "EmoAbu_Any", "SexAbu_Any", "PhyNeg_Any", "EmoNeg_Any", "Bullying_Any")
chi.sq_table <- chi_sq_results(df, categorical_features, "Pain_Class")
chi.sq_table
#save chi sq table as csv
#write.csv(chi.sq_table, "chi_sq_table.csv")
kruskal_wallis_results <- function(df, features, group_var) {
test_feature <- function(feature) {
if (!feature %in% names(df)) {
return(data.frame(
Feature = feature,
Chi_Square = NA,
P_Value = NA
))}
kw_test <- tryCatch(
kruskal.test(df[[feature]] ~ df[[group_var]]),
error = function(e) {
return(list(statistic = NA, p.value = NA))
})
chi_squared <- kw_test$statistic
p_value <- kw_test$p.value
return(data.frame(
Feature = feature,
Chi_Square = chi_squared,
P_Value = p_value
))}
results <- map_dfr(features, test_feature)
return(results)
}
continuous_features <- c("WK2_CTQSF_PhyAbu_RS", "WK2_CTQSF_EmoAbu_RS", "WK2_CTQSF_SexAbu_RS",
"WK2_CTQSF_PhyNeg_RS", "WK2_CTQSF_EmoNeg_RS", "WK2_Bullying_Total",
"ED_Age", "BMI", "ADI_NatRank", "ED_PDI_RS")
kw_table <- kruskal_wallis_results(df, continuous_features, "Pain_Class")
kw_table
#save kw table as csv
#write.csv(kw_table, "kw_table.csv")
#function to apply BH correction to list of models
apply_bh_correction <- function(models, model_names) {
correct_model <- function(model) {
model_results <- tidy(model, exponentiate = TRUE) %>%
filter(term != "(Intercept)") %>%
select(y.level,term, estimate, p.value)
model_results <- model_results %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH")) %>%
mutate(
p.value = round(p.value, digits = 6),
p_adj_bh = round(p_adj_bh, digits = 6)
) %>%
mutate(
p.value = format(p.value, scientific = FALSE),
p_adj_bh = format(p_adj_bh, scientific = FALSE)
)
return(model_results)
}
corrected_models <- lapply(models, correct_model)
names(corrected_models) <- model_names
return(corrected_models)
}
#function to get adjusted p values from list of models
extract_p_values <- function(model_df, model_name) {
model_df %>%
mutate(model = model_name) %>%
select(y.level, term, p.value,p_adj_bh, model)}
# Create binary variables for high/low CTQ and Bullying
df <- df %>%
mutate(
CTQ_HighLow = ifelse(CTQ_Total > 6, "High", "Low"),
Bullying_HighLow = ifelse(WK2_Bullying_Total > 3, "High", "Low"))
# Create binary variable for PRE_Pain_MdSv
df$PRE_Pain_MdSv2 <- ifelse(df$PRE_Pain_MdSv == "Yes", 1, 0)
# M33 Logistic regression using CTQ composite
m33 <- glm(PRE_Pain_MdSv2 ~ CTQ_Total, data = df, family = "binomial")
tidy(m33, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric)))
# M34 Logistic regression using Bullying composite
m34 <- glm(PRE_Pain_MdSv2 ~ WK2_Bullying_Total, data = df, family = "binomial")
tidy(m34, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric)))
# Chi-square test for CTQ_HighLow
chisq.test(table(df$CTQ_HighLow, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$CTQ_HighLow, df$PRE_Pain_MdSv)
X-squared = 33.911, df = 1, p-value = 5.77e-09
# Chi-square test for Bullying_HighLow
chisq.test(table(df$Bullying_HighLow, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$Bullying_HighLow, df$PRE_Pain_MdSv)
X-squared = 17.926, df = 1, p-value = 2.297e-05
# chi-square test for any CTQ
chisq.test(table(df$CTQ_Any, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$CTQ_Any, df$PRE_Pain_MdSv)
X-squared = 23.917, df = 1, p-value = 1.006e-06
# chi-square test for any bullying
chisq.test(table(df$Bullying_Any, df$PRE_Pain_MdSv))
Pearson's Chi-squared test with Yates' continuity correction
data: table(df$Bullying_Any, df$PRE_Pain_MdSv)
X-squared = 4.2373, df = 1, p-value = 0.03955
Main multi-nominal regression model with CTQ as a predictor variable, including our standard covariates (same in linear models), and latent pain trajectories as the dependent variables. Model 2 is this with extra features.
#pain trajectory ~ age + sex + race + site + CTQ
m1 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total, data = df)
# weights: 40 (27 variable)
initial value 3428.305955
iter 10 value 3127.805259
iter 20 value 3086.634930
iter 30 value 3080.608403
final value 3080.567070
converged
#how many samples are used in the m1 model?
nrow(df)
[1] 2480
#view output
tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
#save this table output as a csv
#write.csv(tidy(m1, exponentiate = TRUE, conf.int = TRUE) %>% mutate(p_adj_bh = p.adjust(p.value, method = "BH")), "model1_CTQ_Total.csv")
#examine variance inflation factor to assess collinearity
vif(m1)
Warning: No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 12.103904 1 3.479066
Site_New 3.792132 2 1.395471
ED_RaceEthCode 16.213547 3 1.590913
ED_GenderBirthCert 1.753031 1 1.324021
CTQ_Total 2.846419 1 1.687133
Variance Inflation Factor (VIF) is used to detect multicollinearity. VIF > 10: Indicates high multicollinearity that might be problematic. VIF between 5 and 10: Moderate multicollinearity that might need addressing. VIF < 5: Generally considered acceptable.
m2 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2728.371269
iter 20 value 2576.188532
iter 30 value 2496.937590
iter 40 value 2482.878068
iter 50 value 2482.176828
final value 2482.128451
converged
tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
#save this table outout as a csv
write.csv(tidy(m2, exponentiate = TRUE, conf.int = TRUE) %>% mutate(p_adj_bh = p.adjust(p.value, method = "BH")), "model2_full_CTQ_Total.csv")
vif(m2)
Warning: No intercept: vifs may not be sensible.
GVIF Df GVIF^(1/(2*Df))
ED_Age 17.705109 1 4.207744
Site_New 4.738177 2 1.475376
ED_RaceEthCode 19.673936 3 1.643042
ED_GenderBirthCert 1.941991 1 1.393553
CTQ_Total 3.011481 1 1.735362
BMI 18.363061 1 4.285214
PRE_Pain_MdSv 3.067240 1 1.751354
ED_Marital 10.289051 2 1.790993
ADI_NatRank 12.774043 1 3.574079
ED_Event_BroadClass 2.156182 2 1.211773
ED_PDI_RS 7.261571 1 2.694730
Using m2, what is the bullying score cut off where high bullying is predictive of high/moderate pain? Using that cutoff score, how many participants have high vs. low (same as any bullying but instead of 0 vs 1+, it uses the cutoff score) bullying and what are the ORs (fully adjusted model) for high bullying for each of the pain classes?
library(nnet)
library(broom)
library(dplyr)
library(purrr)
# Define the cutoffs to test
cutoffs <- c(1, 2, 3, 4, 5, 6, 7)
# Initialize a list to store results
results <- list()
for (cutoff in cutoffs) {
# Create a binary bullying variable and ensure it's a factor with "Low" as the reference
df <- df %>%
mutate(
Bullying_HighLow = ifelse(WK2_Bullying_Total > cutoff, "High", "Low"),
Bullying_HighLow = factor(Bullying_HighLow, levels = c("Low", "High"))
)
# Check how many participants fall into each group for this cutoff
cat("Cutoff:", cutoff, "\n")
print(table(df$Bullying_HighLow))
# Fit the multinomial regression model
model <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert +
Bullying_HighLow + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank +
ED_Event_BroadClass + ED_PDI_RS, data = df)
# Extract the model terms
model_terms <- tidy(model, exponentiate = TRUE, conf.int = TRUE)
cat("Model terms for cutoff", cutoff, ":\n")
print(model_terms)
# Filter for Bullying term
# If the term doesn't match "Bullying_HighLowHigh", try partial matching like this:
or_table <- model_terms %>%
filter(grepl("Bullying_HighLow", term)) %>%
mutate(Cutoff = cutoff)
# Append results for this cutoff
results[[paste0("Cutoff_", cutoff)]] <- or_table
}
Cutoff: 1
Low High
776 1704
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.558124
iter 20 value 2550.431620
iter 30 value 2494.022691
iter 40 value 2488.983582
iter 50 value 2488.863556
final value 2488.842771
converged
Model terms for cutoff 1 :
Cutoff: 2
Low High
1197 1283
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.574338
iter 20 value 2540.282951
iter 30 value 2491.634509
iter 40 value 2485.430993
iter 50 value 2484.943954
final value 2484.918956
converged
Model terms for cutoff 2 :
Cutoff: 3
Low High
1502 978
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.330657
iter 20 value 2534.356688
iter 30 value 2485.462908
iter 40 value 2479.208654
iter 50 value 2478.947241
final value 2478.922140
converged
Model terms for cutoff 3 :
Cutoff: 4
Low High
1879 601
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.150582
iter 20 value 2539.409547
iter 30 value 2483.829285
iter 40 value 2479.214810
iter 50 value 2479.018444
final value 2479.010175
converged
Model terms for cutoff 4 :
Cutoff: 5
Low High
2064 416
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.487252
iter 20 value 2545.065534
iter 30 value 2491.116014
iter 40 value 2486.279849
iter 50 value 2486.125555
final value 2486.117414
converged
Model terms for cutoff 5 :
Cutoff: 6
Low High
2229 251
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.438408
iter 20 value 2543.171124
iter 30 value 2488.320551
iter 40 value 2483.262161
iter 50 value 2483.011903
final value 2483.001140
converged
Model terms for cutoff 6 :
Cutoff: 7
Low High
2335 145
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.576967
iter 20 value 2542.511577
iter 30 value 2486.678386
iter 40 value 2482.017246
iter 50 value 2481.827238
final value 2481.812995
converged
Model terms for cutoff 7 :
# Bullying score dichot value table results
final_results <- bind_rows(results)
print(final_results)
#save final results able as csv
write.csv(final_results, "bullying_cutoff_final_results.csv")
top_cutoff <- final_results %>%
filter(term == "Bullying_HighLowHigh") %>%
arrange(desc(estimate)) %>%
slice(1)
cat("The cutoff with the highest OR is:", top_cutoff$Cutoff,
"for the", top_cutoff$y.level, "pain class with an OR of", top_cutoff$estimate, "\n")
The cutoff with the highest OR is: 7 for the high pain class with an OR of 3.676667
#table of each bullying score
table(df$WK2_Bullying_Total)
0 1 2 3 4 5 6 7 8
503 273 421 305 377 185 165 106 145
#how many people have a bullying score of not 8
sum(df$WK2_Bullying_Total != 8)
[1] 2335
m2b <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ed_OPIOIDS + ED_PDI_RS, data = df)
# weights: 76 (54 variable)
initial value 2470.376552
iter 10 value 2327.811839
iter 20 value 2208.933481
iter 30 value 2123.088026
iter 40 value 2113.591373
iter 50 value 2113.266337
iter 60 value 2113.212509
final value 2113.211427
converged
tidy(m2b, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
#save this table outout as a csv
#this is a modified version of the code with unscaled scores, running this script thru correctly will not result in unscaled
write.csv(tidy(m2b, exponentiate = TRUE, conf.int = TRUE) %>% mutate(p_adj_bh = p.adjust(p.value, method = "BH")), "model2_full_CTQ_Total_w_opioids_unscaled.csv")
m15 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_GenderBirthCert, data = df)
# weights: 76 (54 variable)
initial value 2905.672981
iter 10 value 2726.355411
iter 20 value 2583.152324
iter 30 value 2501.536175
iter 40 value 2481.998649
iter 50 value 2480.053420
iter 60 value 2479.938465
final value 2479.937746
converged
tidy(m15, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
m14 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + CTQ_Total*ED_RaceEthCode, data = df)
# weights: 84 (60 variable)
initial value 2905.672981
iter 10 value 2724.447079
iter 20 value 2616.065781
iter 30 value 2537.780301
iter 40 value 2481.689101
iter 50 value 2476.339172
iter 60 value 2476.177407
final value 2476.169853
converged
tidy(m14, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#physical abuse m16
m16 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2733.195514
iter 20 value 2592.981098
iter 30 value 2490.569716
iter 40 value 2478.612915
iter 50 value 2477.978558
final value 2477.950997
converged
#physical neglect m17
m17 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_PhyNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.635643
iter 20 value 2603.657118
iter 30 value 2500.923313
iter 40 value 2492.050220
iter 50 value 2491.534112
final value 2491.509002
converged
#emotional abuse m18
m18 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2733.800434
iter 20 value 2567.458622
iter 30 value 2483.364238
iter 40 value 2475.197007
iter 50 value 2474.385763
final value 2474.325475
converged
#emotional neglect m19
m19 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_EmoNeg_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2745.951381
iter 20 value 2568.972076
iter 30 value 2501.137657
iter 40 value 2491.034459
iter 50 value 2490.378500
final value 2490.335809
converged
#sexual abuse m20
m20 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_CTQSF_SexAbu_RS + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2735.299024
iter 20 value 2588.288776
iter 30 value 2495.493514
iter 40 value 2487.338546
iter 50 value 2486.805748
final value 2486.781130
converged
#bullying m21
m21 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + WK2_Bullying_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2742.799569
iter 20 value 2594.067749
iter 30 value 2490.690082
iter 40 value 2480.304323
iter 50 value 2479.542688
final value 2479.495450
converged
tidy(m16, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m17, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m18, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m19, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m20, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
tidy(m21, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
perform BH corrections on the values in combined_p_values_m16_m21
#BH corrections for models with subtypes m16-m21
#make first set of adjusted within p vals
models_list_subtypes <- list(m16, m17, m18, m19, m20, m21)
model_names2 <- c("m16", "m17", "m18", "m19", "m20", "m21")
adjusted_models2 <- apply_bh_correction(models_list_subtypes, model_names2)
combined_p_values_m16_m21 <- map2_dfr(adjusted_models2, paste0("m", 16:21), extract_p_values)
double_corrected_p_values_16_m21 <- combined_p_values_m16_m21 %>%
mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
mutate(
p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE) )
double_corrected_p_values_16_m21
M26: M2 + PhyAbu ANY
#Model 2 but instead of physical abuse score, its a binary score of whether or not the pt had any physical abuse
m26 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.215465
iter 20 value 2538.670319
iter 30 value 2490.267312
iter 40 value 2481.883743
iter 50 value 2481.407674
final value 2481.370933
converged
tidy(m26, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))%>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
Model 27: M2 + Emo_Abuse ANY
#Model 2 but instead of emotional abuse score, its a binary score of whether or not the pt had any emotional abuse
m27 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.545586
iter 20 value 2541.516221
iter 30 value 2489.192593
iter 40 value 2482.375221
iter 50 value 2482.194772
final value 2482.178232
converged
tidy(m27, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 28: M2 + Emo_Neglect ANY
#Model 2 but instead of emotional neglect score, its a binary score of whether or not the pt had any emotional neglect
m28 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + EmoNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.664578
iter 20 value 2558.737778
iter 30 value 2495.816626
iter 40 value 2490.341546
iter 50 value 2490.056292
final value 2490.038111
converged
tidy(m28, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 29: M2 + Phy_Neglect ANY
#Model 2 but instead of physical neglect score, its a binary score of whether or not the pt had any physical neglect
m29 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + PhyNeg_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.702303
iter 20 value 2548.462391
iter 30 value 2494.585863
iter 40 value 2489.264715
iter 50 value 2489.000832
final value 2488.979296
converged
tidy(m29, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 30: M2 + Sexual Abuse ANY
#Model 2 but instead of sexual abuse score, its a binary score of whether or not the pt had any sexual abuse
m30 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + SexAbu_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.380632
iter 20 value 2545.148484
iter 30 value 2492.757380
iter 40 value 2486.885581
iter 50 value 2486.521490
final value 2486.501395
converged
tidy(m30, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
Model 31: M2 + Bullying ANY
#Model 2 but instead of bullying score, its a binary score of whether or not the pt had any bullying
m31 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + Bullying_Any + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS, data = df)
# weights: 72 (51 variable)
initial value 2905.672981
iter 10 value 2746.165693
iter 20 value 2562.600366
iter 30 value 2494.807104
iter 40 value 2488.444171
iter 50 value 2488.177859
final value 2488.159649
converged
tidy(m31, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5)))
#BH BETWEEN Corrections for models with binary indicators m26-m31
models_list_ANY <- list(m26, m27, m28, m29, m30, m31)
model_names3 <- c("m26", "m27", "m28", "m29", "m30", "m31")
adjusted_models3 <- apply_bh_correction(models_list_ANY, model_names3)
double_corrected_p_values_m26_m31 <- map2_dfr(adjusted_models3, paste0("m", 26:31), extract_p_values)
#perform BH corrections on the values in double_corrected_p_values_m26_m31
double_corrected_p_values_m26_m31 <- double_corrected_p_values_m26_m31 %>%
mutate(p_adj_bh_2 = p.adjust(as.numeric(p_adj_bh), method = "BH")) %>%
mutate(
p_adj_bh_2 = round(p_adj_bh_2, digits = 6),
p_adj_bh_2 = format(p_adj_bh_2, scientific = FALSE) )
double_corrected_p_values_m26_m31
m13 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Triad + BMI + PRE_Pain_MdSv + ED_Concussion + ED_Event_BroadClass + ED_PDI_RS + ADI_NatRank, data = df)
# weights: 68 (48 variable)
initial value 2915.377041
iter 10 value 2739.008124
iter 20 value 2623.154036
iter 30 value 2509.056542
iter 40 value 2487.737115
iter 50 value 2487.237412
final value 2487.224791
converged
tidy(m13, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
m35 <- multinom(Pain_Class ~ ED_Age + Site_New + ED_RaceEthCode + ED_GenderBirthCert + CTQ_Total + BMI + PRE_Pain_MdSv + ED_Marital + ADI_NatRank + ED_Event_BroadClass + ED_PDI_RS + ed_OPIOIDS, data = df)
# weights: 76 (54 variable)
initial value 2470.376552
iter 10 value 2327.811839
iter 20 value 2208.933481
iter 30 value 2123.088026
iter 40 value 2113.591373
iter 50 value 2113.266337
iter 60 value 2113.212509
final value 2113.211427
converged
tidy(m35, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~ round(., 5))) %>%
mutate(p_adj_bh = p.adjust(p.value, method = "BH"))
#save this table output as a csv`
write.csv(tidy(m35, exponentiate = TRUE, conf.int = TRUE) %>% mutate(p_adj_bh = p.adjust(p.value, method = "BH")), "model35_full_CTQ_Opioids.csv")
#how many samples are used in the m35 model?
nrow(df)
[1] 2480
#complete cases
sum(complete.cases(df$ed_OPIOIDS))
[1] 1991
table(df$ed_OPIOIDS)
0 1
1309 682
#save fianl df as csv
write.csv(df, "Final_ELA_Analysis_Dataframe.csv")