Data Wrangling

library(ggplot2)  
library(tidyverse)
library(dplyr)
library(arsenal) 
library(survival)
library(haven)    
library(tidyr)    
library(janitor)  
library(survey)
library(binom)
library(forcats)
library(knitr)
library(tibble)

# Setwd: 
setwd("/Volumes/NCS-A Denckla/14826726/ICPSR_28581/DS0001/")

# dat1 is the whole dataset:
dat1 <- read.csv("28581-0001-Data-REST.tsv", sep="\t", header=T)
# Filter to columns relevant to our analysis:
dat2 <- dat1 %>% select(c("Age", "RespGender","racecat","DA39", "DA39b", "DA39c", "DA41c", "DA41b","PT20", "PT48", "PT64", "dsm_pts", "final_weight", "final_weight_psaq", "secu", "str", "dsm_add", "dsm_ago", "dsm_agp", "dsm_alah", "dsm_ald", "dsm_ano", "dsm_bingeh", "dsm_bingeany", "dsm_bipolarI", "dsm_bipolarII", "dsm_bipolarsub","dsm_bulh","dsm_cd","dsm_drah", "dsm_drd","dsm_dysh","dsm_gadh", "dsm_iedh","dsm_mddh","dsm_mde","dsm_odd","dsm_pds","dsm_pd_ago","dsm_pd_woago","dsm_sad","dsm_so","dsm_sp"))

# Rename column names:
dat2 <- dat2 %>% rename(c("gender"="RespGender", "bio_mom_alive"="DA39", "adol_age_mom_died"="DA39b", "mom_cause_death"="DA39c", "adol_age_dad_died"="DA41b", "dad_cause_death"="DA41c", "PTSD"="dsm_pts", "ADD"="dsm_add", "agor_w_wo_panic"="dsm_ago", "agor_wo_panic"="dsm_agp", "alc_abuse"="dsm_alah", "alc_depend"="dsm_ald", "anorexia"="dsm_ano", "binge"="dsm_bingeh", "any_binge"="dsm_bingeany", "bipolar1"="dsm_bipolarI", "bipolar2"="dsm_bipolarII", "sub_bipolar"="dsm_bipolarsub","bulimia"="dsm_bulh","conduct_dis"="dsm_cd","drug_abuse"="dsm_drah", "drug_depend"="dsm_drd","dysth"="dsm_dysh","GAD"="dsm_gadh", "IED"="dsm_iedh","maj_dep"="dsm_mddh","maj_dep_ep"="dsm_mde","ODD"="dsm_odd","panic"="dsm_pds","panic_ago"="dsm_pd_ago","panic_wo_ago"="dsm_pd_woago","sep_anxiety"="dsm_sad","social_phob"="dsm_so","sp_phobia"="dsm_sp"))

# Modify PTSD indicator to be binary: 
dat2 <- dat2 %>%
  mutate(PTSD_bin = case_when(
      PTSD == 1 ~ 1,
      PTSD == 5 ~ 0,
      TRUE ~ NA_real_))

# Make PT64 (index trauma variable) categorical:
dat2 <- dat2 %>%
 mutate(PT64_cat = factor(PT64))

# Modify the other disorder indicators to be binary:
dsm_vars <- c("ADD", "agor_w_wo_panic", "agor_wo_panic", "alc_abuse", "alc_depend",
  "anorexia", "binge", "any_binge", "bipolar1", "bipolar2", "sub_bipolar",
  "bulimia", "conduct_dis", "drug_abuse", "drug_depend", "dysth", "GAD",
  "IED", "maj_dep", "maj_dep_ep", "ODD", "panic", "panic_ago",
  "panic_wo_ago", "sep_anxiety", "social_phob", "sp_phobia")

dat2 <- dat2 %>%
  mutate(
    across(
      all_of(dsm_vars),
      ~ case_when(
        . == 1 ~ 1,
        . == 5 ~ 0,
        TRUE   ~ NA_real_),
      .names = "{.col}_bin"))

dsm_vars_bin <- paste0(dsm_vars, "_bin")

# Make var for death index (i.e., if index trauma = unexpected death, #20)
dat2 <- dat2 %>%
  mutate(
    death_index = case_when(
      PT64 == 20 ~ 1,
      !is.na(PT64) ~ 0,
      TRUE ~ NA_real_))

# Add var that counts the number of comorbid disorders:
dat2 <- dat2 %>%
  mutate(
    n_comorbid = rowSums(across(all_of(dsm_vars_bin)), na.rm = TRUE))

# Make disorder categories (if N/A then N/A):

# Fear disorders
# Includes: agoraphobia w or w/o panic, agoraphobia w/o panic, panic disorder, panic disorder w agoraphobia, panic disorder w/o agoraphobia, social phobia, specific phobia

dat2 <- dat2 %>%
  mutate(
    any_fear = case_when(
      agor_w_wo_panic == 1 | agor_wo_panic == 1 |
        panic == 1 | panic_ago == 1 | panic_wo_ago == 1 |
        social_phob == 1 | sp_phobia == 1 ~ 1,
      rowSums(!is.na(across(c(agor_w_wo_panic, agor_wo_panic,
                              panic, panic_ago, panic_wo_ago,
                              social_phob, sp_phobia)))) == 0 ~ NA_real_,
      TRUE ~ 0))

# Distress/anxiety disorders (excluding PTSD)
# Includes: dysthymia, GAD, separation anxiety disorder

dat2 <- dat2 %>%
  mutate(
    any_distress = case_when(
      dysth == 1 | GAD == 1  | sep_anxiety == 1 ~ 1,
      rowSums(!is.na(across(c(dysth, GAD, sep_anxiety)))) == 0 ~ NA_real_,
      TRUE ~ 0))


# Depression:
# Includes: major depressive disorder, major depressive episode
dat2 <- dat2 %>%
  mutate(
    any_dep = case_when(
      maj_dep == 1 | maj_dep_ep == 1 ~ 1,
      rowSums(!is.na(across(c(maj_dep, maj_dep_ep)))) == 0 ~ NA_real_,
      TRUE ~ 0))

# Behavior disorders:
# Includes: ADD, anorexia, binge disorder, any binge disorder, bulimia, IED, conduct disorder, ODD
dat2 <- dat2 %>%
  mutate(
    any_behavior = case_when(
      ADD == 1 | anorexia == 1 | binge == 1 | IED == 1 | any_binge == 1 |
        bulimia == 1 | conduct_dis == 1 | ODD == 1 ~ 1,
      rowSums(!is.na(across(c(ADD, anorexia, binge, IED, any_binge,
                              bulimia, conduct_dis, ODD)))) == 0 ~ NA_real_,
      TRUE ~ 0))

# Substance disorders:
# Includes: alcohol abuse, alcohol dependence, drug abuse, drug dependence

dat2 <- dat2 %>%
  mutate(
    any_substance = case_when(
      alc_abuse == 1 | alc_depend == 1 | drug_abuse == 1 | drug_depend == 1 ~ 1,
      rowSums(!is.na(across(c(alc_abuse, alc_depend, drug_abuse, drug_depend)))) == 0 ~ NA_real_,
      TRUE ~ 0))

# Other disorders:
# Includes: bipolar disorders
dat2 <- dat2 %>%
  mutate(
    any_other = case_when(
      bipolar1 == 1 | bipolar2 == 1 | sub_bipolar == 1 ~ 1,
      rowSums(!is.na(across(c(bipolar1, bipolar2, sub_bipolar)))) == 0 ~ NA_real_,
      TRUE ~ 0))

# Any disorder at all (i.e., any of fear, distress, depression, behavior, substance, other):
dat2 <- dat2 %>%
  mutate(
    any_disorder = case_when(
      any_fear == 1 | any_behavior == 1 | any_dep == 1 |any_distress == 1 |
        any_substance == 1 | any_other == 1 ~ 1,
      rowSums(!is.na(across(c(any_fear, any_behavior, any_distress,
                              any_substance, any_other)))) == 0 ~ NA_real_,
      TRUE ~ 0))

Variable Definitions Table:

main_variables_table <- tibble(
  `Original Variable Name` = c(
    "Age", "RespGender", "racecat",
    "PT64", "--", "--",
    "dsm_pts", "--",
    "final_weight", "secu", "str",
    "dsm_add", "dsm_ago", "dsm_agp", "dsm_alah", "dsm_ald",
    "dsm_ano", "dsm_bingeh", "dsm_bingeany",
    "dsm_bipolarI", "dsm_bipolarII", "dsm_bipolarsub",
    "dsm_bulh", "dsm_cd", "dsm_drah", "dsm_drd",
    "dsm_dysh", "dsm_gadh", "dsm_iedh",
    "dsm_mddh", "dsm_mde", "dsm_odd",
    "dsm_pds", "dsm_pd_ago", "dsm_pd_woago",
    "dsm_sad", "dsm_so", "dsm_sp",
    "--", "--", "--", "--", "--", "--", "--", "--"),
  
  `New Variable Name` = c(
    "Age", "gender", "racecat",
    "PT64", "PT64_cat", "death_index",
    "PTSD", "PTSD_bin",
    "final_weight", "secu", "str",
    "ADD", "agor_w_wo_panic", "agor_wo_panic", "alc_abuse", "alc_depend",
    "anorexia", "binge", "any_binge",
    "bipolar1", "bipolar2", "sub_bipolar",
    "bulimia", "conduct_dis", "drug_abuse", "drug_depend",
    "dysth", "GAD", "IED",
    "maj_dep", "maj_dep_ep", "ODD",
    "panic", "panic_ago", "panic_wo_ago",
    "sep_anxiety", "social_phob", "sp_phobia",
    "n_comorbid",
    "any_fear", "any_distress", "any_dep",
    "any_behavior", "any_substance", "any_other", "any_disorder"),
  
  `Variable Description` = c(
    "Age",
    "Gender (1 = male, 2 = female)",
    "Race/ethnicity category",
    "Index trauma variable (the event that caused the most problems; 20 = unexpected death of a loved one",
    "Categorical version of PT64",
    "Binary indicator for whether the index trauma was unexpected death of a loved one (1 = yes, 0 = no)",
    "Lifetime PTSD diagnosis indicator",
    "Binary PTSD diagnosis indicator (1 = meets criteria, 0 = does not meet criteria)",
    "Final survey weight for full adolescent sample",
    "Cluster variable",
    "Stratification variable",
    "Attention Deficit Disorder (lifetime)",
    "Agoraphobia with or without panic disorder (lifetime)",
    "Agoraphobia without panic disorder (lifetime)",
    "Alcohol abuse with hierarchy (lifetime)",
    "Alcohol dependence (lifetime)",
    "Anorexia (lifetime)",
    "Binge disorder with hierarchy (lifetime)",
    "Any binge disorder (lifetime)",
    "Model-based bipolar I disorder (lifetime)",
    "Model-based bipolar II disorder (lifetime)",
    "Model-based subthreshold bipolar disorder (lifetime)",
    "Bulimia with hierarchy (lifetime)",
    "Conduct disorder (lifetime)",
    "Drug abuse with hierarchy (lifetime)",
    "Drug dependence (lifetime)",
    "Dysthymia with hierarchy (lifetime)",
    "Generalized anxiety disorder with hierarchy (lifetime)",
    "Intermittent explosive disorder with hierarchy (lifetime)",
    "Major depressive disorder with hierarchy (lifetime)",
    "Major depressive episode (lifetime)",
    "Oppositional defiant disorder (lifetime)",
    "Panic disorder (lifetime)",
    "Panic disorder with agoraphobia (lifetime)",
    "Panic disorder without agoraphobia (lifetime)",
    "Separation anxiety disorder (lifetime)",
    "Social phobia (lifetime)",
    "Specific phobia (lifetime)",
    "Count of comorbid DSM-IV diagnoses across the 27 non-PTSD disorders",
    "Binary indicator for having any fear disorder",
    "Binary indicator for having any distress disorder",
    "Binary indicator for having any depressive disorder",
    "Binary indicator for havingany behavioral/eating/impulse-control disorder",
    "Binary indicator for having any substance use disorder",
    "Binary indicator for having any other disorder",
    "Binary indicator for having any disorder across the 27 non-PTSD diagnoses"))

kable(main_variables_table)
Original Variable Name New Variable Name Variable Description
Age Age Age
RespGender gender Gender (1 = male, 2 = female)
racecat racecat Race/ethnicity category
PT64 PT64 Index trauma variable (the event that caused the most problems; 20 = unexpected death of a loved one
PT64_cat Categorical version of PT64
death_index Binary indicator for whether the index trauma was unexpected death of a loved one (1 = yes, 0 = no)
dsm_pts PTSD Lifetime PTSD diagnosis indicator
PTSD_bin Binary PTSD diagnosis indicator (1 = meets criteria, 0 = does not meet criteria)
final_weight final_weight Final survey weight for full adolescent sample
secu secu Cluster variable
str str Stratification variable
dsm_add ADD Attention Deficit Disorder (lifetime)
dsm_ago agor_w_wo_panic Agoraphobia with or without panic disorder (lifetime)
dsm_agp agor_wo_panic Agoraphobia without panic disorder (lifetime)
dsm_alah alc_abuse Alcohol abuse with hierarchy (lifetime)
dsm_ald alc_depend Alcohol dependence (lifetime)
dsm_ano anorexia Anorexia (lifetime)
dsm_bingeh binge Binge disorder with hierarchy (lifetime)
dsm_bingeany any_binge Any binge disorder (lifetime)
dsm_bipolarI bipolar1 Model-based bipolar I disorder (lifetime)
dsm_bipolarII bipolar2 Model-based bipolar II disorder (lifetime)
dsm_bipolarsub sub_bipolar Model-based subthreshold bipolar disorder (lifetime)
dsm_bulh bulimia Bulimia with hierarchy (lifetime)
dsm_cd conduct_dis Conduct disorder (lifetime)
dsm_drah drug_abuse Drug abuse with hierarchy (lifetime)
dsm_drd drug_depend Drug dependence (lifetime)
dsm_dysh dysth Dysthymia with hierarchy (lifetime)
dsm_gadh GAD Generalized anxiety disorder with hierarchy (lifetime)
dsm_iedh IED Intermittent explosive disorder with hierarchy (lifetime)
dsm_mddh maj_dep Major depressive disorder with hierarchy (lifetime)
dsm_mde maj_dep_ep Major depressive episode (lifetime)
dsm_odd ODD Oppositional defiant disorder (lifetime)
dsm_pds panic Panic disorder (lifetime)
dsm_pd_ago panic_ago Panic disorder with agoraphobia (lifetime)
dsm_pd_woago panic_wo_ago Panic disorder without agoraphobia (lifetime)
dsm_sad sep_anxiety Separation anxiety disorder (lifetime)
dsm_so social_phob Social phobia (lifetime)
dsm_sp sp_phobia Specific phobia (lifetime)
n_comorbid Count of comorbid DSM-IV diagnoses across the 27 non-PTSD disorders
any_fear Binary indicator for having any fear disorder
any_distress Binary indicator for having any distress disorder
any_dep Binary indicator for having any depressive disorder
any_behavior Binary indicator for havingany behavioral/eating/impulse-control disorder
any_substance Binary indicator for having any substance use disorder
any_other Binary indicator for having any other disorder
any_disorder Binary indicator for having any disorder across the 27 non-PTSD diagnoses

Descriptive data:

Total PTSD cases:

# Filter to cases w/ survey design variables (i.e., student responses):
dat2 <- dat2 %>%
  filter(
    !is.na(secu),
    !is.na(str),
    !is.na(final_weight)
  )

# Total PTSD cases:
sum(dat2$PTSD == 1, na.rm = TRUE)
## [1] 386

PTSD cases with a valid index trauma:

sum(!is.na(dat2$PT64) & dat2$PTSD == 1, na.rm = TRUE)
## [1] 339

PTSD cases with “unexpected death of a loved one” as index trauma:

# PTSD cases with "unexpected death of a loved one" as index trauma: 
sum(dat2$PTSD == 1 & dat2$PT64 == 20, na.rm = TRUE)
## [1] 100
# Counts among PTSD cases:
counts_ptsd <- dat2 %>%
  filter(PTSD_bin == 1, !is.na(PT64)) %>%
  group_by(PT64) %>%
  summarise(n_unweighted = n(), .groups = "drop")

Appendix Tables:

Appendix A:

appendix_a <- tibble(
  `Diagnosis` = c(
    "Attention Deficit Disorder",
    "Agoraphobia with or without panic disorder",
    "Agoraphobia without panic disorder",
    "Panic disorder",
    "Panic disorder with agoraphobia",
    "Panic disorder without agoraphobia",
    "Social phobia",
    "Specific phobia",
    "Dysthymia",
    "Generalized anxiety disorder",
    "Separation anxiety disorder",
    "Major depressive disorder",
    "Major depressive episode",
    "Anorexia",
    "Binge disorder",
    "Any binge disorder",
    "Bulimia",
    "Intermittent explosive disorder",
    "Conduct disorder",
    "Oppositional defiant disorder",
    "Alcohol abuse",
    "Alcohol dependence",
    "Drug abuse",
    "Drug dependence",
    "Bipolar I disorder",
    "Bipolar II disorder",
    "Subthreshold bipolar disorder"),
  `Comorbidity category` = c(
    "Behavior", "Fear", "Fear", "Fear", "Fear", "Fear",
    "Fear", "Fear",
    "Distress", "Distress", "Distress",
    "Depression", "Depression",
    "Behavior", "Behavior", "Behavior", "Behavior",
    "Behavior", "Behavior", "Behavior",
    "Substance", "Substance", "Substance", "Substance",
    "Other", "Other", "Other"
  ))

kable(appendix_a)
Diagnosis Comorbidity category
Attention Deficit Disorder Behavior
Agoraphobia with or without panic disorder Fear
Agoraphobia without panic disorder Fear
Panic disorder Fear
Panic disorder with agoraphobia Fear
Panic disorder without agoraphobia Fear
Social phobia Fear
Specific phobia Fear
Dysthymia Distress
Generalized anxiety disorder Distress
Separation anxiety disorder Distress
Major depressive disorder Depression
Major depressive episode Depression
Anorexia Behavior
Binge disorder Behavior
Any binge disorder Behavior
Bulimia Behavior
Intermittent explosive disorder Behavior
Conduct disorder Behavior
Oppositional defiant disorder Behavior
Alcohol abuse Substance
Alcohol dependence Substance
Drug abuse Substance
Drug dependence Substance
Bipolar I disorder Other
Bipolar II disorder Other
Subthreshold bipolar disorder Other

Appendix B:

appendix_b <- tibble(
  `PT64 code` = c(4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28),
  `Index trauma category` = c(
  "Being in a place with ongoing terror",
  "Being a refugee",
  "Being kidnapped",
  "Poisonous chemical exposure",
  "Serious car accident",
  "Very serious or life-threatening accident",
  "Major disaster",
  "Life-threatening illness",
  "Beaten up by caregiver",
  "Beaten up by romantic partner",
  "Beaten by somebody else",
  "Mugged or threatened with a weapon",
  "Raped",
  "Sexually assaulted",
  "Stalked",
  "Unexpected death of a loved one",
  "Traumatic event to loved one",
  "Witnessed physical fights at home",
  "Witnessed death or dead body / saw someone seriously hurt",
  "Accidentally caused serious injury or death",
  "Saw someone purposely injured, tortured, or killed",
  "Some other event",
  "Private event"))

kable(appendix_b)
PT64 code Index trauma category
4 Being in a place with ongoing terror
5 Being a refugee
6 Being kidnapped
7 Poisonous chemical exposure
8 Serious car accident
9 Very serious or life-threatening accident
10 Major disaster
12 Life-threatening illness
13 Beaten up by caregiver
14 Beaten up by romantic partner
15 Beaten by somebody else
16 Mugged or threatened with a weapon
17 Raped
18 Sexually assaulted
19 Stalked
20 Unexpected death of a loved one
22 Traumatic event to loved one
23 Witnessed physical fights at home
24 Witnessed death or dead body / saw someone seriously hurt
25 Accidentally caused serious injury or death
26 Saw someone purposely injured, tortured, or killed
27 Some other event
28 Private event
# Helper - make lookup for index trauma def:  
pt64_lookup <- tibble(
  trauma_code = c("4","5","6","7","8","9","10","12","13","14","15","16",
    "17","18","19","20","22","23","24","25","26","27","28"),
  trauma_label = c(
    "Ongoing terror",
    "Refugee",
    "Kidnapped",
    "Chemical exposure",
    "Serious car accident",
    "Life-threatening accident",
    "Major disaster",
    "Life-threatening illness",
    "Beaten by caregiver",
    "Beaten by romantic partner",
    "Beaten by someone else",
    "Mugged / threatened with weapon",
    "Rape",
    "Sexually assaulted",
    "Stalked",
    "Unexpected death of a loved one",
    "Traumatic event to loved one",
    "Witnessed fights at home",
    "Witnessed death / serious injury",
    "Accidentally caused injury/death",
    "Saw someone injured/tortured/killed",
    "Other event",
    "Private event"))

RQ 1:

# Make survey design variable:
design <- svydesign(
  id     = ~secu,
  strata  = ~str,
  weights = ~final_weight,
  data    = dat2,
  nest    = TRUE)

# Subset to PTSD cases with a valid index trauma:
rq1_design <- subset(design, PTSD_bin == 1 & !is.na(PT64))

# Weighted proportion of PTSD cases by index trauma category:
ptsd_index_dist <- svymean(~PT64_cat, rq1_design, na.rm = TRUE)

# Make df:
ptsd_index_dist_df <- data.frame(
  trauma_code = sub("^PT64_cat", "", names(coef(ptsd_index_dist))),
  proportion = as.numeric(coef(ptsd_index_dist)),
  se = as.numeric(SE(ptsd_index_dist))) %>%
  mutate(
    percent = proportion * 100,
    ci_low  = pmax(0, proportion - 1.96 * se) * 100,
    ci_high = pmin(1, proportion + 1.96 * se) * 100) %>%
  arrange(desc(proportion))

# Add un-weighted count by index trauma type: number of PTSD cases whose index trauma = that category
n_ptsd_cases <- dat2 %>%
  filter(PTSD_bin == 1, !is.na(PT64_cat)) %>%
  mutate(PT64_cat = as.character(PT64_cat)) %>%
  count(PT64_cat, name = "n_ptsd_cases")

# Join to final df:
ptsd_index_dist_df <- ptsd_index_dist_df %>%
  left_join(n_ptsd_cases, by = c("trauma_code" = "PT64_cat"))

TABLE 1:

# Make table:
rq1_table <- ptsd_index_dist_df %>%
  left_join(pt64_lookup, by = "trauma_code") %>%
  filter(!is.na(trauma_label),
         trauma_label != "NA",
         percent > 0) %>%
  arrange(desc(percent)) %>%
  mutate(percent = round(percent, 2),
         ci_low = round(ci_low, 2),
         ci_high = round(ci_high, 2),
         ci = paste0(ci_low, "–", ci_high)) %>%
    select(
      trauma_label,
      n_ptsd_cases,
      percent,
      ci) %>%
  rename(
    `Index Trauma` = trauma_label,
    `N (PTSD cases)` = n_ptsd_cases,
    `% of PTSD cases` = percent,
    `95% CI` = ci)

kable(rq1_table, align = "lccc",
  caption = "Distribution of Index Trauma Types Among Adolescents With PTSD")
Distribution of Index Trauma Types Among Adolescents With PTSD
Index Trauma N (PTSD cases) % of PTSD cases 95% CI
Unexpected death of a loved one 100 32.81 25.68–39.95
Rape 87 22.67 16.37–28.97
Private event 30 10.48 3.52–17.44
Beaten by caregiver 24 9.54 5.99–13.08
Witnessed fights at home 18 5.45 1.77–9.13
Other event 21 5.07 1.89–8.25
Witnessed death / serious injury 18 4.57 0.64–8.49
Stalked 9 3.21 0.59–5.82
Serious car accident 15 3.01 1.07–4.95
Mugged / threatened with weapon 7 1.91 0.37–3.45
Beaten by someone else 7 1.12 0.14–2.11
Ongoing terror 2 0.15 0–0.39
Traumatic event to loved one 1 0.02 0–0.07

FIGURE 1:

# Graph:
rq1_plot_df <- ptsd_index_dist_df %>%
  left_join(pt64_lookup, by = "trauma_code") %>%
  filter(
    !is.na(trauma_label),             
    trauma_label != "NA",         
    !is.na(percent),                
    percent > 0             
  ) %>%
  mutate(
    highlight = ifelse(trauma_code == "20", "Unexpected death", "Other"),
    trauma_label = fct_reorder(trauma_label, percent))

ggplot(rq1_plot_df, aes(x = trauma_label, y = percent, fill = highlight)) +
  geom_col() +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
  coord_flip() +
  scale_fill_manual(values = c(
    "Unexpected death" = "#DA8EE7",
    "Other" = "gray70")) +
  labs(
    title = "Distribution of Index Trauma Types Among Adolescents With PTSD ",
    x = "Index trauma",
    y = "% of PTSD cases",
    fill = NULL) +
  theme_minimal() +
  theme(plot.title.position = "plot",
        plot.title = element_text(hjust = 0.2))

RQ 2:

# Part 1: proportion
# Subset to people with a valid PTSD diagnosis & a valid index trauma:
rq2_design <- subset(design, !is.na(PT64) & !is.na(PTSD_bin))

# Proportion of PTSD within each index trauma: 
ptsd_by_trauma <- svyby(
  ~PTSD_bin,
  ~PT64_cat,
  design = rq2_design,
  FUN = svymean,
  na.rm = TRUE,
  vartype = c("se", "ci"))

# Make df:
ptsd_by_trauma_df <- as.data.frame(ptsd_by_trauma) %>%
  rename(
    trauma_code = PT64_cat,
    ptsd_prop = PTSD_bin,
    se = se,
    ci_low = ci_l,
    ci_high = ci_u
  ) %>%
  arrange(desc(ptsd_prop))

# Add un-weighted count of the total # of people who report experiencing the index trauma (with non-missing PTSD):
n_trauma_index_cases <- dat2 %>%
  filter(!is.na(PT64_cat), !is.na(PTSD_bin)) %>%
    mutate(PT64_cat = as.character(PT64_cat)) %>% 
  count(PT64_cat, name = "n_trauma_index_cases")

# Add 2 counts (# of index trauma exposure & # ptsd cases)
ptsd_by_trauma_df <- ptsd_by_trauma_df %>%
  left_join(n_trauma_index_cases, by = c("trauma_code" = "PT64_cat")) %>%
  left_join(n_ptsd_cases, by = c("trauma_code" = "PT64_cat"))

# Part 2: Logistics regression (is PTSD more/less likely after unexpected death vs. other trauma?)
rq2_model2 <- svyglm(PTSD_bin ~ death_index + Age + gender + racecat, 
                     design = rq2_design, 
                     family = quasibinomial())

TABLE 2:

# Presenting results:
rq2_results <- ptsd_by_trauma_df %>%
  mutate(trauma_code = as.character(trauma_code)) %>%
  left_join(pt64_lookup, by = "trauma_code") %>%
    filter(
    !is.na(trauma_label),
    trauma_label != "NA",
    ptsd_prop > 0
  ) %>%
  mutate(
    ptsd_percent = ptsd_prop * 100,
    ci_low_pct = ci_low * 100,
    ci_high_pct = ci_high * 100
  ) %>%
  select(
    trauma_code,
    trauma_label,
    n_trauma_index_cases,
    n_ptsd_cases,
    ptsd_percent,
    ci_low_pct,
    ci_high_pct) %>%
  arrange(desc(ptsd_percent))

rq2_table <- rq2_results %>%
  mutate(`Probability of PTSD (%)` = round(ptsd_percent, 2),
         `95% CI` = paste0(round(ci_low_pct, 2), "–", round(ci_high_pct, 2))) %>%
 select(
    `Index Trauma` = trauma_label,
    `N (Index trauma cases)` = n_trauma_index_cases,
    `N (PTSD cases)` = n_ptsd_cases,
    `Probability of PTSD (%)`,
    `95% CI`)

kable(rq2_table,align = "lcccc",
      caption = "PTSD Probability by Index Trauma Type")
PTSD Probability by Index Trauma Type
Index Trauma N (Index trauma cases) N (PTSD cases) Probability of PTSD (%) 95% CI
Beaten by caregiver 36 24 62.32 38.41–86.23
Private event 61 30 59.67 40.37–78.97
Rape 140 87 59.05 47.06–71.03
Witnessed death / serious injury 48 18 45.50 20.62–70.38
Unexpected death of a loved one 276 100 41.05 31.41–50.7
Beaten by someone else 17 7 38.89 11.27–66.52
Stalked 26 9 34.06 11.82–56.3
Mugged / threatened with weapon 20 7 31.75 5.04–58.46
Witnessed fights at home 65 18 31.73 14.62–48.84
Other event 74 21 26.86 13.5–40.23
Serious car accident 77 15 16.28 5.49–27.07
Ongoing terror 6 2 10.29 -6.53–27.11
Traumatic event to loved one 18 1 0.57 -0.62–1.76

FIGURE 2:

# Graph:
rq2_plot <- rq2_results %>%
    filter(
    !is.na(trauma_label),             
    trauma_label != "NA",         
    !is.na(ptsd_percent),                
    ptsd_percent > 0             
  ) %>%
  mutate(highlight = ifelse(trauma_code == "20", "Unexpected death", "Other"),
    trauma_label = forcats::fct_reorder(trauma_label, ptsd_percent))

ggplot(rq2_plot, aes(x = trauma_label, y = ptsd_percent, fill = highlight)) +
  geom_col() +
  geom_errorbar(aes(ymin = ci_low_pct, ymax = ci_high_pct), width = 0.2) +
  coord_flip() +
  scale_fill_manual(values = c(
    "Unexpected death" = "#DA8EE7",
    "Other" = "lightgray")) +
  labs(
    title = "PTSD Probability by Index Trauma Type",
    x = "Index trauma",
    y = "Probability of PTSD (%)",
    fill = NULL) +
  theme_minimal(base_size = 12) +
  theme(plot.title.position = "plot",
        plot.title = element_text(hjust = 0.2))

TABLE 3:

# Logistics table:
coef_tab <- summary(rq2_model2)$coefficients

# Make OR table
rq2_log <- data.frame(
  predictor = rownames(coef_tab),
  beta = coef(rq2_model2),
  SE  = coef_tab[, "Std. Error"],
  OR = exp(coef(rq2_model2)),
  CI_low = exp(confint(rq2_model2)[, 1]),
  CI_high = exp(confint(rq2_model2)[, 2]),
  p_value = coef_tab[, "Pr(>|t|)"],
  row.names = NULL)

# Display table
rq2_log_table <- rq2_log %>%
  mutate(
    predictor = recode(predictor,
      "(Intercept)" = "Intercept",
      "death_index" = "Unexpected death of loved one (vs. other trauma types)",
      "Age" = "Age",
      "gender" = "Gender",
      "racecat" = "Race/ethnicity"),
    `95% CI` = paste0("[", sprintf("%.3f", CI_low), ", ", sprintf("%.3f", CI_high), "]"),
    beta = sprintf("%.3f", beta),
    SE = sprintf("%.3f", SE),
    OR = sprintf("%.3f", OR),
    p_value = sprintf("%.3f", p_value)
  ) %>%
  select(
    Predictor = predictor,
    beta,
    SE,
    OR,
    `95% CI`,
    p_value)

kable(rq2_log_table, align = "lccccc",
  caption = "Logistics Regression for PTSD Probability: Unexpected Death of a Loved One vs. Other Index Traumas")
Logistics Regression for PTSD Probability: Unexpected Death of a Loved One vs. Other Index Traumas
Predictor beta SE OR 95% CI p_value
Intercept -3.005 1.204 0.050 [0.004, 0.568] 0.017
Unexpected death of loved one (vs. other trauma types) -0.027 0.257 0.973 [0.578, 1.637] 0.916
Age 0.036 0.070 1.036 [0.900, 1.194] 0.612
Gender 0.833 0.240 2.301 [1.415, 3.740] 0.001
Race/ethnicity 0.205 0.075 1.227 [1.055, 1.427] 0.009

RQ 3:

# Subset to PTSD cases with a valid index trauma:
rq3_design <- subset(design, PTSD_bin == 1 & !is.na(PT64))

# Proportion of comorbidity for unexp death vs. others:
rq3_res_1 <- svyby(
  ~any_disorder + n_comorbid + any_fear + any_distress + any_dep + any_behavior + any_substance + any_other,
  ~death_index,
  design = rq3_design,
  FUN = svymean,
  na.rm = TRUE)

rq3_df <- as.data.frame(rq3_res_1) %>%
  mutate(trauma_group = ifelse(death_index == 1,
                          "Unexpected death of loved one",
                          "All other index traumas")) %>%
  select(trauma_group, everything())

# Add unweighted count for the # of PTSD cases in each group (death vs. others):
counts_rq3 <- dat2 %>%
  filter(PTSD_bin == 1, !is.na(death_index)) %>%
  count(death_index, name = "n_ptsd_cases")

rq3_df <- rq3_df %>%
  left_join(counts_rq3, by = "death_index")

# Two-sample t-test for death vs. other index trauma: compare # of comorbidites:
ttest_res <- svyttest(n_comorbid ~ death_index, design = rq3_design)

# Chi-square tests for death vs. other index trauma: compare comorbidity categories:
chi_any_disorder  <- svychisq(~any_disorder + death_index, design = rq3_design)
chi_any_dep  <- svychisq(~any_dep + death_index, design = rq3_design)
chi_any_fear <- svychisq(~any_fear + death_index, design = rq3_design)
chi_any_distress <- svychisq(~any_distress + death_index, design = rq3_design)
chi_any_substance <- svychisq(~any_substance + death_index, design = rq3_design)
chi_any_behavior <- svychisq(~any_behavior + death_index, design = rq3_design)
chi_any_other <- svychisq(~any_other + death_index, design = rq3_design)

TABLE 4:

# Table:
rq3_table <- rq3_df %>%
  mutate(any_disorder_pct = any_disorder * 100,
    any_fear_pct = any_fear * 100,
    any_distress_pct = any_distress * 100,
    any_dep_pct = any_dep * 100,
    any_behavior_pct  = any_behavior * 100,
    any_substance_pct = any_substance * 100,
    any_other_pct = any_other * 100) %>%
  select(
    trauma_group,
    n_ptsd_cases,
    n_comorbid,
    any_disorder_pct,
    any_fear_pct,
    any_distress_pct,
    any_dep_pct,
    any_behavior_pct,
    any_substance_pct,
    any_other_pct) %>%
  mutate(across(c(n_comorbid, any_disorder_pct, any_fear_pct, any_distress_pct,
        any_dep_pct, any_behavior_pct, any_substance_pct, any_other_pct),
      ~ round(., 2)))

rq3_table_show <- rq3_table %>%
  rename(`Index Trauma Group` = trauma_group,
    `N (PTSD cases)` = n_ptsd_cases,
    `Mean # comorbid disorders` = n_comorbid,
    `Any disorder (%)` = any_disorder_pct,
    `Fear (%)` = any_fear_pct,
    `Distress (%)` = any_distress_pct,
    `Depression (%)` = any_dep_pct,
    `Behavior (%)` = any_behavior_pct,
    `Substance (%)` = any_substance_pct,
    `Other (%)` = any_other_pct)  %>% 
  arrange(desc(`Index Trauma Group`))

kable(rq3_table_show, align = "lccccccccc", 
      caption = "Comorbidity Profiles Among Adolescents With PTSD: Unexpected Death of a Loved One vs. Other Index Traumas")
Comorbidity Profiles Among Adolescents With PTSD: Unexpected Death of a Loved One vs. Other Index Traumas
Index Trauma Group N (PTSD cases) Mean # comorbid disorders Any disorder (%) Fear (%) Distress (%) Depression (%) Behavior (%) Substance (%) Other (%)
Unexpected death of loved one 100 4.35 86.60 63.61 35.40 52.69 53.50 33.19 16.15
All other index traumas 239 4.47 94.41 58.88 35.33 52.99 76.18 36.57 24.27

FIGURE 3:

rq3_graph <- rq3_df %>%
  select(trauma_group,
    any_disorder,
    any_fear,
    any_distress,
    any_dep,
    any_behavior,
    any_substance,
    any_other) %>%
  pivot_longer(cols = -trauma_group,
    names_to = "comorbidity_type",
    values_to = "proportion") %>%
  mutate(
    percent = proportion * 100,
    comorbidity_type = recode(
      comorbidity_type,
      any_disorder  = "Any disorder",
      any_fear      = "Fear",
      any_distress  = "Distress",
      any_dep       = "Depression",
      any_behavior  = "Behavior",
      any_substance = "Substance",
      any_other     = "Other"),
    comorbidity_type = factor(
      comorbidity_type, 
      levels = c("Any disorder", "Fear", "Distress", "Depression", "Behavior", "Substance", "Other"))) %>% 
  mutate(trauma_group = factor(
    trauma_group,
    levels = c("Unexpected death of loved one","All other index traumas")))

ggplot(rq3_graph, aes(x = comorbidity_type, y = percent, fill = trauma_group)) +
geom_col(position = position_dodge(width = 0.8), width = 0.7) + 
  scale_fill_manual(values = c(
  "Unexpected death of loved one" = "#DA8EE7",
  "All other index traumas" = "lightgrey")) +
  labs(
    title = "Comorbidity Profiles Among Adolescents With PTSD: Unexpected Death of a Loved One vs. Other Index Traumas",
    x = "Comorbidity Category",
    y = "Comorbidity Prevalence (%)",
    fill = NULL) +
  theme_minimal(base_size = 12) +
  theme(
  panel.background = element_rect(fill = "white", color = NA),
  plot.background  = element_rect(fill = "white", color = NA),
  axis.text.x = element_text(angle = 20, hjust = 1))

TABLE 5:

# Table results:
rq3_tests_table <- tibble(
  "Comorbidity Measure" = c(
    "Mean # of comorbid disorders",
    "Any disorder",
    "Depression",
    "Fear disorders",
    "Distress disorders",
    "Substance disorders",
    "Behavior disorders",
    "Other disorders"),
  Test = c(
    "Survey-weighted t-test",
    rep("Rao–Scott χ²", 7)),
  "Test Statistic" = c(
    round(ttest_res$statistic, 3),
    round(chi_any_disorder$statistic, 3),
    round(chi_any_dep$statistic, 3),
    round(chi_any_fear$statistic, 3),
    round(chi_any_distress$statistic, 3),
    round(chi_any_substance$statistic, 3),
    round(chi_any_behavior$statistic, 3),
    round(chi_any_other$statistic, 3)),
  df = c(
    round(ttest_res$parameter, 1),
    round(chi_any_disorder$parameter[2], 1),
    round(chi_any_dep$parameter[2], 1),
    round(chi_any_fear$parameter[2], 1),
    round(chi_any_distress$parameter[2], 1),
    round(chi_any_substance$parameter[2], 1),
    round(chi_any_behavior$parameter[2], 1),
    round(chi_any_other$parameter[2], 1)),
  p_value = c(
    ttest_res$p.value,
    chi_any_disorder$p.value,
    chi_any_dep$p.value,
    chi_any_fear$p.value,
    chi_any_distress$p.value,
    chi_any_substance$p.value,
    chi_any_behavior$p.value,
    chi_any_other$p.value)) %>%
  mutate(
    p_value = round(p_value, 3))

kable(rq3_tests_table, align = "lcccc",
      caption = "Comorbidity Differences Among Adolescents With PTSD: Unexpected Death of a Loved One vs. Other Index Traumas")
Comorbidity Differences Among Adolescents With PTSD: Unexpected Death of a Loved One vs. Other Index Traumas
Comorbidity Measure Test Test Statistic df p_value
Mean # of comorbid disorders Survey-weighted t-test -0.232 34 0.818
Any disorder Rao–Scott χ² 1.792 35 0.189
Depression Rao–Scott χ² 0.001 35 0.974
Fear disorders Rao–Scott χ² 0.292 35 0.592
Distress disorders Rao–Scott χ² 0.000 35 0.991
Substance disorders Rao–Scott χ² 0.130 35 0.721
Behavior disorders Rao–Scott χ² 4.328 35 0.045
Other disorders Rao–Scott χ² 1.239 35 0.273