R Setup & Load Data Files

library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gtsummary)
library(stringr)
# install.packages("ggdist")
library(ggdist)
library(ggpubr)
## Loading required package: ggplot2
library(glue)
# install.packages("janitor")
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(knitr)
# install.packages("naniar")
library(naniar)
library(patchwork)
# install.packages("palmerpenguins")
library(palmerpenguins)
# install.packages("easystats")
library(easystats)
## # Attaching packages: easystats 0.7.5
## ✔ bayestestR  0.17.0   ✔ correlation 0.8.8 
## ✔ datawizard  1.3.0    ✔ effectsize  1.0.2 
## ✔ insight     1.4.6    ✔ modelbased  0.14.0
## ✔ performance 0.16.0   ✔ parameters  0.28.3
## ✔ report      0.6.3    ✔ see         0.13.0
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.6
## ✔ lubridate 1.9.4     ✔ tibble    3.3.1
## ✔ purrr     1.2.1     ✔ tidyr     1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()             masks stats::filter()
## ✖ dplyr::lag()                masks stats::lag()
## ✖ datawizard::recode_values() masks dplyr::recode_values()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gt)
library(tidyr)

library(rpart)
library(rpart.plot)

library(visNetwork)
library(sparkline)

library(ggplot2)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggparty)
## Loading required package: partykit
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
## 
## Attaching package: 'mvtnorm'
## 
## The following object is masked from 'package:modelbased':
## 
##     standardize
## 
## The following object is masked from 'package:effectsize':
## 
##     standardize
## 
## The following object is masked from 'package:datawizard':
## 
##     standardize
library(partykit)

nis_2022_core <- read_sas(
  "/meta/databases/NIS/2022/nis_2022_core.sas7bdat",NULL)
View(nis_2022_core)

ICD 10 Codes to Define IDD Populations for 18-64 & 65+

idd_codes<- c("E7871", "E7872", "F70", "F71", "F72", "F73", "F78", "F78A1","F78A9", "F79", "P043", "Q860", "Q871", "Q8711", "Q8719", "Q872", "Q873", "Q875", "Q8781", "Q8783", "Q8784", "Q8785", "Q8789", "Q897", "Q898", "Q900", "Q901", "Q902", "Q909", "Q910", "Q911", "Q912", "Q913", "Q914", "Q915", "Q916", "Q917", "Q920", "Q921", "Q922", "Q925", "Q9261", "Q9262", "Q927", "Q928", "Q929", "Q930", "Q931", "Q932", "Q933", "Q934", "Q935", "Q9351", "Q9352", "Q9359", "Q937", "Q9381", "Q9388", "Q9389", "Q939", "Q952", "Q953", "Q992",    # Intellectual Disabilites & Related Conditions 
"F800", "F801", "F802", "F804", "F8081", "F8082", "F8089", "F809", "F810", "F812", "F8181", "F8189", "F819", "F82", "H9325", "R480", # Learning Disabilites
"F840", "F843", "F845", "F848", "F849", # Autism Spectrum Disorders
"G800", "G801", "G802", "G803", "G804", "G808", "G809" # Cerebral Palsy
) 

dx_cols <- paste0("I10_DX", 1:40)
nis_2022_core[dx_cols] <- lapply(nis_2022_core[dx_cols], as.character)
idd_rows <- apply(
  nis_2022_core[, dx_cols],
  1,
  function(x) any(x %in% idd_codes)
)

nis_idd <- nis_2022_core[idd_rows, ]

nis_idd$age_group <- NA
nis_idd$age_group[nis_idd$AGE >= 18 & nis_idd$AGE <= 64] <- "18-64"
nis_idd$age_group[nis_idd$AGE >= 65] <- "65+"

nis_idd_adults <- nis_idd[!is.na(nis_idd$age_group), ]
table(nis_idd_adults$age_group)
## 
## 18-64   65+ 
## 46683 10545
nis_idd_18_64 <- nis_idd_adults[nis_idd_adults$age_group == "18-64", ]
nis_idd_65plus <- nis_idd_adults[nis_idd_adults$age_group == "65+", ]

Distributions of Continuous Vars

bw = 3 

p1 <- ggplot(nis_idd_18_64, aes(x = AGE)) + 
  geom_histogram(binwidth = bw,
                 fill = "black", col =  "yellow") + 
  stat_function(fun = function(x)
    dnorm(x, mean = mean(nis_idd_18_64$AGE,
                         na.rm = TRUE),
          sd = sd(nis_idd_18_64$AGE,
                  na.rm=TRUE)) *
      length(nis_idd_18_64$AGE) * bw,
    geom = "area", alpha = 0.5,
    fill = "lightblue", col = "blue"
    ) + 
  labs (
    x = "Age in Years",
    title = "Histogram & Normal Curve"
  ) +
  theme_bw()

p2 <- ggplot(nis_idd_18_64, aes(sample = AGE)) +
  geom_qq() + 
  geom_qq_line(col="red") + 
  labs(
    y = "Age in Years",
    x = "Standard Normal Distribution",
    title = "Normal Q-Q plot"
  )+
  theme_bw()

p3 <- ggplot(nis_idd_18_64, aes(x = AGE, y = "" ))+ 
  geom_violin(fill = "cornsilk") +
  geom_boxplot(width = 0.2) + 
  stat_summary(
    fun = mean, geom = "point", 
    shape = 16, col = "red"
  ) + 
  labs(
    y = "", x = "Age in Years",
    title = "Boxplot with Violin"
  ) +
  theme_bw()

p1 + (p2 / p3 + plot_layout(heights = c(2,1))) + 
  plot_annotation(
    title = "2022 NIS IDD Population 18-64: Distribution of Ages"
  )
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-transparency is
## not supported on this device: reported only once per page

binw = 16 

p4 <- ggplot(nis_idd_18_64, aes(x = LOS)) + 
  geom_histogram(binwidth = binw,
                 fill = "black", col =  "yellow") + 
  stat_function(fun = function(x)
    dnorm(x, mean = mean(nis_idd_18_64$LOS,
                         na.rm = TRUE),
          sd = sd(nis_idd_18_64$LOS,
                  na.rm=TRUE)) *
      length(nis_idd_18_64$LOS) * binw,
    geom = "area", alpha = 0.5,
    fill = "lightblue", col = "blue"
    ) + 
  labs (
    x = "Length of Stay in Days",
    title = "Histogram & Normal Curve"
  ) +
  theme_bw()

p5 <- ggplot(nis_idd_18_64, aes(sample = LOS)) +
  geom_qq() + 
  geom_qq_line(col="red") + 
  labs(
    y = "Length of Stay in Days",
    x = "Standard Normal Distribution",
    title = "Normal Q-Q plot"
  )+
  theme_bw()

p6 <- ggplot(nis_idd_18_64, aes(x = LOS, y = "" ))+ 
  geom_violin(fill = "cornsilk") +
  geom_boxplot(width = 0.2) + 
  stat_summary(
    fun = mean, geom = "point", 
    shape = 16, col = "red"
  ) + 
  labs(
    y = "", x = "Length of Stay in Days",
    title = "Boxplot with Violin"
  ) +
  theme_bw()

p4 + (p5 / p6 + plot_layout(heights = c(2,1))) + 
  plot_annotation(
    title = "2022 NIS IDD Population 18-64: Distribution of Length of Stay"
  )
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_qq()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_qq_line()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-transparency is
## not supported on this device: reported only once per page

binnedw = 25000 

p7 <- ggplot(nis_idd_18_64, aes(x = TOTCHG)) + 
  geom_histogram(binwidth = binnedw,
                 fill = "black", col =  "yellow") + 
  stat_function(fun = function(x)
    dnorm(x, mean = mean(nis_idd_18_64$TOTCHG,
                         na.rm = TRUE),
          sd = sd(nis_idd_18_64$TOTCHG,
                  na.rm=TRUE)) *
      length(nis_idd_18_64$TOTCHG) * binnedw,
    geom = "area", alpha = 0.5,
    fill = "lightblue", col = "blue"
    ) + 
  labs (
    x = "Total Charges in $",
    title = "Histogram & Normal Curve"
  ) +
  theme_bw()

p8 <- ggplot(nis_idd_18_64, aes(sample = TOTCHG)) +
  geom_qq() + 
  geom_qq_line(col="red") + 
  labs(
    y = "Total Charges in $",
    x = "Standard Normal Distribution",
    title = "Normal Q-Q plot"
  )+
  theme_bw()

p9 <- ggplot(nis_idd_18_64, aes(x = TOTCHG, y = "" ))+ 
  geom_violin(fill = "cornsilk") +
  geom_boxplot(width = 0.2) + 
  stat_summary(
    fun = mean, geom = "point", 
    shape = 16, col = "red"
  ) + 
  labs(
    y = "", x = "Total Charges in $",
    title = "Boxplot with Violin"
  ) +
  theme_bw()

p7 + (p8 / p9 + plot_layout(heights = c(2,1))) + 
  plot_annotation(
    title = "2022 NIS IDD Population 18-64: Distribution of Total Charges"
  )
## Warning: Removed 365 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 365 rows containing non-finite outside the scale range
## (`stat_qq()`).
## Warning: Removed 365 rows containing non-finite outside the scale range
## (`stat_qq_line()`).
## Warning: Removed 365 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 365 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 365 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-transparency is
## not supported on this device: reported only once per page

summary(nis_idd_18_64$LOS)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   3.000   5.000   8.174   9.000 352.000      17
summary(nis_idd_18_64$TOTCHG)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     116   20822   39672   79804   80766 8943057     365

Demographics Information on Population 18-64

You can also embed plots, for example:

table1_data <- nis_idd_18_64 %>%
  select(
    AGE,
    FEMALE,
    RACE,
    PAY1,
    ZIPINC_QRTL,
    PL_NCHS,
    LOS,
    TOTCHG,
    DIED,
    ELECTIVE,
    DISPUNIFORM,
    TRAN_IN,
    TRAN_OUT) %>%
  mutate(
    FEMALE =factor(FEMALE, levels = c(0,1), labels = c("Male", "Female")),
    DIED = factor(DIED, levels = c(0,1), labels = c("Alive", "Dead")),
    ELECTIVE = factor(ELECTIVE, levels = c(0,1), labels = c("Non-Elective", "Elective")),
   
    RACE = factor(
      RACE,
      levels = c(1, 2, 3, 4, 5, 6 ),
      labels = c("White", "Black", "Hispanic", "Asian or Pacific Islander", "Native American", "Other")
      ),
      
    PAY1 = factor(
      PAY1,
      levels = c(1, 2, 3, 4, 5, 6),
      labels = c("Medicare", "Medicaid", "Private insurance", "Self-pay", "No Charge", "Other")
      ), 
      
    ZIPINC_QRTL = factor(
      ZIPINC_QRTL,
      levels = c(1, 2, 3, 4),
      labels = c("0-25th percentile", "26-50th percentile", "51st-75th percentile", "76th-100th percentile")
      ), 
    
    PL_NCHS = factor(
      PL_NCHS,
      levels = c(1, 2, 3, 4, 5, 6),
      labels = c("Central Counties >=1 million", "Fringe Counties >= 1 million", "County 250,000-999,999 population", "County 50,000- 249,999 population", "Micropolitan Counties", "Not Metropolitan or Micropolitan Counties")
      ),
    
    DISPUNIFORM = factor(
      DISPUNIFORM,
      levels = c(1, 2, 5, 6, 7, 20, 21, 99),
      labels = c("Routine (Discharged to Home or Self Care)", "Transfer to Short-Term Hospital", "Transfer Other: Skilled Nursing Facility, Intermediate Care Facility, Another Type of Facility", "Home Health Care", "Against Medical Advice", "Died in Hospital", "Discharged/transferred to court/law enforcement", "Discharged alive, destination unknown")
      ),
    
     TRAN_IN = factor(
      TRAN_IN,
      levels = c(0, 1, 2),
      labels = c("Not Transferred In", "Transferred in from a different acute hospital", "Transferred in from another type of health facility")
      ),
    
    TRAN_OUT = factor(
      TRAN_OUT,
      levels = c(0, 1, 2),
      labels = c("Not a Transfer", "Transferred out to a different acute hospital", "Transferred out to another type of health facility")
      )
  ) %>%
  rename(
    "Age, years" = AGE,
    "Sex" = FEMALE, 
    "Race/Ethnicity" = RACE,
    "Primary Payer" = PAY1,
    "ZIP Code Income Quartile" = ZIPINC_QRTL,
    "Urban-rural residence" = PL_NCHS,
    "Length of Stay, Days" = LOS, 
    "Total Charge, $" = TOTCHG,
    "In-hospital Mortality" = DIED, 
    "Admission Type" = ELECTIVE,
    "Discharge Disposition" = DISPUNIFORM,
    "Transfer In Status" = TRAN_IN,
    "Transfer Out Status" = TRAN_OUT)
          
tilde_sign <- rawToChar(as.raw(126))

stat_formula <- as.formula(
  paste("all_continuous()", tilde_sign, "\" {mean} \u00b1 {sd}\"")
)

table1_idd <- table1_data %>%
  tbl_summary(
    statistic = list(stat_formula),
    missing = "ifany"
  ) %>%
  bold_labels()

table1_idd
Characteristic N = 46,6831
Age in years at admission 39 ± 14
Sex
    Male 27,157 (58%)
    Female 19,515 (42%)
    Unknown 11
Race/Ethnicity
    White 29,844 (66%)
    Black 8,012 (18%)
    Hispanic 4,938 (11%)
    Asian or Pacific Islander 849 (1.9%)
    Native American 326 (0.7%)
    Other 1,385 (3.1%)
    Unknown 1,329
Primary Payer
    Medicare 20,492 (44%)
    Medicaid 16,627 (36%)
    Private insurance 7,841 (17%)
    Self-pay 809 (1.7%)
    No Charge 42 (<0.1%)
    Other 809 (1.7%)
    Unknown 63
ZIP Code Income Quartile
    0-25th percentile 14,808 (32%)
    26-50th percentile 12,282 (27%)
    51st-75th percentile 10,588 (23%)
    76th-100th percentile 8,266 (18%)
    Unknown 739
Urban-rural residence
    Central Counties >=1 million 13,242 (29%)
    Fringe Counties >= 1 million 11,127 (24%)
    County 250,000-999,999 population 10,074 (22%)
    County 50,000- 249,999 population 4,752 (10%)
    Micropolitan Counties 4,569 (9.8%)
    Not Metropolitan or Micropolitan Counties 2,636 (5.7%)
    Unknown 283
Length of stay (cleaned) 8 ± 15
    Unknown 17
Total charges (cleaned) 79,804 ± 167,297
    Unknown 365
In-hospital Mortality
    Alive 45,729 (98%)
    Dead 927 (2.0%)
    Unknown 27
Admission Type
    Non-Elective 41,518 (89%)
    Elective 5,089 (11%)
    Unknown 76
Discharge Disposition
    Routine (Discharged to Home or Self Care) 29,915 (64%)
    Transfer to Short-Term Hospital 1,075 (2.3%)
    Transfer Other: Skilled Nursing Facility, Intermediate Care Facility, Another Type of Facility 8,413 (18%)
    Home Health Care 5,773 (12%)
    Against Medical Advice 551 (1.2%)
    Died in Hospital 927 (2.0%)
    Discharged/transferred to court/law enforcement 0 (0%)
    Discharged alive, destination unknown 2 (<0.1%)
    Unknown 27
Transfer In Status
    Not Transferred In 37,942 (82%)
    Transferred in from a different acute hospital 4,493 (9.7%)
    Transferred in from another type of health facility 3,866 (8.3%)
    Unknown 382
Transfer Out Status
    Not a Transfer 37,168 (80%)
    Transferred out to a different acute hospital 1,075 (2.3%)
    Transferred out to another type of health facility 8,413 (18%)
    Unknown 27
1 Mean ± SD; n (%)

Distributions of Continuous Vars Without Outliers

## For charges 

Q1 <- quantile(table1_data$`Total Charge, $`, 0.25, na.rm = TRUE)
Q3 <- quantile(table1_data$`Total Charge, $`, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1 

lower_bound <- Q1 - (1.5 * IQR_val)
upper_bound <- Q3 + (1.5 * IQR_val)

# seeing how many values being removed
sum(table1_data$`Total Charge, $` < lower_bound | table1_data$`Total Charge, $` > upper_bound, na.rm = TRUE) 
## [1] 4496
## For hospital stay 
Quart1 <- quantile(table1_data$`Length of Stay, Days`, 0.25, na.rm = TRUE)
Quart3 <- quantile(table1_data$`Length of Stay, Days`, 0.75, na.rm = TRUE)
IQRStay <- Quart3 - Quart1 

l_bound <- Quart1 - (1.5 * IQRStay)
u_bound <- Quart3 + (1.5 * IQRStay)

# seeing how many values being removed
sum(table1_data$`Length of Stay, Days` < l_bound | table1_data$`Length of Stay, Days` > u_bound, na.rm = TRUE) 
## [1] 3683
nis_idd_18_64_no_outliers <- table1_data %>%
  filter(`Total Charge, $` >= lower_bound & `Total Charge, $` <= upper_bound,
         `Length of Stay, Days` >= l_bound & `Length of Stay, Days` <= u_bound)


## new plots 

binnn = 25000 

p10 <- ggplot(nis_idd_18_64_no_outliers, aes(x = `Total Charge, $`)) + 
  geom_histogram(binwidth = binnn,
                 fill = "black", col =  "yellow") + 
  stat_function(fun = function(x)
    dnorm(x, mean = mean(nis_idd_18_64_no_outliers$`Total Charge, $`,
                         na.rm = TRUE),
          sd = sd(nis_idd_18_64_no_outliers$`Total Charge, $`,
                  na.rm=TRUE)) *
      length(nis_idd_18_64_no_outliers$`Total Charge, $`) * binnn,
    geom = "area", alpha = 0.5,
    fill = "lightblue", col = "blue"
    ) + 
  labs (
    x = "Total Charges",
    title = "Histogram & Normal Curve"
  ) +
  theme_bw()

p11 <- ggplot(nis_idd_18_64_no_outliers, aes(sample = `Total Charge, $`)) +
  geom_qq() + 
  geom_qq_line(col="red") + 
  labs(
    y = "Total Charges",
    x = "Standard Normal Distribution",
    title = "Normal Q-Q plot"
  )+
  theme_bw()

p12 <- ggplot(nis_idd_18_64_no_outliers, aes(x =  `Total Charge, $`, y = "" ))+ 
  geom_violin(fill = "cornsilk") +
  geom_boxplot(width = 0.2) + 
  stat_summary(
    fun = mean, geom = "point", 
    shape = 16, col = "red"
  ) + 
  labs(
    y = "", x = "Total Charges",
    title = "Boxplot with Violin"
  ) +
  theme_bw()

p10 + (p11 / p12 + plot_layout(heights = c(2,1))) + 
  plot_annotation(
    title = "2022 NIS IDD Population 18-64: Distribution of Total Charges No Outliers"
  )
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-transparency is
## not supported on this device: reported only once per page

binner = 4 

p13 <- ggplot(nis_idd_18_64_no_outliers, aes(x = `Length of Stay, Days`)) + 
  geom_histogram(binwidth = binner,
                 fill = "black", col =  "yellow") + 
  stat_function(fun = function(x)
    dnorm(x, mean = mean(nis_idd_18_64_no_outliers$`Length of Stay, Days`,
                         na.rm = TRUE),
          sd = sd(nis_idd_18_64_no_outliers$`Length of Stay, Days`,
                  na.rm=TRUE)) *
      length(nis_idd_18_64_no_outliers$`Length of Stay, Days`) * binner,
    geom = "area", alpha = 0.5,
    fill = "lightblue", col = "blue"
    ) + 
  labs (
    x = "Length of Stay",
    title = "Histogram & Normal Curve"
  ) +
  theme_bw()

p14 <- ggplot(nis_idd_18_64_no_outliers, aes(sample = `Length of Stay, Days`)) +
  geom_qq() + 
  geom_qq_line(col="red") + 
  labs(
    y = "Length of Stay",
    x = "Standard Normal Distribution",
    title = "Normal Q-Q plot"
  )+
  theme_bw()

p15 <- ggplot(nis_idd_18_64_no_outliers, aes(x = `Length of Stay, Days`, y = "" ))+ 
  geom_violin(fill = "cornsilk") +
  geom_boxplot(width = 0.2) + 
  stat_summary(
    fun = mean, geom = "point", 
    shape = 16, col = "red"
  ) + 
  labs(
    y = "", x = "Length of Stay",
    title = "Boxplot with Violin"
  ) +
  theme_bw()

p13 + (p14 / p15 + plot_layout(heights = c(2,1))) + 
  plot_annotation(
    title = "2022 NIS IDD Population 18-64: Distribution of Length of Stay No Outliers"
  )
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-transparency is
## not supported on this device: reported only once per page

ICD-10 DX_1 Information on Population 18-64

dx1_table <- nis_idd_18_64 %>%
  filter(!is.na(I10_DX1)) %>%
  count(I10_DX1, sort= TRUE) %>%
  slice_head(n=20) %>%
  mutate(
    Percentage = round(n / nrow(nis_idd_18_64) * 100, 1),
    Description = case_when(
     I10_DX1 == "A419" ~ "Sepsis, unspecified organism", 
     I10_DX1 == "U071" ~ "COVID-19",
     I10_DX1 == "F250" ~ "Schizoaffective disorder, bipolar type",
     I10_DX1 == "J690" ~ "Pneumonitis due to inhalation of food and vomit",
     I10_DX1 == "F332" ~ "Major depressive disorder, recurrent severe without psychotic features",
     I10_DX1 == "G40909" ~ "Epilepsy, unspecified, not intractable, without status epilepticus",
     I10_DX1 == "A4189" ~ "Other specified sepsis",
     I10_DX1 == "F209" ~ "Schizophrenia, unspecified",
     I10_DX1 == "F259" ~ "Schizoaffective disorder, unspecified",
     I10_DX1 == "F319" ~ "Bipolar disorder, unspecified",
     I10_DX1 == "J189" ~ "Pneumonia, unspecified organism",
     I10_DX1 == "N390" ~ "Urinary tract infection, site not specified",
     I10_DX1 == "N179" ~ "Acute kindey failure, unspecified",
     I10_DX1 == "F840" ~ "Autistic disorder",
     I10_DX1 == "F29" ~ "Unspecified psychosis not due to a substance or known physiological condition",
     I10_DX1 == "E1010" ~ "Type 1 diabetes mellitus with ketoacidosis without coma",
     I10_DX1 == "J9601" ~ "Acute respiratory failure with hypoxia",
     I10_DX1 == "A4151" ~ "Sepsi due to Escherichia coli",
     I10_DX1 == "F200" ~ "Paranoid Schizophrenia",
     I10_DX1 == "F333" ~ "Major depressive disorder, recurrent, severe with psychotic symptoms",
     TRUE ~ "Unknown"
    )
  ) %>%
  rename(
    "Primary Diagnosis Code" = I10_DX1,
    "Frequency" = n, 
    "Percentage(%)" = Percentage
  ) %>%
  select("Primary Diagnosis Code", "Description", "Frequency", "Percentage(%)")

dx1_table
## # A tibble: 20 × 4
##    `Primary Diagnosis Code` Description                Frequency `Percentage(%)`
##    <chr>                    <chr>                          <int>           <dbl>
##  1 A419                     Sepsis, unspecified organ…      4677            10  
##  2 U071                     COVID-19                        1452             3.1
##  3 F250                     Schizoaffective disorder,…      1322             2.8
##  4 J690                     Pneumonitis due to inhala…      1133             2.4
##  5 F332                     Major depressive disorder…       879             1.9
##  6 G40909                   Epilepsy, unspecified, no…       772             1.7
##  7 A4189                    Other specified sepsis           754             1.6
##  8 F209                     Schizophrenia, unspecified       729             1.6
##  9 F259                     Schizoaffective disorder,…       554             1.2
## 10 F319                     Bipolar disorder, unspeci…       546             1.2
## 11 J189                     Pneumonia, unspecified or…       535             1.1
## 12 N390                     Urinary tract infection, …       453             1  
## 13 N179                     Acute kindey failure, uns…       429             0.9
## 14 F840                     Autistic disorder                402             0.9
## 15 F29                      Unspecified psychosis not…       358             0.8
## 16 E1010                    Type 1 diabetes mellitus …       348             0.7
## 17 J9601                    Acute respiratory failure…       335             0.7
## 18 A4151                    Sepsi due to Escherichia …       329             0.7
## 19 F200                     Paranoid Schizophrenia           322             0.7
## 20 F333                     Major depressive disorder…       322             0.7
dx1_table %>%
  gt() %>%
  tab_header(title = "Top 20 Primary Diagnoses in IDD Population") %>%
  fmt_number(columns = "Frequency", decimals= 0, use_seps = TRUE) %>%
  cols_width("Description" ~ px(400))
Top 20 Primary Diagnoses in IDD Population
Primary Diagnosis Code Description Frequency Percentage(%)
A419 Sepsis, unspecified organism 4,677 10.0
U071 COVID-19 1,452 3.1
F250 Schizoaffective disorder, bipolar type 1,322 2.8
J690 Pneumonitis due to inhalation of food and vomit 1,133 2.4
F332 Major depressive disorder, recurrent severe without psychotic features 879 1.9
G40909 Epilepsy, unspecified, not intractable, without status epilepticus 772 1.7
A4189 Other specified sepsis 754 1.6
F209 Schizophrenia, unspecified 729 1.6
F259 Schizoaffective disorder, unspecified 554 1.2
F319 Bipolar disorder, unspecified 546 1.2
J189 Pneumonia, unspecified organism 535 1.1
N390 Urinary tract infection, site not specified 453 1.0
N179 Acute kindey failure, unspecified 429 0.9
F840 Autistic disorder 402 0.9
F29 Unspecified psychosis not due to a substance or known physiological condition 358 0.8
E1010 Type 1 diabetes mellitus with ketoacidosis without coma 348 0.7
J9601 Acute respiratory failure with hypoxia 335 0.7
A4151 Sepsi due to Escherichia coli 329 0.7
F200 Paranoid Schizophrenia 322 0.7
F333 Major depressive disorder, recurrent, severe with psychotic symptoms 322 0.7

Top 15 Primary Procedure Codes in IDD Patients with Sepsis as Primary Diagnosis

sepsis_pr1_table <- nis_idd_18_64 %>%
  filter(I10_DX1 == "A419") %>%
  filter(!is.na(I10_PR1) & I10_PR1 !="") %>%
  count(I10_PR1, sort = TRUE) %>%
  slice_head(n=15) %>%
  mutate(
    Percentage = round(n / nrow(filter(nis_idd_18_64, I10_DX1 == "A419"))* 100, 1),
    Description = case_when(
     I10_PR1 == "5A1955Z" ~ "Respiratory ventilation, greater than 96 consecutive hours", 
     I10_PR1 == "02HV33Z" ~ "Insertion of infusion device into superior vena cava, percutaneous approach",
     I10_PR1 == "5A1945Z" ~ "Respiratory ventilation, 24-96 consecutive hours",
     I10_PR1 == "0BH17EZ" ~ "Insertion of endotracheal airway into trachea, via natural or artifical opening",
     I10_PR1 == "5A09357" ~ "Assistance with respiratory ventilation,less than 24 consecutive hours, continuous positive airway pressure",
     I10_PR1 == "0DH63UZ" ~ "Insertion of feeding devicee into stomach, percutaneous approach",
     I10_PR1 == "3E03329" ~ "Introduction of anti-infective into peripheral vein, percutaneous approach",
     I10_PR1 == "3E0G76Z" ~ "Introduction of other therapeutic substance into upper GI, endoscopic approach",
     I10_PR1 == "3E033XZ" ~ "Introduction of other therapeutic substance into peripheral vein, percutaneous approach",
     I10_PR1 == "30233N1" ~ "Transfusion of nonautologous red blood cells into peripheral vein, percutaneous approach",
     I10_PR1 == "XW033E5" ~ "Introduction of remdesivir anti-infective into peripheral vein, percutaneous approach",
     I10_PR1 == "009U3ZX" ~ "Drainage of spinal canal, percutaneous approach, diagnostic",
     I10_PR1 == "5A1935Z" ~ "Respiratory ventilation, less than 24 consecutive hours",
     I10_PR1 == "8E0ZXY6" ~ "Robotic assisted procedure of trunk region, external approach",
     I10_PR1 == "0FT44ZZ" ~ "Resection of gallbladder, percutaneous endoscopic approach",
     TRUE ~ "Unknown"
    )
        ) %>%
  rename(
    "ICD-10-PCS Code" = I10_PR1,
    "Frequency" = n,
    "Percent(%)" = Percentage) %>%
  select("ICD-10-PCS Code", "Description", "Frequency", "Percent(%)")

sepsis_pr1_table %>%
  gt() %>%
  tab_header(
    title = "Top 15 Primary Procedure Codes in IDD Patients with Sepsis as Primary Diagnosis"
  ) %>%
  fmt_number(columns = "Frequency", decimals = 0, use_seps = TRUE) %>%
  tab_style(
    style = cell_text(weight = "bold"), 
    locations = cells_column_labels()
  )
Top 15 Primary Procedure Codes in IDD Patients with Sepsis as Primary Diagnosis
ICD-10-PCS Procedure 1 Description Frequency Percent(%)
5A1955Z Respiratory ventilation, greater than 96 consecutive hours 257 5.5
02HV33Z Insertion of infusion device into superior vena cava, percutaneous approach 223 4.8
5A1945Z Respiratory ventilation, 24-96 consecutive hours 104 2.2
0BH17EZ Insertion of endotracheal airway into trachea, via natural or artifical opening 97 2.1
5A09357 Assistance with respiratory ventilation,less than 24 consecutive hours, continuous positive airway pressure 84 1.8
0DH63UZ Insertion of feeding devicee into stomach, percutaneous approach 58 1.2
3E03329 Introduction of anti-infective into peripheral vein, percutaneous approach 57 1.2
3E0G76Z Introduction of other therapeutic substance into upper GI, endoscopic approach 57 1.2
3E033XZ Introduction of other therapeutic substance into peripheral vein, percutaneous approach 54 1.2
30233N1 Transfusion of nonautologous red blood cells into peripheral vein, percutaneous approach 47 1.0
XW033E5 Introduction of remdesivir anti-infective into peripheral vein, percutaneous approach 46 1.0
009U3ZX Drainage of spinal canal, percutaneous approach, diagnostic 45 1.0
5A1935Z Respiratory ventilation, less than 24 consecutive hours 43 0.9
8E0ZXY6 Robotic assisted procedure of trunk region, external approach 38 0.8
0FT44ZZ Resection of gallbladder, percutaneous endoscopic approach 34 0.7

ICD-10 PR_1 Information on Population 18-64

pr1_table <- nis_idd_18_64 %>%
  filter(!is.na(I10_PR1)) %>%
  count(I10_PR1, sort= TRUE) %>%
  slice_head(n=20) %>%
  mutate(
    Percentage = round(n / nrow(nis_idd_18_64) * 100, 1),
    Description = case_when(
     I10_PR1 == "" ~ "No procedure recorded", 
     I10_PR1 == "GZHZZZZ" ~ "Group Psychotherapy",
     I10_PR1 == "XW033E5" ~ "Introduction of remdesivir anti-infective into peripheral vein, percutaneous approach",
     I10_PR1 == "4A10X4Z" ~ "Monitoring of central nervous system electrical activity, external approach",
     I10_PR1 == "02HV33Z" ~ "Insertion of infusion device into superior vena cava, percutaneous approach",
     I10_PR1 == "5A1955Z" ~ "Respiratory ventilation, greater than 96 consecutive hours",
     I10_PR1 == "10E0XZZ" ~ "Delivery of products of contraception, external approach",
     I10_PR1 == "5A09357" ~ "Assistance with respiratory ventilation,less than 24 consecutive hours, continuous positive airway pressure",
     I10_PR1 == "10D00Z1" ~ "Extraction of products of contraception, low cervical, open approach",
     I10_PR1 == "5A1945Z" ~ "Respiratory ventilation,24-96 consecutive hours",
     I10_PR1 == "8E0ZXY6" ~ "Isolation",
     I10_PR1 == "5A1D70Z" ~ "Performance of urinary filtration, intermittent, less than 6 hours per day",
     I10_PR1 == "30233N1" ~ "Transfusion of nonautologous (donor) red blood cells into peripheral vein, percutaneous approach",
     I10_PR1 == "0BH17EZ" ~ "Insertion of endotracheal airway into trachea, via natural or artifical opening",
     I10_PR1 == "009U3ZX" ~ "Drainage of spinal canal, percutaneous approach, diagnostic",
     I10_PR1 == "0DH63UZ" ~ "Insertion of feeding device into stomach, percutaneous approach",
     I10_PR1 == "0DJ08ZZ" ~ "Inspection of upper intestinal tract, via natural or artifical opening endoscopic",
     I10_PR1 == "GZ3ZZZ" ~ "Mental health medication management",
     I10_PR1 == "0FT44ZZ" ~ "Resection of gallbladder, percutaneous endoscopic approach",
     I10_PR1 == "3E0G76Z" ~ "Introduction of a nutritional substance into the upper GI tract, via a natural or artifical opening",
     TRUE ~ "Unknown"
    )
  ) %>%
  rename(
    "Primary Procedure Code" = I10_PR1,
    "Frequency" = n, 
    "Percentage(%)" = Percentage
  ) %>%
  select("Primary Procedure Code", "Description", "Frequency", "Percentage(%)")

pr1_table
## # A tibble: 20 × 4
##    `Primary Procedure Code` Description                Frequency `Percentage(%)`
##    <chr>                    <chr>                          <int>           <dbl>
##  1 ""                       No procedure recorded          22390            48  
##  2 "GZHZZZZ"                Group Psychotherapy             1037             2.2
##  3 "XW033E5"                Introduction of remdesivi…       861             1.8
##  4 "4A10X4Z"                Monitoring of central ner…       789             1.7
##  5 "02HV33Z"                Insertion of infusion dev…       772             1.7
##  6 "5A1955Z"                Respiratory ventilation, …       760             1.6
##  7 "10E0XZZ"                Delivery of products of c…       672             1.4
##  8 "5A09357"                Assistance with respirato…       500             1.1
##  9 "10D00Z1"                Extraction of products of…       469             1  
## 10 "5A1945Z"                Respiratory ventilation,2…       461             1  
## 11 "8E0ZXY6"                Isolation                        453             1  
## 12 "5A1D70Z"                Performance of urinary fi…       421             0.9
## 13 "30233N1"                Transfusion of nonautolog…       354             0.8
## 14 "0BH17EZ"                Insertion of endotracheal…       353             0.8
## 15 "009U3ZX"                Drainage of spinal canal,…       313             0.7
## 16 "0DH63UZ"                Insertion of feeding devi…       292             0.6
## 17 "0DJ08ZZ"                Inspection of upper intes…       292             0.6
## 18 "GZ3ZZZZ"                Unknown                          280             0.6
## 19 "0FT44ZZ"                Resection of gallbladder,…       237             0.5
## 20 "3E0G76Z"                Introduction of a nutriti…       234             0.5
pr1_table %>%
  gt() %>%
  tab_header(title = "Top 20 Primary Procedures in IDD Population") %>%
  fmt_number(columns = "Frequency", decimals = 0, use_seps = TRUE)
Top 20 Primary Procedures in IDD Population
ICD-10-PCS Procedure 1 Description Frequency Percentage(%)
No procedure recorded 22,390 48.0
GZHZZZZ Group Psychotherapy 1,037 2.2
XW033E5 Introduction of remdesivir anti-infective into peripheral vein, percutaneous approach 861 1.8
4A10X4Z Monitoring of central nervous system electrical activity, external approach 789 1.7
02HV33Z Insertion of infusion device into superior vena cava, percutaneous approach 772 1.7
5A1955Z Respiratory ventilation, greater than 96 consecutive hours 760 1.6
10E0XZZ Delivery of products of contraception, external approach 672 1.4
5A09357 Assistance with respiratory ventilation,less than 24 consecutive hours, continuous positive airway pressure 500 1.1
10D00Z1 Extraction of products of contraception, low cervical, open approach 469 1.0
5A1945Z Respiratory ventilation,24-96 consecutive hours 461 1.0
8E0ZXY6 Isolation 453 1.0
5A1D70Z Performance of urinary filtration, intermittent, less than 6 hours per day 421 0.9
30233N1 Transfusion of nonautologous (donor) red blood cells into peripheral vein, percutaneous approach 354 0.8
0BH17EZ Insertion of endotracheal airway into trachea, via natural or artifical opening 353 0.8
009U3ZX Drainage of spinal canal, percutaneous approach, diagnostic 313 0.7
0DH63UZ Insertion of feeding device into stomach, percutaneous approach 292 0.6
0DJ08ZZ Inspection of upper intestinal tract, via natural or artifical opening endoscopic 292 0.6
GZ3ZZZZ Unknown 280 0.6
0FT44ZZ Resection of gallbladder, percutaneous endoscopic approach 237 0.5
3E0G76Z Introduction of a nutritional substance into the upper GI tract, via a natural or artifical opening 234 0.5

Top 10 Primary Diagnoses in IDD Patients with No Procedure Recorded

no_proc_dx1_table <- nis_idd_18_64 %>%
  filter(is.na(I10_PR1)| I10_PR1 == "") %>%
  filter(!is.na(I10_DX1) & I10_DX1 != "") %>% 
  count(I10_DX1, sort =TRUE) %>% 
  slice_head(n=15) %>% 
  mutate(
    Percentage = round(n / nrow(filter(nis_idd_18_64, is.na(I10_PR1) | I10_PR1 == "")) * 100,1),
    Description = case_when(
     I10_DX1 == "A419" ~ "Sepsis, unspecified organism", 
     I10_DX1 == "F250" ~ "Schizoaffective disorder, bipolar type",
     I10_DX1 == "F332" ~ "Major depressive disorder, recurrent severe without psychotic features",
     I10_DX1 == "J690" ~ "Pneumonitis due to inhalation of food and vomit",
     I10_DX1 == "F209" ~ "Schizophrenia, unspecified",
     I10_DX1 == "G40909" ~ "Epilepsy, unspecified, not intractable, without status epilepticus",
     I10_DX1 == "F319" ~ "Bipolar disorder, unspecified",
     I10_DX1 == "F259" ~ "Schizoaffective disorder, unspecified",
     I10_DX1 == "U071" ~ "COVID-19",
     I10_DX1 == "J189" ~ "Pneumonia, unspecified organism",
     I10_DX1 == "F840" ~ "Austistic Disorder",
     I10_DX1 == "N390" ~ "Urinary tract infection, site not specified",
     I10_DX1 == "F29"  ~ "Unspecified psychosis not due to a substance or known physiological condition",
     I10_DX1 == "N179" ~ "Acute kindey failure, unspecified",
     I10_DX1 == "E1010" ~ "Type 1 diabetes mellitus with ketoacidosis without coma",
     TRUE ~ "Unknown"
    )
  ) %>%
  rename(
    "Primary Diagnosis Code" = I10_DX1,
    "Frequency" = n,
    "Percent (%)"= Percentage) %>%
  select("Primary Diagnosis Code", "Description", "Frequency", "Percent (%)")

no_proc_dx1_table %>%
  gt() %>%
  tab_header(
    title = "Top 10 Primary Diagnoses in IDD Patients with No Procedure Recorded"
  ) %>%
  fmt_number(columns = "Frequency", decimals = 0, use_seps = TRUE) %>%
  tab_footnote(
    footnote = "Restricted to hospitalizations where I10_PR is blank"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  )
Top 10 Primary Diagnoses in IDD Patients with No Procedure Recorded
Primary Diagnosis Code Description Frequency Percent (%)
A419 Sepsis, unspecified organism 1,856 8.3
F250 Schizoaffective disorder, bipolar type 1,017 4.5
F332 Major depressive disorder, recurrent severe without psychotic features 665 3.0
J690 Pneumonitis due to inhalation of food and vomit 611 2.7
F209 Schizophrenia, unspecified 576 2.6
G40909 Epilepsy, unspecified, not intractable, without status epilepticus 507 2.3
F319 Bipolar disorder, unspecified 452 2.0
F259 Schizoaffective disorder, unspecified 411 1.8
U071 COVID-19 391 1.7
J189 Pneumonia, unspecified organism 353 1.6
F840 Austistic Disorder 333 1.5
N390 Urinary tract infection, site not specified 325 1.5
F29 Unspecified psychosis not due to a substance or known physiological condition 292 1.3
N179 Acute kindey failure, unspecified 281 1.3
E1010 Type 1 diabetes mellitus with ketoacidosis without coma 279 1.2
Restricted to hospitalizations where I10_PR is blank

Psychiatric Comorbidity Information on IDD Population 18-64

psych_prefix <- c(
  "F630", "F631", "F632", "F633", "F6381", "F6389", "F639", "F900", "F901", "F902", "F908", "F909", "F910", "F912", "F913", "F918", "F919", # ADHD, Conduct Disorders, and Hyperkinetic Syndrome
  "F064", "F4000", "F4001", "F4002", "F4010", "F4011", "F40210", "F40218" , "F40220", "F40228", "F40230", "F40231", "F40232", "F40233", "F40240", "F40241", "F40242", "F40243", "F40248", "F40290", "F40291", "F40298", "F408", "F409", "F410", "F411", "F413", "F418", "F419", "F42", "F422", "F423", "F424", "F428", "F429", "F430", "F4310", "F4311", "F4312", "F449", "F458", "F488", "F489", "F938", "F99", "R45.2", "R455", "R456", "R457", # Anxiety Disorders
  "F3010", "F3011", "F3012", "F3013", "F302", "F303", "F304", "F308", "F309", "F310", "F3110", "F3111", "F3112", "F3113", "F312", "F3130", "F3131", "F3132", "F314","F315", "F3160", "F3161","F3162", "F3163", "F3164", "F3170", "F3171", "F3172", "F3173", "F3174", "F3175", "F3176", "F3177", "F3178", "F3181", "F319", "F338", "F3481", "F3489", "F349", "F39",  # Bipolar Disorder
  "F320", "F321", "F322", "F323", "F324", "F325", "F3289", "F329", "F32A", "F330", "F331", "F332", "F333", "F3340", "F3341", "F3342", "F338", "F339", "F341",   # Depressive Disorders
  "F21", "F340", "F341", "F600", "F601", "F602", "F603", "F604", "F605", "F606", "F607", "F6081", "F6089", "F609", "F6810", "F6811", "F6812", "F6813", "F69",    # Personality Disorders
  "F4310", "F4311", "F4312",  #PTSD 
  "F200", "F201", "F202", "F203", "F205", "F2081", "F2089", "F209", "F250", "F251", "F258", "F259",   # Schizophrenia 
  "F060", "F062", "F200", "F201", "F202", "F203", "F205", "F2081"
, "F2089", "F209", "F21", "F22", "F23", "F24", "F250", "F251", "F258", "F259", "F28", "F29", "F323", "F333", "F4489"    # Schizophrenia and Other Psychotic Disorders  
)

dx_cols <- paste0("I10_DX", 1:40)
all_dx <- unlist(nis_idd_18_64[, dx_cols])
all_dx <- all_dx[!is.na(all_dx) & all_dx != ""]
psych_dx <- all_dx[all_dx %in% psych_prefix]
psych_counts <- sort(table(psych_dx), decreasing = TRUE)
top20_psych <- head(psych_counts, 20)
top20_psych
## psych_dx
##  F419  F32A  F319  F909 F4310  F209  F411  F250  F259  F603  F429   F39  F332 
##  8093  4526  3778  3331  2806  2349  2290  1840  1641  1553  1382  1300  1102 
##  F329   F29  F639  F410 F6381  F200  F251 
##  1094   816   764   694   681   617   407
top20_df <- data.frame(
  ICD10_Code=names(top20_psych), 
  Count = as.numeric(top20_psych)
)
top20_df
##    ICD10_Code Count
## 1        F419  8093
## 2        F32A  4526
## 3        F319  3778
## 4        F909  3331
## 5       F4310  2806
## 6        F209  2349
## 7        F411  2290
## 8        F250  1840
## 9        F259  1641
## 10       F603  1553
## 11       F429  1382
## 12        F39  1300
## 13       F332  1102
## 14       F329  1094
## 15        F29   816
## 16       F639   764
## 17       F410   694
## 18      F6381   681
## 19       F200   617
## 20       F251   407
top20_df$Percent <- round(100 * top20_df$Count / nrow(nis_idd_18_64), 2)
top20_df
##    ICD10_Code Count Percent
## 1        F419  8093   17.34
## 2        F32A  4526    9.70
## 3        F319  3778    8.09
## 4        F909  3331    7.14
## 5       F4310  2806    6.01
## 6        F209  2349    5.03
## 7        F411  2290    4.91
## 8        F250  1840    3.94
## 9        F259  1641    3.52
## 10       F603  1553    3.33
## 11       F429  1382    2.96
## 12        F39  1300    2.78
## 13       F332  1102    2.36
## 14       F329  1094    2.34
## 15        F29   816    1.75
## 16       F639   764    1.64
## 17       F410   694    1.49
## 18      F6381   681    1.46
## 19       F200   617    1.32
## 20       F251   407    0.87
top20_df$Description <- case_when(
  top20_df$ICD10_Code == "F419" ~ "Anxiety disorder, unspecified",
  top20_df$ICD10_Code == "F32A" ~ "Depression, unspecified",
  top20_df$ICD10_Code == "F319" ~ "Bipolar disorder, unspecified",
  top20_df$ICD10_Code == "F909" ~ "Attention-deficit hyperactivity disorder, unspecified",
  top20_df$ICD10_Code == "F4310" ~ "Post-traumatic stress disorder, unspecified",
  top20_df$ICD10_Code == "F209" ~ "Schizophrenia, unspecified",
  top20_df$ICD10_Code == "F411" ~ "Generalized anxiety disorder",
  top20_df$ICD10_Code == "F250" ~ "Schizoaffective disorder, bipolar type",
  top20_df$ICD10_Code == "F259" ~ "Schizoaffective disorder, unspecified",
  top20_df$ICD10_Code == "F603" ~ "Borderline personality disorder",
  top20_df$ICD10_Code == "F429" ~ "Obsessive-compulsive disorder, unspecified",
  top20_df$ICD10_Code == "F39" ~ "Unspecified mood disorder",
  top20_df$ICD10_Code == "F332" ~ "Major depressive disorder, recurrent severe without psychotic features",
  top20_df$ICD10_Code == "F329" ~ "Major depressive disorder, single episode, unspecified",
  top20_df$ICD10_Code == "F29" ~ "Unspecified psychosis not due to a substance or known physiological condition",
  top20_df$ICD10_Code == "F639" ~ "Impulse disorder, unspecified",
  top20_df$ICD10_Code == "F410" ~ "Panic disorder without agoraphobia",
  top20_df$ICD10_Code == "F6381" ~ "Intermittent explosive disorder",
  top20_df$ICD10_Code == "F200" ~ "Paranoid schizophrenia",
  top20_df$ICD10_Code == "F251" ~ "Schizoaffective disorder, depressive type",
  TRUE ~ "Unknown"
)

top20_df %>%
  select(ICD10_Code, Description, Count, Percent) %>%
  rename(
    "ICD-10 Code" = ICD10_Code,
    "Frequency" = Count,
    "Percentage(%)" = Percent
  ) %>%
  gt() %>%
  tab_header(
    title = "Top 20 Psychiatric Comorbidities in IDD Inpatient Population ") %>%
  fmt_number(columns = "Frequency", decimals = 0, use_seps= TRUE)%>%
  tab_footnote(
    footnote = "Codes scanned across all 40 ICD-10 diagnosis fields")%>%
  tab_style(
    style = cell_text(weight = "bold"), 
    locations = cells_column_labels()
  )
Top 20 Psychiatric Comorbidities in IDD Inpatient Population
ICD-10 Code Description Frequency Percentage(%)
F419 Anxiety disorder, unspecified 8,093 17.34
F32A Depression, unspecified 4,526 9.70
F319 Bipolar disorder, unspecified 3,778 8.09
F909 Attention-deficit hyperactivity disorder, unspecified 3,331 7.14
F4310 Post-traumatic stress disorder, unspecified 2,806 6.01
F209 Schizophrenia, unspecified 2,349 5.03
F411 Generalized anxiety disorder 2,290 4.91
F250 Schizoaffective disorder, bipolar type 1,840 3.94
F259 Schizoaffective disorder, unspecified 1,641 3.52
F603 Borderline personality disorder 1,553 3.33
F429 Obsessive-compulsive disorder, unspecified 1,382 2.96
F39 Unspecified mood disorder 1,300 2.78
F332 Major depressive disorder, recurrent severe without psychotic features 1,102 2.36
F329 Major depressive disorder, single episode, unspecified 1,094 2.34
F29 Unspecified psychosis not due to a substance or known physiological condition 816 1.75
F639 Impulse disorder, unspecified 764 1.64
F410 Panic disorder without agoraphobia 694 1.49
F6381 Intermittent explosive disorder 681 1.46
F200 Paranoid schizophrenia 617 1.32
F251 Schizoaffective disorder, depressive type 407 0.87
Codes scanned across all 40 ICD-10 diagnosis fields

Categorized IDD Categories Table

autism_codes <- c("F840", "F843", "F845","F848", "F849")
cerebral_palsy_codes <- c("G800", "G801", "G802", "G803", "G804", "G808", "G809")
mild_id_codes <- c("F70")
moderate_id_codes <- c("F71")
severe_id_codes <- c("F72")
profound_id_codes <- c("F73")
other_id_codes <- c("F78", "F78A1", "F78A9")
unspecified_id_codes <- c("F79")
fetal_alcohol_codes <- c("P043","Q860")
congential_syndrome_codes <- c("Q871", "Q8711", "Q8719", "Q872", "Q873", "Q875",
                               "Q8781", "Q8783", "Q8784", "Q8785","Q8789", 
                               "Q897", "Q898")
down_syndrome_codes <- c("Q900", "Q901", "Q902", "Q909")
trisomy_codes <- c("Q910", "Q911", "Q912", "Q913", "Q914", "Q915", "Q916", "Q917")
other_trisomy_codes <- c("Q920", "Q921", "Q922", "Q925", "Q9261", "Q9262", 
                         "Q927", "Q928", "Q929")
monosomy_codes <- c("Q930","Q931", "Q932", "Q933", "Q934", "Q935", "Q9351", "Q9352", 
                   "Q9359","Q937", "Q9381", "Q9388", "Q9389", "Q939")
balanced_rearrangement_codes <- c("Q952", "Q953") 
fragile_x_codes <- c("Q992")
other_psych_dev_codes <- c("F88", "F89")
speech_language_codes <- c("F800", "F801", "F802", "F804", "F8081", "F8082",
                           "F8089", "F809")
scholastic_codes <- c("F810", "F812", "F8181", "F8189", "F819")
motor_dev_codes <- c("F82")
hearing_loss_codes <- c("H9325")
aphasia_codes <- c("R480")

dx_cols <- paste0("I10_DX", 1:40)

flag_dx <- function(data, code_list) {
  as.integer(apply(data[, dx_cols], 1, function(row) {
    any(row %in% code_list, na.rm = TRUE)
  }))
}

nis_idd_flagged <- nis_idd_18_64 %>%
  mutate(
    cat_autism     = flag_dx(nis_idd_18_64, autism_codes),
    cat_cerebral_palsy = flag_dx(nis_idd_18_64, cerebral_palsy_codes),
    cat_mild_id = flag_dx(nis_idd_18_64, mild_id_codes),
    cat_moderate_id = flag_dx(nis_idd_18_64, moderate_id_codes),
    cat_severe_id = flag_dx(nis_idd_18_64, severe_id_codes),
    cat_profound_id = flag_dx(nis_idd_18_64, profound_id_codes),
    cat_other_id = flag_dx(nis_idd_18_64, other_id_codes),
    cat_unspecified_id = flag_dx(nis_idd_18_64, unspecified_id_codes),
    cat_fetal_alcohol = flag_dx(nis_idd_18_64, fetal_alcohol_codes),
    cat_congenital_syndrome = flag_dx(nis_idd_18_64, congential_syndrome_codes),
    cat_down_syndrome = flag_dx(nis_idd_18_64, down_syndrome_codes),
    cat_trisomy = flag_dx(nis_idd_18_64, trisomy_codes),
    cat_other_trisomy = flag_dx(nis_idd_18_64, other_trisomy_codes),
    cat_monosomy = flag_dx(nis_idd_18_64, monosomy_codes),
    cat_balanced_rearrange = flag_dx(nis_idd_18_64, balanced_rearrangement_codes),
    cat_fragile_x = flag_dx(nis_idd_18_64, fragile_x_codes),
    cat_other_psych_dev = flag_dx(nis_idd_18_64, other_psych_dev_codes),
    cat_speech_language = flag_dx(nis_idd_18_64, speech_language_codes),
    cat_scholastic = flag_dx(nis_idd_18_64, scholastic_codes),
    cat_motor_dev = flag_dx(nis_idd_18_64, motor_dev_codes),
    cat_hearing_loss = flag_dx(nis_idd_18_64, hearing_loss_codes),
    cat_aphasia = flag_dx(nis_idd_18_64, aphasia_codes)
    )

cat_cols <- grep("^cat_", colnames(nis_idd_flagged), value = TRUE)
cat_matrix <- as.matrix(nis_idd_flagged[, cat_cols])
cooccur_matrix <-t(cat_matrix) %*% cat_matrix

nice_names <- c("Autism", "Cerebral Palsy", "Mild ID", "Moderate ID", "Severe ID",
                "Profound ID", "Other ID", "Unspecified ID", "Fetal Alcohol", 
                "Congenital Malformation Syndromes", "Down Syndrome", 
                "Trisomy 13/18", "Other Trisomy", "Monosomy", 
                "Balanced Rearrangement", "Fragile X", "Other Psych Disorders", 
                "Speech/Language Disorder", "Scholastic Disorder", "Motor Deviation", 
                "Hearing Loss", "Aphasia")

rownames(cooccur_matrix) <- nice_names
colnames(cooccur_matrix) <- nice_names

cooccur_matrix
##                                   Autism Cerebral Palsy Mild ID Moderate ID
## Autism                             12964            654     266         211
## Cerebral Palsy                       654          12709     167         141
## Mild ID                              266            167    2882           2
## Moderate ID                          211            141       2        1098
## Severe ID                            335            551       1           1
## Profound ID                          124            541       0           1
## Other ID                              21             11       1           0
## Unspecified ID                      1515           1688      26          16
## Fetal Alcohol                         79             21      30          13
## Congenital Malformation Syndromes     83             58      22          20
## Down Syndrome                        198            101      40          53
## Trisomy 13/18                          4             13       0           0
## Other Trisomy                         14             24       5           4
## Monosomy                              52             75       6           9
## Balanced Rearrangement                 0              0       0           0
## Fragile X                             42             13       8           7
## Other Psych Disorders                175            499      17          13
## Speech/Language Disorder             150             99      19          13
## Scholastic Disorder                  180            103      66          13
## Motor Deviation                       10              9       2           0
## Hearing Loss                           9              3       0           0
## Aphasia                               28              3       3           0
##                                   Severe ID Profound ID Other ID Unspecified ID
## Autism                                  335         124       21           1515
## Cerebral Palsy                          551         541       11           1688
## Mild ID                                   1           0        1             26
## Moderate ID                               1           1        0             16
## Severe ID                              1593           9        3             24
## Profound ID                               9        1033        0             10
## Other ID                                  3           0       87              3
## Unspecified ID                           24          10        3          11979
## Fetal Alcohol                             2           4        0             95
## Congenital Malformation Syndromes        23          12        4            119
## Down Syndrome                           103          65        5            447
## Trisomy 13/18                             2           1        0             10
## Other Trisomy                             4           2        1             24
## Monosomy                                 30          17        1             81
## Balanced Rearrangement                    0           0        0              0
## Fragile X                                 2           0        2             42
## Other Psych Disorders                    48          36        5            178
## Speech/Language Disorder                 18           5        2            113
## Scholastic Disorder                      17           6        1            194
## Motor Deviation                           3           2        0              7
## Hearing Loss                              0           0        0              2
## Aphasia                                   1           0        0              9
##                                   Fetal Alcohol
## Autism                                       79
## Cerebral Palsy                               21
## Mild ID                                      30
## Moderate ID                                  13
## Severe ID                                     2
## Profound ID                                   4
## Other ID                                      0
## Unspecified ID                               95
## Fetal Alcohol                               444
## Congenital Malformation Syndromes             3
## Down Syndrome                                 3
## Trisomy 13/18                                 0
## Other Trisomy                                 0
## Monosomy                                      2
## Balanced Rearrangement                        0
## Fragile X                                     0
## Other Psych Disorders                         9
## Speech/Language Disorder                      6
## Scholastic Disorder                          17
## Motor Deviation                               2
## Hearing Loss                                  0
## Aphasia                                       0
##                                   Congenital Malformation Syndromes
## Autism                                                           83
## Cerebral Palsy                                                   58
## Mild ID                                                          22
## Moderate ID                                                      20
## Severe ID                                                        23
## Profound ID                                                      12
## Other ID                                                          4
## Unspecified ID                                                  119
## Fetal Alcohol                                                     3
## Congenital Malformation Syndromes                              1954
## Down Syndrome                                                    12
## Trisomy 13/18                                                     0
## Other Trisomy                                                     1
## Monosomy                                                          5
## Balanced Rearrangement                                            0
## Fragile X                                                         1
## Other Psych Disorders                                            30
## Speech/Language Disorder                                          9
## Scholastic Disorder                                              14
## Motor Deviation                                                   5
## Hearing Loss                                                      1
## Aphasia                                                           0
##                                   Down Syndrome Trisomy 13/18 Other Trisomy
## Autism                                      198             4            14
## Cerebral Palsy                              101            13            24
## Mild ID                                      40             0             5
## Moderate ID                                  53             0             4
## Severe ID                                   103             2             4
## Profound ID                                  65             1             2
## Other ID                                      5             0             1
## Unspecified ID                              447            10            24
## Fetal Alcohol                                 3             0             0
## Congenital Malformation Syndromes            12             0             1
## Down Syndrome                              3701             6             0
## Trisomy 13/18                                 6           132             0
## Other Trisomy                                 0             0           279
## Monosomy                                      4             3             9
## Balanced Rearrangement                        0             0             0
## Fragile X                                     2             0             0
## Other Psych Disorders                        30             9            14
## Speech/Language Disorder                     19             0             1
## Scholastic Disorder                          17             2             5
## Motor Deviation                               0             0             0
## Hearing Loss                                  0             0             0
## Aphasia                                       0             0             0
##                                   Monosomy Balanced Rearrangement Fragile X
## Autism                                  52                      0        42
## Cerebral Palsy                          75                      0        13
## Mild ID                                  6                      0         8
## Moderate ID                              9                      0         7
## Severe ID                               30                      0         2
## Profound ID                             17                      0         0
## Other ID                                 1                      0         2
## Unspecified ID                          81                      0        42
## Fetal Alcohol                            2                      0         0
## Congenital Malformation Syndromes        5                      0         1
## Down Syndrome                            4                      0         2
## Trisomy 13/18                            3                      0         0
## Other Trisomy                            9                      0         0
## Monosomy                               591                      0         1
## Balanced Rearrangement                   0                      0         0
## Fragile X                                1                      0       330
## Other Psych Disorders                   29                      0         3
## Speech/Language Disorder                 8                      0         2
## Scholastic Disorder                     12                      0         2
## Motor Deviation                          3                      0         0
## Hearing Loss                             0                      0         0
## Aphasia                                  0                      0         0
##                                   Other Psych Disorders
## Autism                                              175
## Cerebral Palsy                                      499
## Mild ID                                              17
## Moderate ID                                          13
## Severe ID                                            48
## Profound ID                                          36
## Other ID                                              5
## Unspecified ID                                      178
## Fetal Alcohol                                         9
## Congenital Malformation Syndromes                    30
## Down Syndrome                                        30
## Trisomy 13/18                                         9
## Other Trisomy                                        14
## Monosomy                                             29
## Balanced Rearrangement                                0
## Fragile X                                             3
## Other Psych Disorders                               841
## Speech/Language Disorder                             30
## Scholastic Disorder                                  16
## Motor Deviation                                       4
## Hearing Loss                                          3
## Aphasia                                               0
##                                   Speech/Language Disorder Scholastic Disorder
## Autism                                                 150                 180
## Cerebral Palsy                                          99                 103
## Mild ID                                                 19                  66
## Moderate ID                                             13                  13
## Severe ID                                               18                  17
## Profound ID                                              5                   6
## Other ID                                                 2                   1
## Unspecified ID                                         113                 194
## Fetal Alcohol                                            6                  17
## Congenital Malformation Syndromes                        9                  14
## Down Syndrome                                           19                  17
## Trisomy 13/18                                            0                   2
## Other Trisomy                                            1                   5
## Monosomy                                                 8                  12
## Balanced Rearrangement                                   0                   0
## Fragile X                                                2                   2
## Other Psych Disorders                                   30                  16
## Speech/Language Disorder                              1094                  31
## Scholastic Disorder                                     31                2158
## Motor Deviation                                          7                   1
## Hearing Loss                                             0                   2
## Aphasia                                                  0                   2
##                                   Motor Deviation Hearing Loss Aphasia
## Autism                                         10            9      28
## Cerebral Palsy                                  9            3       3
## Mild ID                                         2            0       3
## Moderate ID                                     0            0       0
## Severe ID                                       3            0       1
## Profound ID                                     2            0       0
## Other ID                                        0            0       0
## Unspecified ID                                  7            2       9
## Fetal Alcohol                                   2            0       0
## Congenital Malformation Syndromes               5            1       0
## Down Syndrome                                   0            0       0
## Trisomy 13/18                                   0            0       0
## Other Trisomy                                   0            0       0
## Monosomy                                        3            0       0
## Balanced Rearrangement                          0            0       0
## Fragile X                                       0            0       0
## Other Psych Disorders                           4            3       0
## Speech/Language Disorder                        7            0       0
## Scholastic Disorder                             1            2       2
## Motor Deviation                                64            0       0
## Hearing Loss                                    0           40       1
## Aphasia                                         0            1     381
cooccur_pairs <- as.data.frame(as.table(cooccur_matrix)) %>%
  rename(Category1 = Var1, Category2 = Var2, Count = Freq) %>%
  filter(as.character(Category1) < as.character(Category2)) %>%
  arrange(desc(Count)) %>%
  mutate(
    Percentage = round(Count / nrow(nis_idd_18_64) * 100, 1)
  )

head(cooccur_pairs, 20)
##                            Category1                Category2 Count Percentage
## 1                     Cerebral Palsy           Unspecified ID  1688        3.6
## 2                             Autism           Unspecified ID  1515        3.2
## 3                             Autism           Cerebral Palsy   654        1.4
## 4                     Cerebral Palsy                Severe ID   551        1.2
## 5                     Cerebral Palsy              Profound ID   541        1.2
## 6                     Cerebral Palsy    Other Psych Disorders   499        1.1
## 7                      Down Syndrome           Unspecified ID   447        1.0
## 8                             Autism                Severe ID   335        0.7
## 9                             Autism                  Mild ID   266        0.6
## 10                            Autism              Moderate ID   211        0.5
## 11                            Autism            Down Syndrome   198        0.4
## 12               Scholastic Disorder           Unspecified ID   194        0.4
## 13                            Autism      Scholastic Disorder   180        0.4
## 14             Other Psych Disorders           Unspecified ID   178        0.4
## 15                            Autism    Other Psych Disorders   175        0.4
## 16                    Cerebral Palsy                  Mild ID   167        0.4
## 17                            Autism Speech/Language Disorder   150        0.3
## 18                    Cerebral Palsy              Moderate ID   141        0.3
## 19                            Autism              Profound ID   124        0.3
## 20 Congenital Malformation Syndromes           Unspecified ID   119        0.3
cooccur_pairs %>%
  head(20)%>%
  rename(
    "IDD Category 1" = Category1,
    "IDD Category 2" = Category2, 
    "Co-occurrence Count" = Count,
    "Percent of Population (%)" = Percentage
  ) %>%
  gt() %>%
  tab_header(
    title = "Top 20 Co-occurring IDD Diagnostic Category Pairs", 
    subtitle = "IDD Inpatient Population Ages 18-64, NIS 2022"
  ) %>%
  fmt_number(columns = "Co-occurrence Count", decimals = 0, use_seps = TRUE) %>% 
  tab_footnote(
    footnote = "Co-occurrence counts reflect hospitalizations where both
    categories appear across any of the 40 ICD-10 diagnosis fields"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  )
Top 20 Co-occurring IDD Diagnostic Category Pairs
IDD Inpatient Population Ages 18-64, NIS 2022
IDD Category 1 IDD Category 2 Co-occurrence Count Percent of Population (%)
Cerebral Palsy Unspecified ID 1,688 3.6
Autism Unspecified ID 1,515 3.2
Autism Cerebral Palsy 654 1.4
Cerebral Palsy Severe ID 551 1.2
Cerebral Palsy Profound ID 541 1.2
Cerebral Palsy Other Psych Disorders 499 1.1
Down Syndrome Unspecified ID 447 1.0
Autism Severe ID 335 0.7
Autism Mild ID 266 0.6
Autism Moderate ID 211 0.5
Autism Down Syndrome 198 0.4
Scholastic Disorder Unspecified ID 194 0.4
Autism Scholastic Disorder 180 0.4
Other Psych Disorders Unspecified ID 178 0.4
Autism Other Psych Disorders 175 0.4
Cerebral Palsy Mild ID 167 0.4
Autism Speech/Language Disorder 150 0.3
Cerebral Palsy Moderate ID 141 0.3
Autism Profound ID 124 0.3
Congenital Malformation Syndromes Unspecified ID 119 0.3
Co-occurrence counts reflect hospitalizations where both categories appear across any of the 40 ICD-10 diagnosis fields
# Frequency table of IDD diagnostic categories

idd_freq_table <- data.frame(
  Category = nice_names,
  Count = colSums(cat_matrix), 
  Percentage = round(colSums(cat_matrix) / nrow(nis_idd_18_64) * 100, 1)
) %>% 
  arrange(desc(Count))

idd_freq_table %>%
  rename(
    "IDD Diagnostic Category" = Category, 
    "Frequency" = Count,
    "Percent (%)" = Percentage
  ) %>%
  gt() %>%
  tab_header(
    title = "Frequency of IDD Diagnostic Categories in Inpatient Population",
    subtitle = "NIS 2022, Ages 18-64"
  ) %>%
  fmt_number(columns = "Frequency", decimals = 0, use_seps= TRUE) %>%
  tab_footnote(
    footnote = "Categories scanned across all 40 ICD-10 diagnosis fields. One hospitalization may appear in multiple categories."
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  )
Frequency of IDD Diagnostic Categories in Inpatient Population
NIS 2022, Ages 18-64
IDD Diagnostic Category Frequency Percent (%)
Autism 12,964 27.8
Cerebral Palsy 12,709 27.2
Unspecified ID 11,979 25.7
Down Syndrome 3,701 7.9
Mild ID 2,882 6.2
Scholastic Disorder 2,158 4.6
Congenital Malformation Syndromes 1,954 4.2
Severe ID 1,593 3.4
Moderate ID 1,098 2.4
Speech/Language Disorder 1,094 2.3
Profound ID 1,033 2.2
Other Psych Disorders 841 1.8
Monosomy 591 1.3
Fetal Alcohol 444 1.0
Aphasia 381 0.8
Fragile X 330 0.7
Other Trisomy 279 0.6
Trisomy 13/18 132 0.3
Other ID 87 0.2
Motor Deviation 64 0.1
Hearing Loss 40 0.1
Balanced Rearrangement 0 0.0
Categories scanned across all 40 ICD-10 diagnosis fields. One hospitalization may appear in multiple categories.

Heatmap

cooccur_melt <- melt(cooccur_matrix)
colnames(cooccur_melt) <- c("Category1", "Category2","Count")

cooccur_melt <- cooccur_melt %>%
  filter(as.character(Category1) != as.character(Category2))

ggplot(cooccur_melt, aes(x = Category1, y = Category2, fill = Count)) + 
  geom_tile(color = "white") + 
  scale_fill_gradient(
    low = "white",
    high = "#08519C",
    name = "Co-occurrence\nCount",
    labels = scales::comma
  ) + 
  geom_text(
    aes(label = ifelse(Count > 0, scales::comma(Count), "")),
    size = 2.5,
    color = "black"
  ) + 
  labs(
    title = "Co-occurrence of IDD Diagnostic Categories",
    subtitle = "IDD Inpatient Population Ages 18-64, NIS 2022",
    x = NULL,
    y = NULL,
    caption = "Values represent number of hospitalizations where both 
    categories appear across the 40 ICD-10 diagnosis fields") +
  theme_minimal(base_size = 11) + 
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(size = 8),
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, color = "gray40"),
    plot.caption = element_text(size = 7, color = "gray50"),
    legend.position = "right", 
    panel.grid = element_blank())

Categorized Psych Comorbidity Categories Table

dx_cols <- paste0("I10_DX", 1:40)

#defining all psychiatric categories

impulse_codes <- c("F630", "F631", "F632", "F633", "F6381", "F6389", "F639")
adhd_codes <- c("F900", "F901", "F902", "F908", "F909")
conduct_codes <- c("F910", "F911", "F912", "F913", "F918", "F919")
anxiety_phys_codes <- c("F064")
phobic_anxiety_codes <- c("F4000", "F4001", "F4002", "F4010", "F4011", "F40210", "F40218", 
                          "F40220", "F40228", "F40230", "F40231", "F40232", "F40233", 
                          "F40240", "F40241", "F40242", "F40243", "F40248", "F40290",
                          "F40291", "F40298", "F408", "F409")
other_anxiety_codes <- c("F410", "F411", "F413", "F418", "F419")
ocd_codes <- c("F42", "F422", "F423", "F424", "F428", "F429")
acute_stress_codes <- c("F430")
ptsd_codes <- c("F4310", "F4311", "F4312")
dissociative_codes <- c("F449", "F4489")
somatoform_codes <-c("F458")
nonpsychotic_codes <- c("F488", "F489")
childhood_emotional_codes <- c("F938")
mental_disorder_codes <- c("F99")
emotional_state_codes <- c("R452", "R455", "R456", "R457")
manic_episode_codes <- c("F3010", "F3011", "F3012", "F3013", "F302", "F303",
                         "F304", "F308", "F309")
bipolar_codes <- c("F310", "F3110", "F3111", "F3112", "F3113", "F312", "F3130",
                   "F3131", "F3132", "F314", "F315", "F3160", "F3161", "F3162", 
                   "F3163", "F3164", "F3170", "F3171", "F3172", "F3173", "F3174",
                   "F3175", "F3176", "F3177", "F3178", "F3181", "F3189", "F319")
other_recurrent_dep_codes <- c("F338")
mood_disorder_codes <- c("F3481", "F3489", "F349", "F39")
depressive_episode_codes <- c("F320", "F321", "F322", "F323", "F324", "F325",
                              "F3289", "F329", "F32A")
major_dep_recurrent_codes <- c("F330", "F331", "F332", "F333", "F3340", "F3341",
                               "F3342","F338", "F339")
dysthymic_codes <- c("F341")
schizotypal_codes <- c("F21")
cyclothymic_codes <- c("F340")
personality_codes <- c("F600", "F601", "F602", "F603", "F604", "F605", "F606",
                       "F607", "F6081", "F6089", "F609")
factitious_codes <- c("F6810", "F6811", "F6812", "F6813")
unspec_personality_codes <- c("F69")
schizophrenia_codes <- c("F200", "F201", "F202", "F203", "F205", "F2081",
                         "F2089", "F209")
schizoaffective_codes <- c("F250", "F251", "F258", "F259")
psychotic_codes <- c("F060", "F062")
delusional_codes <- c("F22")
brief_psychotic_codes <- c("F23")
shared_psychotic_codes <- c("F24")
other_psychotic_codes <- c("F28")
unspec_psychosis_codes <- c("F29")

flag_dx <- function(data, code_list) {
  as.integer(apply(data[, dx_cols], 1, function(row) {
    any(row %in% code_list, na.rm = TRUE)
  }))
}

nis_psych_flagged <- nis_idd_flagged %>%
  mutate(
    psych_impulse = flag_dx(nis_idd_flagged, impulse_codes),
    psych_adhd = flag_dx(nis_idd_flagged, adhd_codes),
    psych_conduct = flag_dx(nis_idd_flagged, conduct_codes),
    psych_anxiety_phys = flag_dx(nis_idd_flagged, anxiety_phys_codes),
    psych_phobic_anxiety = flag_dx(nis_idd_flagged, phobic_anxiety_codes),
    psych_other_anxiety = flag_dx(nis_idd_flagged, other_anxiety_codes),
    psych_ocd = flag_dx(nis_idd_flagged, ocd_codes),
    psych_acute_stress = flag_dx(nis_idd_flagged, acute_stress_codes),
    psych_ptsd = flag_dx(nis_idd_flagged, ptsd_codes),
    psych_dissociative = flag_dx(nis_idd_flagged, dissociative_codes),
    psych_somatoform = flag_dx(nis_idd_flagged, somatoform_codes),
    psych_nonpsychotic = flag_dx(nis_idd_flagged, nonpsychotic_codes),
    psych_childhood_emo = flag_dx(nis_idd_flagged, childhood_emotional_codes),
    psych_mental_disorder = flag_dx(nis_idd_flagged, mental_disorder_codes),
    psych_emotional_state = flag_dx(nis_idd_flagged, emotional_state_codes),
    psych_manic = flag_dx(nis_idd_flagged, manic_episode_codes),
    psych_bipolar =flag_dx(nis_idd_flagged, bipolar_codes),
    psych_other_recur_dep = flag_dx(nis_idd_flagged, other_recurrent_dep_codes),
    psych_mood_disorder = flag_dx(nis_idd_flagged, mood_disorder_codes),
    psych_dep_episode = flag_dx(nis_idd_flagged, depressive_episode_codes),
    psych_major_dep_recurr = flag_dx(nis_idd_flagged, major_dep_recurrent_codes),
    psych_dysthymic = flag_dx(nis_idd_flagged, dysthymic_codes),
    psych_schizotypical = flag_dx(nis_idd_flagged, schizotypal_codes),
    psych_cyclothymic = flag_dx(nis_idd_flagged, cyclothymic_codes),
    psych_personality = flag_dx(nis_idd_flagged, personality_codes),
    psych_factitious = flag_dx(nis_idd_flagged, factitious_codes),
    psych_unspec_personality = flag_dx(nis_idd_flagged, unspec_personality_codes),
    psych_schizophrenia = flag_dx(nis_idd_flagged, schizophrenia_codes),
    psych_schizoaffective = flag_dx(nis_idd_flagged, schizoaffective_codes),
    psych_psychotic = flag_dx(nis_idd_flagged, psychotic_codes),
    psych_delusional = flag_dx(nis_idd_flagged, delusional_codes),
    psych_brief_psychotic = flag_dx(nis_idd_flagged, brief_psychotic_codes),
    psych_shared_psychotic = flag_dx(nis_idd_flagged, shared_psychotic_codes),
    psych_other_psychotic = flag_dx(nis_idd_flagged, other_psychotic_codes),
    psych_unspec_psychosis = flag_dx(nis_idd_flagged, unspec_psychosis_codes)
  )
  
  psych_cat_cols <- grep("^psych_", colnames(nis_psych_flagged), value = TRUE)
  
  nice_psych_names <- c(
    "Impulse Disorders", "Attention-deficity hyperactivity disorder", 
    "Conduct Disorders",
    "Anxiety Due to Physiological Condition", "Phobic Anxiety Disorders",
    "Other Anxiety Disorders", "Obsessive-Compulsive Disorder",
    "Acute Stress Reaction", "Post-Traumatic Stress Disorder", 
    "Dissociative/Conversion Disorder","Other Somatoform Disorders",
    "Nonpsychotic Mental Disorders", "Childhood Emotional Disorders", 
    "Mental Disorder, Unspecified", "Symmptoms Involving Emotional State",
    "Manic Episode", "Bipolar Disorder", "Other Recurrent Depressive Disorders",
    "Mood Disorders", "Depressive Episode", "Major Depressive Disorder, Recurrent",
    "Dysthymic Disorder", "Schizotypal Disorder", "Cyclothymic Disorder",
    "Specific Personality Disorders", "Factitious Disorder", "Unspecified Personality Disorder",
    "Schizophrenia", "Schizoaffective Disorder", "Psychotic Disorder", 
    "Delusional Disorder", "Brief Psychotic Disorder",
    "Shared Psychotic Disorder", "Other Psychotic Disorder",
    "Unspecified Psychosis"
  )
  
  psych_freq_table <- data.frame(
    Category = nice_psych_names,
    Count = colSums(nis_psych_flagged[, psych_cat_cols]),
    Percentage = round(colSums(nis_psych_flagged[, psych_cat_cols]) /
                         nrow(nis_psych_flagged) * 100, 1)
  ) %>%
    arrange(desc(Count))
  
psych_freq_table %>%
  rename(
    "Psychiatric Category" = Category,
    "Frequency" = Count,
    "Percent (%)" = Percentage
  ) %>%
  gt() %>%
  tab_header (
    title = "Psychiatric Comorbidity Categories in IDD Inpatient Population",
    subtitle = "NIS 2022, Ages 18-64"
  ) %>%
  fmt_number(columns = "Frequency", decimals = 0, use_seps = TRUE) %>%
  tab_footnote(
    footnote = "Categories scanned across all 40 ICD-10 diagnosis fields"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  )
Psychiatric Comorbidity Categories in IDD Inpatient Population
NIS 2022, Ages 18-64
Psychiatric Category Frequency Percent (%)
Other Anxiety Disorders 11,076 23.7
Depressive Episode 5,962 12.8
Bipolar Disorder 5,523 11.8
Schizoaffective Disorder 3,872 8.3
Attention-deficity hyperactivity disorder 3,833 8.2
Schizophrenia 3,305 7.1
Post-Traumatic Stress Disorder 3,120 6.7
Major Depressive Disorder, Recurrent 2,378 5.1
Specific Personality Disorders 2,295 4.9
Mood Disorders 1,468 3.1
Obsessive-Compulsive Disorder 1,459 3.1
Impulse Disorders 1,452 3.1
Unspecified Psychosis 816 1.7
Conduct Disorders 699 1.5
Delusional Disorder 313 0.7
Phobic Anxiety Disorders 301 0.6
Brief Psychotic Disorder 268 0.6
Symmptoms Involving Emotional State 241 0.5
Dysthymic Disorder 94 0.2
Dissociative/Conversion Disorder 70 0.1
Anxiety Due to Physiological Condition 54 0.1
Acute Stress Reaction 52 0.1
Mental Disorder, Unspecified 43 0.1
Manic Episode 43 0.1
Other Somatoform Disorders 40 0.1
Other Psychotic Disorder 33 0.1
Schizotypal Disorder 27 0.1
Factitious Disorder 14 0.0
Cyclothymic Disorder 12 0.0
Other Recurrent Depressive Disorders 9 0.0
Psychotic Disorder 8 0.0
Unspecified Personality Disorder 7 0.0
Nonpsychotic Mental Disorders 4 0.0
Shared Psychotic Disorder 4 0.0
Childhood Emotional Disorders 0 0.0
Categories scanned across all 40 ICD-10 diagnosis fields
psych_matrix <- as.matrix(nis_psych_flagged[, psych_cat_cols])
colnames(psych_matrix) <- nice_psych_names
psych_cooccur <- t(psych_matrix) %*% psych_matrix

psych_cooccur_pairs <- as.data.frame(as.table(psych_cooccur)) %>%
  rename(Category1 = Var1, Category2 = Var2, Count = Freq) %>%
  filter(as.character(Category1) < as.character(Category2)) %>% 
  arrange(desc(Count)) %>%
  mutate(Percentage = round(Count / nrow(nis_psych_flagged) * 100, 1))

psych_cooccur_pairs %>%
  head(20) %>%
  rename(
    "Psychiatric Category 1" = Category1,
    "Psychiatric Category 2" = Category2,
    "Co-occurrence Count" = Count,
    "Percent of Population (%)" = Percentage
  ) %>%
  gt() %>%
  tab_header(
    title = "Top 20 Co-occurring Psychiatric Comorbidity Pairs in IDD Population",
    subtitle = "NIS 2022, Ages 18-64"
  ) %>% 
  fmt_number(columns = "Co-occurrence Count", decimals = 0, use_seps = TRUE) %>%
  tab_footnote(
    footnote = "Co-occurrence counts reflect hospitalizations where both psychiatric
    categories appear across any of 40 ICD-10 diagnosis fields") %>%
  tab_style(
    style = cell_text(weight = "bold"), 
    locations = cells_column_labels()
  )
Top 20 Co-occurring Psychiatric Comorbidity Pairs in IDD Population
NIS 2022, Ages 18-64
Psychiatric Category 1 Psychiatric Category 2 Co-occurrence Count Percent of Population (%)
Depressive Episode Other Anxiety Disorders 3,060 6.6
Bipolar Disorder Other Anxiety Disorders 2,082 4.5
Attention-deficity hyperactivity disorder Other Anxiety Disorders 1,724 3.7
Other Anxiety Disorders Schizoaffective Disorder 1,371 2.9
Major Depressive Disorder, Recurrent Other Anxiety Disorders 1,350 2.9
Other Anxiety Disorders Post-Traumatic Stress Disorder 1,327 2.8
Other Anxiety Disorders Specific Personality Disorders 1,050 2.2
Attention-deficity hyperactivity disorder Bipolar Disorder 1,048 2.2
Bipolar Disorder Post-Traumatic Stress Disorder 971 2.1
Post-Traumatic Stress Disorder Specific Personality Disorders 868 1.9
Other Anxiety Disorders Schizophrenia 829 1.8
Attention-deficity hyperactivity disorder Post-Traumatic Stress Disorder 742 1.6
Bipolar Disorder Specific Personality Disorders 732 1.6
Attention-deficity hyperactivity disorder Depressive Episode 707 1.5
Obsessive-Compulsive Disorder Other Anxiety Disorders 702 1.5
Bipolar Disorder Schizophrenia 680 1.5
Depressive Episode Post-Traumatic Stress Disorder 630 1.3
Post-Traumatic Stress Disorder Schizoaffective Disorder 616 1.3
Major Depressive Disorder, Recurrent Post-Traumatic Stress Disorder 566 1.2
Bipolar Disorder Schizoaffective Disorder 556 1.2
Co-occurrence counts reflect hospitalizations where both psychiatric categories appear across any of 40 ICD-10 diagnosis fields

CART Creation

los_cutoff <- quantile(nis_idd_18_64$LOS, 0.90, na.rm = TRUE)

nis_cart <- nis_psych_flagged %>%
  mutate(
    long_los = factor(ifelse(LOS > 9, "Stay > 9 Days", "Stay < 9 Days"))
  )

cart_model <- rpart(
   long_los ~ AGE + FEMALE + RACE + PAY1 + ZIPINC_QRTL + PL_NCHS + DIED +
    ELECTIVE + DISPUNIFORM + TRAN_IN + TRAN_OUT +
    cat_autism + cat_cerebral_palsy + cat_mild_id + cat_moderate_id + 
    cat_severe_id + cat_profound_id + cat_other_id + cat_unspecified_id +
    cat_fetal_alcohol + cat_congenital_syndrome + cat_down_syndrome +
    cat_trisomy + cat_other_trisomy + cat_monosomy + cat_balanced_rearrange +
    cat_fragile_x + cat_other_psych_dev + cat_speech_language + cat_scholastic + 
    cat_motor_dev + cat_hearing_loss + cat_aphasia + psych_impulse + psych_adhd + 
    psych_conduct + psych_anxiety_phys + psych_phobic_anxiety + psych_other_anxiety + 
    psych_ocd + psych_acute_stress + psych_ptsd + psych_dissociative + 
    psych_somatoform + psych_nonpsychotic + psych_childhood_emo + 
    psych_mental_disorder + psych_emotional_state + psych_manic + 
    psych_bipolar + psych_other_recur_dep + psych_mood_disorder + 
    psych_dep_episode + psych_major_dep_recurr + psych_dysthymic + 
    psych_schizotypical + psych_cyclothymic + psych_personality + 
    psych_factitious + psych_unspec_personality + psych_schizophrenia + 
    psych_schizoaffective + psych_psychotic + psych_delusional + psych_brief_psychotic
    + psych_shared_psychotic + psych_other_psychotic + psych_unspec_psychosis,
  data = nis_cart,
  method = "class",
  control = rpart.control(cp = 0.0005, minsplit = 30, minbucket = 10, maxdepth = 6)
)

rpart.plot(
  cart_model, 
  type = 0,
  extra = 106,
  box.palette = "Blues",
  branch = 0.5,
  shadow.col = "gray",
  nn = TRUE, 
  fallen.leaves = TRUE,
  leaf.round = 0,
  main = "CART Model: Predictors of Top 10% Length of Stay\nIDD Inpatient Population
  Ages 18-64",
  cex = 0.8)
## Warning in polygon(x[, i], y[, i], col = col[i], border = border.col[i], :
## semi-transparency is not supported on this device: reported only once per page

cart_legend <- data.frame(
  Variable = c(
    rep("DISPUNIFORM", 9),
    rep("TRAN_IN", 3),
    rep("TRAN_OUT", 3),
    rep("ELECTIVE", 2),
    rep("PAY1", 6)
  ), 
  Value = c(
    "1", "2", "5", "6", "7", "20", "21", "99", ".", 
    "0", "1", "2",
    "0", "1", "2",
    "0", "1",
    "1", "2", "3", "4", "5", "6"
  ), 
  Description = c(
    "Routine discharge",
    "Transfer to short-term hospital",
    "Transfer to SNF/ICF/other facility",
    "Home Health Care (HHC)",
    "Against medical advice (AMA)",
    "Died in hospital",
    "Discharged to court/law enforcement",
    "Discharged alive, destination unknown",
    "Missing",
    "Not transferred in / newborn admission", 
    "Transferred in from acute care hospital",
    "Transferred in from another type of facility",
    "Not a transfer out",
    "Transferred out to acute care hospital",
    "Transferred out to another type of facility", 
    "Non-elective admission",
    "Elective admission",
    "Medicare",
    "Medicaid",
    "Private insurance",
    "Self-pay",
    "No charge",
    "Other"
  )
  )

cart_legend %>% 
  gt() %>% 
  tab_header(
    title = "CART Model Variable Reference Guide",
    subtitle = "Coding definitions for multi-categorical variables") %>%
  tab_row_group(
    label = "Primary Payer (PAY1)",
    rows = Variable == "PAY1"
  ) %>%
  tab_row_group(
    label = "Elective Admission (ELECTIVE)",
    rows = Variable == "ELECTIVE"
  ) %>%
  tab_row_group(
    label = "Transfer Out Status (TRAN_OUT)",
    rows = Variable == "TRAN_OUT"
  ) %>%
  tab_row_group(
    label = "Transfer In Status (TRAN_IN)",
    rows = Variable == "TRAN_IN"
  ) %>%
  tab_row_group(
    label = "Discharge Disposition (DISPUNIFORM)",
    rows = Variable == "DISPUNIFORM"
  ) %>%
  cols_hide(columns = "Variable") %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_row_groups()
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_footnote(
    footnote = "Reference: HCUP NIS 2022 Data Elements Documentation"
  )
CART Model Variable Reference Guide
Coding definitions for multi-categorical variables
Value Description
Discharge Disposition (DISPUNIFORM)
1 Routine discharge
2 Transfer to short-term hospital
5 Transfer to SNF/ICF/other facility
6 Home Health Care (HHC)
7 Against medical advice (AMA)
20 Died in hospital
21 Discharged to court/law enforcement
99 Discharged alive, destination unknown
. Missing
Transfer In Status (TRAN_IN)
0 Not transferred in / newborn admission
1 Transferred in from acute care hospital
2 Transferred in from another type of facility
Transfer Out Status (TRAN_OUT)
0 Not a transfer out
1 Transferred out to acute care hospital
2 Transferred out to another type of facility
Elective Admission (ELECTIVE)
0 Non-elective admission
1 Elective admission
Primary Payer (PAY1)
1 Medicare
2 Medicaid
3 Private insurance
4 Self-pay
5 No charge
6 Other
Reference: HCUP NIS 2022 Data Elements Documentation
printcp(cart_model)
## 
## Classification tree:
## rpart(formula = long_los ~ AGE + FEMALE + RACE + PAY1 + ZIPINC_QRTL + 
##     PL_NCHS + DIED + ELECTIVE + DISPUNIFORM + TRAN_IN + TRAN_OUT + 
##     cat_autism + cat_cerebral_palsy + cat_mild_id + cat_moderate_id + 
##     cat_severe_id + cat_profound_id + cat_other_id + cat_unspecified_id + 
##     cat_fetal_alcohol + cat_congenital_syndrome + cat_down_syndrome + 
##     cat_trisomy + cat_other_trisomy + cat_monosomy + cat_balanced_rearrange + 
##     cat_fragile_x + cat_other_psych_dev + cat_speech_language + 
##     cat_scholastic + cat_motor_dev + cat_hearing_loss + cat_aphasia + 
##     psych_impulse + psych_adhd + psych_conduct + psych_anxiety_phys + 
##     psych_phobic_anxiety + psych_other_anxiety + psych_ocd + 
##     psych_acute_stress + psych_ptsd + psych_dissociative + psych_somatoform + 
##     psych_nonpsychotic + psych_childhood_emo + psych_mental_disorder + 
##     psych_emotional_state + psych_manic + psych_bipolar + psych_other_recur_dep + 
##     psych_mood_disorder + psych_dep_episode + psych_major_dep_recurr + 
##     psych_dysthymic + psych_schizotypical + psych_cyclothymic + 
##     psych_personality + psych_factitious + psych_unspec_personality + 
##     psych_schizophrenia + psych_schizoaffective + psych_psychotic + 
##     psych_delusional + psych_brief_psychotic + psych_shared_psychotic + 
##     psych_other_psychotic + psych_unspec_psychosis, data = nis_cart, 
##     method = "class", control = rpart.control(cp = 5e-04, minsplit = 30, 
##         minbucket = 10, maxdepth = 6))
## 
## Variables actually used in tree construction:
## [1] AGE                 cat_cerebral_palsy  cat_speech_language
## [4] DISPUNIFORM         ELECTIVE            PAY1               
## [7] psych_mood_disorder TRAN_IN             TRAN_OUT           
## 
## Root node error: 10106/46666 = 0.21656
## 
## n=46666 (17 observations deleted due to missingness)
## 
##           CP nsplit rel error  xerror      xstd
## 1 0.00084108      0   1.00000 1.00000 0.0088047
## 2 0.00079161      5   0.99525 1.00030 0.0088056
## 3 0.00074213      7   0.99367 1.00030 0.0088056
## 4 0.00069266     11   0.99070 1.00030 0.0088056
## 5 0.00050000     12   0.99001 0.99624 0.0087927
# visTree(cart_model,
       #  main = "CART Model",
       #   width = "100%", 
       #  height = "800px",
       #  nodesPopSize = TRUE,
       # colorVar = "Blues",
       #  colorY = c("lightblue", "steelblue"),
       # legend = TRUE,
       #  legendNodesSize = 30,
       #   digits = 2) 


visTree(cart_model,
        width = "100%",
        height = "900px",
        legend = FALSE,
        digits = 2
      ) %>%
  visHierarchicalLayout(
    direction = "UD",
    levelSeparation = 150,
    nodeSpacing = 200,
    treeSpacing = 300,
  ) %>%
  visNodes(
    size = 30,
    font = list(size = 16)
  ) %>%
  visEdges(
    font = list(size = 14, align = "middle")
  )
cart_party <- as.party(cart_model)
ggparty(cart_party) + 
  geom_edge() + 
  geom_edge_label() + 
  geom_node_label(
    aes(label = splitvar),
    ids = "inner"
  ) + 
  geom_node_plot(
    gglist = list(
      geom_bar(aes(x = "", fill = long_los), position = "fill"),
      scale_fill_manual(values = c("lightblue", "steelblue")),
      theme_minimal(),
      labs(x = "", y = "Proportion")
    ),
    shared_axis_labels = TRUE
  ) + 
  theme_minimal() + 
  labs(title = "CART Model",
       subtitle = "IDD Inpatient Pop")
## Warning in geom_edge_label(): Ignoring unknown parameters: `label.size`

importance_df <- as.data.frame(cart_model$variable.importance) %>%
  tibble::rownames_to_column(var = "Variable") %>% 
  rename("Importance Score" = "cart_model$variable.importance") %>% 
  arrange(desc("Importance Score"))

importance_df %>%
  gt() %>%
  tab_header(
    title = "Variable Importance from CART Model",
    subtitle = "Outcome: Top 25% Length of Stay in IDD Population"
  ) %>%
  fmt_number(columns ="Importance Score", decimals =1) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  )
Variable Importance from CART Model
Outcome: Top 25% Length of Stay in IDD Population
Variable Importance Score
DISPUNIFORM 504.7
TRAN_OUT 308.0
TRAN_IN 138.7
AGE 51.0
DIED 37.3
ELECTIVE 9.4
PAY1 7.0
psych_mood_disorder 5.9
cat_cerebral_palsy 5.8
cat_speech_language 5.3
cat_profound_id 2.3
psych_dysthymic 0.1
psych_acute_stress 0.1
cat_down_syndrome 0.0
cat_fragile_x 0.0
psych_schizophrenia 0.0

CART Creation based on IDD Categories

los_cutoff <- quantile(nis_idd_18_64$LOS, 0.90, na.rm = TRUE)

nis_cart <- nis_psych_flagged %>%
  mutate(
    long_los = factor(ifelse(LOS > 9, "Stay > 9 Days", "Stay < 9 Days"))
  )

cart_model2 <- rpart(
   long_los ~
    cat_autism + cat_cerebral_palsy + cat_mild_id + cat_moderate_id + 
    cat_severe_id + cat_profound_id + cat_other_id + cat_unspecified_id +
    cat_fetal_alcohol + cat_congenital_syndrome + cat_down_syndrome +
    cat_trisomy + cat_other_trisomy + cat_monosomy + cat_balanced_rearrange +
    cat_fragile_x + cat_other_psych_dev + cat_speech_language + cat_scholastic + 
    cat_motor_dev + cat_hearing_loss + cat_aphasia,
  data = nis_cart,
  method = "class",
  control = rpart.control(cp = 0.0001, minsplit = 30, minbucket = 10, maxdepth = 6)
)

rpart.plot(
  cart_model2, 
  type = 0,
  extra = 106,
  box.palette = "Blues",
  branch = 0.5,
  shadow.col = "gray",
  nn = TRUE, 
  fallen.leaves = TRUE,
  leaf.round = 0,
  main = "CART Model: Predictors of Top 10% Length of Stay\nIDD Inpatient Population
  Ages 18-64",
  cex = 0.8
  )
## Warning in polygon(x[, i], y[, i], col = col[i], border = border.col[i], :
## semi-transparency is not supported on this device: reported only once per page

printcp(cart_model2)
## 
## Classification tree:
## rpart(formula = long_los ~ cat_autism + cat_cerebral_palsy + 
##     cat_mild_id + cat_moderate_id + cat_severe_id + cat_profound_id + 
##     cat_other_id + cat_unspecified_id + cat_fetal_alcohol + cat_congenital_syndrome + 
##     cat_down_syndrome + cat_trisomy + cat_other_trisomy + cat_monosomy + 
##     cat_balanced_rearrange + cat_fragile_x + cat_other_psych_dev + 
##     cat_speech_language + cat_scholastic + cat_motor_dev + cat_hearing_loss + 
##     cat_aphasia, data = nis_cart, method = "class", control = rpart.control(cp = 1e-04, 
##     minsplit = 30, minbucket = 10, maxdepth = 6))
## 
## Variables actually used in tree construction:
## [1] cat_fetal_alcohol  cat_moderate_id    cat_severe_id      cat_unspecified_id
## 
## Root node error: 10106/46666 = 0.21656
## 
## n=46666 (17 observations deleted due to missingness)
## 
##           CP nsplit rel error xerror      xstd
## 1 0.00012369      0   1.00000 1.0000 0.0088047
## 2 0.00010000      4   0.99951 0.9999 0.0088044

CART Creation based on Psych Categories

los_cutoff <- quantile(nis_idd_18_64$LOS, 0.90, na.rm = TRUE)

nis_cart <- nis_psych_flagged %>%
  mutate(
    long_los = factor(ifelse(LOS > 9, "Stay > 9 Days", "Stay < 9 Days"))
  )

cart_model3 <- rpart(
   long_los ~ 
    psych_impulse + psych_adhd + 
    psych_conduct + psych_anxiety_phys + psych_phobic_anxiety + psych_other_anxiety + 
    psych_ocd + psych_acute_stress + psych_ptsd + psych_dissociative + 
    psych_somatoform + psych_nonpsychotic + psych_childhood_emo + 
    psych_mental_disorder + psych_emotional_state + psych_manic + 
    psych_bipolar + psych_other_recur_dep + psych_mood_disorder + 
    psych_dep_episode + psych_major_dep_recurr + psych_dysthymic + 
    psych_schizotypical + psych_cyclothymic + psych_personality + 
    psych_factitious + psych_unspec_personality + psych_schizophrenia + 
    psych_schizoaffective + psych_psychotic + psych_delusional + psych_brief_psychotic
    + psych_shared_psychotic + psych_other_psychotic + psych_unspec_psychosis,
  data = nis_cart,
  method = "class",
  control = rpart.control(cp = 0.00005, minsplit = 30, minbucket = 10, maxdepth = 5)
)

rpart.plot(
  cart_model3, 
  type = 0,
  extra = 106,
  box.palette = "Blues",
  branch = 0.5,
  shadow.col = "gray",
  nn = TRUE, 
  fallen.leaves = TRUE,
  leaf.round = 0,
  main = "CART Model: Psychiatric Comorbidity Predictors of Top 10% Length of Stay\nIDD Inpatient Population
  Ages 18-64",
  cex = 0.8
  )
## Warning in polygon(x[, i], y[, i], col = col[i], border = border.col[i], :
## semi-transparency is not supported on this device: reported only once per page

printcp(cart_model3)
## 
## Classification tree:
## rpart(formula = long_los ~ psych_impulse + psych_adhd + psych_conduct + 
##     psych_anxiety_phys + psych_phobic_anxiety + psych_other_anxiety + 
##     psych_ocd + psych_acute_stress + psych_ptsd + psych_dissociative + 
##     psych_somatoform + psych_nonpsychotic + psych_childhood_emo + 
##     psych_mental_disorder + psych_emotional_state + psych_manic + 
##     psych_bipolar + psych_other_recur_dep + psych_mood_disorder + 
##     psych_dep_episode + psych_major_dep_recurr + psych_dysthymic + 
##     psych_schizotypical + psych_cyclothymic + psych_personality + 
##     psych_factitious + psych_unspec_personality + psych_schizophrenia + 
##     psych_schizoaffective + psych_psychotic + psych_delusional + 
##     psych_brief_psychotic + psych_shared_psychotic + psych_other_psychotic + 
##     psych_unspec_psychosis, data = nis_cart, method = "class", 
##     control = rpart.control(cp = 5e-05, minsplit = 30, minbucket = 10, 
##         maxdepth = 5))
## 
## Variables actually used in tree construction:
## [1] psych_emotional_state  psych_other_anxiety    psych_ptsd            
## [4] psych_schizoaffective  psych_schizophrenia    psych_unspec_psychosis
## 
## Root node error: 10106/46666 = 0.21656
## 
## n=46666 (17 observations deleted due to missingness)
## 
##           CP nsplit rel error xerror      xstd
## 1 0.00019790      0   1.00000 1.0000 0.0088047
## 2 0.00013193      3   0.99941 1.0023 0.0088119
## 3 0.00005000      6   0.99901 1.0033 0.0088151