EPI553_FinalProject_CheckIn1

#Call libraries

#Read in NSCH 2022 data

NSCH_2022 <- read.csv("data/NSCH_2022e_Topical_SPSS_CAHMI_DRCv3.csv")

#Read in NSCH 2022 special geographies data

geo <- read.csv("data/NSCH_2022_Special_Geographies.csv")

#Left join NSCH data with geographies

NSCH_2022_geo <- left_join(NSCH_2022, geo, by ="HHID")

#Limit variables

NSCH_2022_subset <- NSCH_2022_geo %>% select(HHID, FIPSST, NY_SGREGION, STRATUM, FWC, CSHCN_22, 
                                             npm11MHnonCSHCN_22, npm11MHCSHCN_22, MedHome_22,
                                             PerDrNs_22, UsualSck_22, FamCent_22, NoRefPrb_22,CareCoor_22,
                                             instype_22, PrntNativity_22, HHLanguage_22, povlev4_22, AdultEduc_22, raceASIA_22)


#Limit dataset to only NYS (FIPSST = 36)
NSCH_2022_subset_NY <- subset(NSCH_2022_subset, FIPSST == 36)

#Data preparation - cleaning NA and recoding variables
NSCH_2022_subset_NY_clean <- NSCH_2022_subset_NY %>%
  mutate(CSHCN_22 = ifelse(CSHCN_22 == 1, 1, 0)) %>%
  mutate(MedHome_22 = na_if(MedHome_22,99)) %>%
  mutate(MedHome_22 = ifelse(MedHome_22 == 1, 1, 0)) %>%
  mutate(NYC_Metro = ifelse(NY_SGREGION == 8, 0, 1))

table(NSCH_2022_subset_NY_clean$MedHome_22, useNA = "ifany")

## 
##    0    1 <NA> 
## 2368 2197   11

table(NSCH_2022_subset_NY_clean$CSHCN_22, useNA = "ifany")

## 
##    0    1 
## 3537 1039

table(NSCH_2022_subset_NY_clean$NYC_Metro, useNA = "ifany")

## 
##    0    1 
## 2404 2172

#Convert to factor variables and create NYC_Metro binary variable
NSCH_2022_subset_NY_clean <- NSCH_2022_subset_NY_clean %>%
  mutate(
    CSHCN_22 = factor(ifelse(CSHCN_22 == 1, "Yes", "No")),
    MedHome_22 = factor(ifelse(MedHome_22 == 1, "Has a medical home", "Does not have a medical home")),
    NYC_Metro = factor(ifelse(NYC_Metro == 1, "NYC metro", "Rest of state")),
    raceASIA_22 = factor(raceASIA_22, levels=c(1,2,3,4,5), labels=c("Hispanic", "White, non-Hispanic", "Black, non-Hispanic", "Asian, non-Hispanic", "Multi-racial, non-Hispanic")),
    PrntNativity_22 = factor(PrntNativity_22, levels=c(1,2,3), labels=c("All parents born in the US (3rd or higher generation HH)", "Any parent born outside of the US (1st and 2nd generation HH)", "Other (child born in United States, parents are not listed)")),
    HHLanguage_22 = factor(HHLanguage_22, levels=c(1,2), labels=c("English", "Other than English")),
    AdultEduc_22 = factor(AdultEduc_22, levels=c(1,2,3,4), labels=c("Less than high school", "High school or GED", "Some college or technical school", "College degree or higher")),
    povlev4_22 = factor(povlev4_22, levels=c(1,2,3,4), labels=c("0-99% FPL", "100-199% FPL", "200-399% FPL", "400% FPL or greater")),
    instype_22 = factor(instype_22, levels=c(1,2,3,4), labels=c("Public health insurance only", "Private health insurance only", "Public and private insurance", "Uninsured")))

#Drop all rows with any NA or missing (or 99) values
NSCH_2022_subset_NY_clean_drop <- NSCH_2022_subset_NY_clean %>% drop_na()

#Load survey weight
survey <- svydesign(id = ~HHID, strata = ~FIPSST + STRATUM, weights = ~FWC, data = NSCH_2022_subset_NY_clean_drop) 

#Determine dummy coding and sample size breakdown for outcome and exposure
summary(NSCH_2022_subset_NY_clean_drop$MedHome_22)

## Does not have a medical home           Has a medical home 
##                         2217                         2105

summary(NSCH_2022_subset_NY_clean_drop$NYC_Metro)

##     NYC metro Rest of state 
##          2025          2297

#Determine mean for medical home, comparing with publicly available data (45.4%)
svymean(~MedHome_22, survey, na.rm = TRUE) * 100

##                                          mean     SE
## MedHome_22Does not have a medical home 54.553 0.0103
## MedHome_22Has a medical home           45.447 0.0103

#Create descriptive table of medhome and all covariates
survey %>%
  tbl_svysummary(
    by=MedHome_22,
    include=c(MedHome_22, NYC_Metro, raceASIA_22, HHLanguage_22, PrntNativity_22, 
              AdultEduc_22, povlev4_22, instype_22),
    percent = "row", 
    statistic = list(all_continuous() ~ "{mean} ({sd})", 
                     all_categorical() ~  "{n_unweighted} ({p}%)"),
    digits = NULL, 
    label = list(
      NYC_Metro ~ "Region",
      raceASIA_22     ~ "Race/Ethnicity of Child",
      HHLanguage_22           ~ "Primary household language",
      PrntNativity_22      ~ "Generational status of child’s parents",
      AdultEduc_22           ~ "Highest education of adults in household",
      povlev4_22      ~ "Income level of child’s household ",
      instype_22      ~ "Insurance type")
  ) %>%
  modify_header(update = list(label ~ "",
                              stat_1 ~ "**Does not have a medical home**, n = 2,217",
                              stat_2 ~ "**Has a medical home**, n = 2,105"))%>%
  italicize_levels() %>% 
  bold_labels() %>%
  add_n(statistic = "{N_obs_unweighted}") %>%
  modify_caption("Table 1. Descriptive Statistics — NSCH 2022 Analytic Sample (n = 4,322)") %>%
  as_flex_table()

## Warning: The `update` argument of `modify_header()` is deprecated as of gtsummary 2.0.0.
## ℹ Use `modify_header(...)` input instead. Dynamic dots allow for syntax like
##   `modify_header(!!!list(...))`.
## ℹ The deprecated feature was likely used in the gtsummary package.
##   Please report the issue at <https://github.com/ddsjoberg/gtsummary/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Table 1. Descriptive Statistics — NSCH 2022 Analytic Sample (n = 4,322)
	N	Does not have a medical home, n = 2,2171	Has a medical home, n = 2,1051
Region	4,322
NYC metro		1,112 (58%)	913 (42%)
Rest of state		1,105 (51%)	1,192 (49%)
Race/Ethnicity of Child	4,322
Hispanic		520 (62%)	322 (38%)
White, non-Hispanic		1,064 (46%)	1,327 (54%)
Black, non-Hispanic		224 (67%)	137 (33%)
Asian, non-Hispanic		264 (65%)	171 (35%)
Multi-racial, non-Hispanic		145 (50%)	148 (50%)
Primary household language	4,322
English		1,766 (50%)	1,922 (50%)
Other than English		451 (71%)	183 (29%)
Generational status of child’s parents	4,322
All parents born in the US (3rd or higher generation HH)		1,268 (49%)	1,460 (51%)
Any parent born outside of the US (1st and 2nd generation HH)		809 (62%)	564 (38%)
Other (child born in United States, parents are not listed)		140 (69%)	81 (31%)
Highest education of adults in household	4,322
Less than high school		119 (72%)	44 (28%)
High school or GED		318 (67%)	181 (33%)
Some college or technical school		465 (58%)	369 (42%)
College degree or higher		1,315 (47%)	1,511 (53%)
Income level of child’s household	4,322
0-99% FPL		426 (71%)	188 (29%)
100-199% FPL		465 (65%)	287 (35%)
200-399% FPL		537 (53%)	536 (47%)
400% FPL or greater		789 (41%)	1,094 (59%)
Insurance type	4,322
Public health insurance only		722 (65%)	422 (35%)
Private health insurance only		1,273 (45%)	1,595 (55%)
Public and private insurance		115 (69%)	62 (31%)
Uninsured		107 (80%)	26 (20%)
1n (unweighted) (%)

#Create basic bar graph of medical home raw data
ggplot(survey, aes(x=MedHome_22)) +
  geom_bar(fill = "steelblue", alpha=0.7) +
  labs(x = "Medical Home Status", y = "Count", title = "Medical Home Status in NYS Children")

#Create descriptive statistics on survey data and plot it on a bar graph
medhome_region<-svyby(formula = ~MedHome_22, by = ~NYC_Metro, design = survey, FUN = svymean, na.rm=T)
medhome_region_rename <- medhome_region %>%
    rename(
      MHNo = `MedHome_22Does not have a medical home`, 
      se.MHNo = `se.MedHome_22Does not have a medical home`,
      MHYes = `MedHome_22Has a medical home`,
      se.MHYes = `se.MedHome_22Has a medical home`)

ggplot(medhome_region_rename) +
  geom_bar(aes(x=NYC_Metro, y=MHYes), stat="identity", fill = "steelblue", alpha=0.7) +
  geom_errorbar( aes(x=NYC_Metro, ymin=MHYes - se.MHYes, ymax=MHYes + se.MHYes), width=0.4, alpha=0.9) +
  theme_minimal() +
  scale_y_continuous(labels=scales::percent) +
  labs(x = "New York State Region", y = "Percent with Medical Home", title = "Medical Home Percentage by Region")

#Exploratory visualization
medhome_income<-svyby(formula = ~MedHome_22, by = ~povlev4_22, design = survey, FUN = svymean, na.rm=T)

ggplot(medhome_income) +
  geom_bar(aes(x=povlev4_22, y=`MedHome_22Has a medical home`), stat="identity", fill = "steelblue", alpha=0.7) +
  theme_minimal() +
  scale_y_continuous(labels=scales::percent) +
  labs(x = "Income", y = "Percent with Medical Home", title = "Medical Home Percentage by Income")

medhome_race<-svyby(formula = ~MedHome_22, by = ~raceASIA_22, design = survey, FUN = svymean, na.rm=T)

ggplot(medhome_race) +
  geom_bar(aes(x=raceASIA_22, y=`MedHome_22Has a medical home`), stat="identity", fill = "steelblue", alpha=0.7) +
  theme_minimal() +
  scale_y_continuous(labels=scales::percent) +
  labs(x = "Race", y = "Percent with Medical Home", title = "Medical Home Percentage by Race")

EPI553_FinalProject_CheckIn1_GoldmanMatthew

Matthew Goldman

2026-03-11