rm(list = ls())

library(tidyr)
library(stringr)
library(skimr)
library(dplyr)
library(pander)
library(broom)
library(reshape2)
library(ggplot2)
library(ggpubr)
library(plotly)
library(gridExtra)
library(nnet)
library(kableExtra)

# Define a couple of functions for model comparison tables

# for formulas
get_formula <- function(prefix, n) {
  forms <- eval(parse(text = 
                       paste0("list(", 
                              paste0(prefix, 
                                     1:n, 
                                     "$terms", collapse = ","), 
                              ")")))
  out <- sapply(forms, FUN = format)
  return(out)
}

# for aics, need to also specify model type because
# object structures returned from AIC() are different

get_aics <- function(prefix, n, type) {
  mods <- eval(parse(text = 
                      paste0("list(", 
                             paste0(prefix, 
                                    1:n,
                                    collapse = ","), 
                             ")")))
  aics <- sapply(mods, FUN = AIC)
  
  if(type == "mlm"){
    out = aics
  } else {
    out = aics[2,]
    }
  
  return(out)
}

Back to Outline

Introduction

This report explores the income and insurance distributions from the WHAMP survey data.

Income

Income is not used in the EpiModel analyses because we do not have a way to model how income changes over the lifecourse. There are two types of income effects of interest that we therefore can not capture.

  • DAP income thresholds – From discussion with WADOH, these thresholds are not currently implemented as barriers to eligibility. For ADAP, people are primarily linked to appropriate insurance programs. For PDAP, supply of subsidies has been sufficient to meet demand. Note: this may change in the future

  • Effects on other input parameters – We use models to estimate parameters for the key dynamics in the simulation: sexual behavior (via stergms), and the PrEP and ART continua (via survival models, logistic regressions and other standard statistical methods). For the latter, we do include income in our exploratory modeling to determine whether we are missing an important predictor. In most cases we have found little impact after including age, race, region and SNAP.

We examine income in this report in order to understand the characteristics of the the WHAMP survey sample population. The WHAMP survey asked about household income (in categories) and number of dependents. These are combined with the FPL to construct the household income as a fraction of the FPL. If income is ever to be used as a DAP entry criteria, this is the standardized value that would be employed.

Insurance

Insurance is used in the economic analyses, and influences the cost estimates of the two DAPs. But it is used in a mechanistic way, by reverse engineering the relationship between the DAP costs and insurance status.

  • As with income, we do not have a way to model how insurance changes over the life course. So we do not use insurance to predict DAP enrollment, we instead assign insurance after DAP enrollment, based on the distribution observed for DAP clients. Since the State does assist DAP applicants with finding insurance, this is not as crazy as it sounds.

  • Note: The data used for the insurance analysis is the WADOH DAP program data, not the WHAMP survey data. For that analysis, please see this report.

Data

All of the data in this report come from the WHAMP Survey.

# Either make or read in data

dataset <- "readin"
#dataset <- "makeit"

if(dataset == "makeit") {
  source(here::here("MakeData", "MM", "Scripts",
                    "makeWideData.R"))
} else {
  wDF <- readRDS(here::here("MakeData", "MM", "Data",
                               "wideDF.RDS"))
}

# make sure this is updated if changed in makeWideData.R
# fpl is from: https://aspe.hhs.gov/2019-poverty-guidelines
# we use the guideline, not the threshold here

adap_fpl_cut <- data.frame(hhsize = c(1:8), 
                           fpl = c(12490, 16910, 21330, 25750,
                                   30170, 34590, 39010, 43430))

Missing data

Missing values in the WHAMP Survey demographics come mostly from insurance and income.

  • Income is missing for rscales::percent(proportions(table(is.na(wDF$hhincome2)))[[2]])` of respondents, with no pattern by age.

  • Income is missing for rscales::percent(proportions(table(is.na(wDF$insurance)))[[2]])` of respondents, with higher rates for the youngest age group.

Overall distributions

table(wDF$hhincome2, useNA = "al") %>%
  kable(caption= "Household Income (Original Variable)") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Household Income (Original Variable)
Var1 Freq
$0-19,999 94
$20,000-29,999 89
$30,000-39,999 72
$40,000-49,999 73
$50,000-59,999 53
$60,000-69,999 47
$70,000-79,999 58
$80,000+ 360
NA 81
table(wDF$insurance, useNA = "al") %>% 
  kable(caption= "Insurance (Combined Variable)") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Insurance (Combined Variable)
Var1 Freq
emplyr 577
indvdl_othr 61
medicare_caid 136
othr_gov 54
uninsrd 43
NA 56

By age

proportions(table(wDF$age_group, is.na(wDF$hh_income)), 1) %>%
  kable(caption= "Missing income by age", digits = 2) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Missing income by age
FALSE TRUE
15-24 0.96 0.04
25-34 0.97 0.03
35-44 0.97 0.03
45-54 0.96 0.04
55-65 0.97 0.03
# but missingness is related to age for insurance
proportions(table(wDF$age_group, is.na(wDF$insurance)), 1) %>% 
  kable(caption= "Missing insurance by age", digits = 2) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Missing insurance by age
FALSE TRUE
15-24 0.85 0.15
25-34 0.95 0.05
35-44 0.97 0.03
45-54 0.93 0.07
55-65 0.96 0.04

With explicit missing levels

Here we use income_cat for fpl adjusted

wDF <- wDF %>%
  mutate(insurance = ifelse(is.na(insurance),
                            "Missing", insurance),
         insurance = forcats::fct_relevel(insurance, "Missing", 
                                          after = Inf),
         income_cat = forcats::fct_explicit_na(income_cat,
                                               "Missing"),
         income_cat = forcats::fct_relevel(income_cat, "Missing", 
                                          after = Inf))


proportions(table(wDF$age_group, wDF$insurance), 1) %>% 
  kable(caption= "Final insurance by age", digits = 2) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Final insurance by age
emplyr indvdl_othr medicare_caid othr_gov uninsrd Missing
15-24 0.53 0.04 0.15 0.05 0.07 0.15
25-34 0.66 0.05 0.12 0.07 0.06 0.05
35-44 0.68 0.07 0.13 0.04 0.04 0.03
45-54 0.61 0.10 0.12 0.07 0.04 0.07
55-65 0.58 0.08 0.22 0.06 0.02 0.04
proportions(table(wDF$age_group, wDF$income_cat), 1) %>% 
  kable(caption= "Final income categories by age", digits = 2) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Final income categories by age
fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
15-24 0.25 0.27 0.08 0.04 0.01 0.16 0.19
25-34 0.09 0.27 0.10 0.09 0.07 0.31 0.08
35-44 0.10 0.13 0.11 0.07 0.02 0.51 0.06
45-54 0.09 0.13 0.06 0.05 0.03 0.57 0.08
55-65 0.09 0.22 0.08 0.04 0.01 0.49 0.08
# Create the output object
save_out <- list()

HH income and dependents

Income

Table

These are the weighted frequencies for income.

wDF %>% group_by(hhincome2) %>%
  summarize(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  mutate(prop.wtd = n.wtd / sum(n.wtd)) %>%
  kable(caption = "Household Income", digits = c(1,0,1,2)) %>%
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Household Income
hhincome2 n n.wtd prop.wtd
$0-19,999 94 86.6 0.09
$20,000-29,999 89 95.0 0.10
$30,000-39,999 72 65.6 0.07
$40,000-49,999 73 69.9 0.08
$50,000-59,999 53 52.2 0.06
$60,000-69,999 47 42.0 0.05
$70,000-79,999 58 59.6 0.06
$80,000+ 360 375.5 0.41
NA 81 80.7 0.09

Plot

wDF %>% group_by(hhincome2) %>%
  summarize(n.wtd = sum(ego.wawt)) %>%
  mutate(prop.wtd = n.wtd / sum(n.wtd)) %>%
  # ggplot(aes(x = sjlabelled::as_label(hh_income), y = prop.wtd)) +
  ggplot(aes(x = hhincome2, y = prop.wtd)) +
  geom_bar(stat="identity", fill = "blue", alpha = 0.5) +
  labs(title = "Household income",
       x = "HH income category",
       y = "Wtd proportion") +
  theme(axis.text.x = element_text(size = 8, angle = 50, 
                                   vjust = 0.6))

Dependents

Overall

tempDF <- wDF %>% 
  group_by(DEPEND) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% 
  kable(caption= paste("HH Dependents; n = ",
                       sum(tempDF$n)),
        digits = 2) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) 
HH Dependents; n = 927
DEPEND n n.wtd prop.wtd
1 416 431.6 0.47
2 311 304.3 0.33
3 49 45.2 0.05
4 41 37.0 0.04
5 17 15.1 0.02
6 8 9.4 0.01
7 2 1.7 0.00
NA 83 82.7 0.09

By Income

  • Single person HHs show a distinct pattern, with more low income and fewer high income earners. Roughly a third of these HH are in the bottom two categories, and just over 25% are in the top category

  • With 2+ in the HH, the majority (over 60% of HH) are in the top income category, with the rest relatively evenly distributed in the lower categories.

  • For the 5+ HH, there are slightly fewer in the top category, and slightly more in the middle.

tempDF <- wDF %>% 
  filter(hhincome2 != 88 & !is.na(DEPEND)) %>% 
  mutate(depend = factor(if_else(DEPEND > 5, 5, DEPEND),
                         levels = c(1:5),
                         labels = c(as.character(1:4), "5+"))) %>%
  group_by(depend, hhincome2) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(depend) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% 
  ggplot(aes(x = hhincome2, y = prop.wtd,
             group = depend, color = depend)) +
  geom_line(lwd=1) +
  labs(title = "Income distribution by dependents",
       x = "HH income",
       y = "wtd proportion",
       color = "Dependents") +
  scale_color_brewer(palette = "Spectral") +
  #theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust=1))

FPL adjusted income

The adjustment procedure:

  1. Match the FPL by the number of dependents for each respondent.

  2. Divide the original income bounds by the FPL. This resulted in an income interval with a lower bound and an upper bound of FPL of each individual.

  3. Roughly classify individuals’ income into 5 income categories <138% FPL, 138%-300% FPL, 300%-400% FPL, 500%-600% FPL, and >600% FPL. Then calculate the overlapping proportion of the interval in step 2 and the intervals defined in this step. If the overlapping proportion is over 50% in an income category, we assign the individual to this category.

  4. If a respondent originally said that their income was above $80,000 per year, the respondent was assigned in the income category >600% FPL no matter how many dependents this respondent had.

Descriptives

Overall

  • In general, individuals with lower income shifted their income categories after adjusting for the number of dependents.
tempDF <- wDF %>%
  filter(!is.na(income_cat)) %>%
  group_by(income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  ungroup() %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) 

p_income <- ggplot(tempDF, 
                   aes(x = income_cat, y = prop.wtd, 
                       text = prop.wtd)) +
  geom_bar(fill = "blue", alpha = 0.5,
           stat="identity",
           position = position_dodge2(width = 0.9)) +
  labs(title = "Income distribution in FPL categories",
       sub = "Not adjusted for dependents",
       caption = "Weighted WHAMP survey data",
       x= "income FPL category",
       y = "weighted proportion") +
  ylim(0, 0.5)

title.plotly <-
  list(text = paste0('Income distribution in FPL categories',
                     '<br>',
                     '<sup>',
                     'Not adjusted for dependents','</sup>'))

fig <- ggplotly(p_income , tooltip = "text",
                width = 600, height = 400) %>%
  layout(hoverlabel = list(font=list(color="white")),
         title = title.plotly)

fig

Age

Table

tempDF <- wDF %>% 
  filter(!is.na(income_cat)) %>% 
  mutate(income_cat = factor(income_cat,
                             levels = sort(unique(income_cat)))) %>%
  group_by(age_group, income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of income by age; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5))
Distribution of income by age; n = 927
Age group
income_cat 15-24 25-34 35-44 45-54 55-65
fpl138 0.21 0.08 0.09 0.08 0.08
fpl138-300 0.30 0.26 0.11 0.11 0.22
fpl300-400 0.08 0.09 0.11 0.06 0.08
fpl400-500 0.04 0.10 0.07 0.04 0.03
fpl500-600 0.01 0.07 0.01 0.02 0.01
fpl600+ 0.19 0.33 0.55 0.63 0.50
Missing 0.18 0.08 0.06 0.06 0.09

Plot

mycol = RColorBrewer::brewer.pal(length(unique(wDF$age_group))+1,
                                 'Oranges')[-1]

ggplot(tempDF, 
       aes(x = income_cat, y = prop.wtd,
           group = age_group, color = age_group)) +
  geom_line(size = 1) +
  geom_point() +
  labs(title = "FPL adjusted income group by age",
       x = "FPL adjusted income group",
       color = "Age") +
  scale_color_manual(values=mycol) +
  theme(axis.text.x = element_text(angle = 45, hjust=1))

Race

Table

tempDF <- wDF %>% 
  group_by(race, income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  arrange(income_cat) %>%
  kable(caption= paste("Distribution of income by race; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3))
Distribution of income by race; n = 927
Race
income_cat B H O
fpl138 0.08 0.14 0.10
fpl138-300 0.30 0.25 0.18
fpl300-400 0.10 0.08 0.08
fpl400-500 0.06 0.10 0.05
fpl500-600 NA 0.06 0.02
fpl600+ 0.39 0.25 0.47
Missing 0.07 0.11 0.09

Plot

ggplot(tempDF, 
       aes(x = income_cat, y = prop.wtd,
           group = race, color = race)) +
  geom_line(size = 1, alpha = 0.7) +
  geom_point() +
  labs(title = "FPL adjusted income group by age",
       x = "FPL adjusted income group",
       color = "Age") +
#  scale_color_manual(values=mycol) +
  theme(axis.text.x = element_text(angle = 45, hjust=1))

Region

Table

tempDF <- wDF %>% 
  group_by(region, income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  arrange(income_cat) %>%
  kable(caption= paste("Distribution of income by region; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "region" = 3))
Distribution of income by region; n = 927
region
income_cat EasternWA King WesternWA
fpl138 0.20 0.06 0.14
fpl138-300 0.27 0.17 0.23
fpl300-400 0.09 0.07 0.11
fpl400-500 0.05 0.05 0.07
fpl500-600 0.03 0.03 0.02
fpl600+ 0.24 0.54 0.32
Missing 0.11 0.07 0.11

Plot

ggplot(tempDF, 
       aes(x = income_cat, y = prop.wtd,
           group = region, color = region)) +
  geom_line(size = 1, alpha = 0.7) +
  geom_point() +
  labs(title = "FPL adjusted income group by age",
       x = "FPL adjusted income group",
       color = "Age") +
#  scale_color_manual(values=mycol) +
  theme(axis.text.x = element_text(angle = 45, hjust=1))

Prediction: Adjusted Income

Multinomial regressions

  • The coefficients are relative risk ratios.

Model 1

# multinom only uses complete cases, and inconsistently handles missing
# cases which breaks other functions like summary()

cDFi <- wDF %>% filter(complete == 1 & income_cat != "Missing")

inc_mlm1 <- multinom(income_cat ~ diag.status, 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  18 (10 variable)
## initial  value 1510.111738 
## iter  10 value 1207.124161
## final  value 1205.269549 
## converged
#summary(inc_mlm1)
kable(tidy(inc_mlm1, exponentiate = T), digits = 2, 
      caption = "Model 1") %>% 
  kable_styling(full_width = F, 
                position="center", 
                bootstrap_options = c("striped"))
Model 1
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 1.92 0.13 4.91 0.00
fpl138-300 diag.status 1.01 0.42 0.03 0.97
fpl300-400 (Intercept) 0.81 0.16 -1.34 0.18
fpl300-400 diag.status 1.10 0.50 0.20 0.84
fpl400-500 (Intercept) 0.55 0.18 -3.30 0.00
fpl400-500 diag.status 1.19 0.54 0.31 0.75
fpl500-600 (Intercept) 0.27 0.23 -5.57 0.00
fpl500-600 diag.status 0.40 1.05 -0.88 0.38
fpl600+ (Intercept) 4.15 0.12 11.87 0.00
fpl600+ diag.status 1.20 0.37 0.48 0.63

Model 2

inc_mlm2 <- multinom(income_cat ~ age_group, 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  36 (25 variable)
## initial  value 1510.111738 
## iter  10 value 1170.486371
## iter  20 value 1142.577044
## iter  30 value 1139.946598
## final  value 1139.943809 
## converged
kable(tidy(inc_mlm2, exponentiate = T), digits = 2, 
      caption= "Model 2") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 2
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 1.42 0.23 1.55 0.12
fpl138-300 age_group25-34 2.12 0.35 2.17 0.03
fpl138-300 age_group35-44 0.91 0.40 -0.23 0.82
fpl138-300 age_group45-54 1.00 0.43 -0.01 0.99
fpl138-300 age_group55-65 1.98 0.39 1.77 0.08
fpl300-400 (Intercept) 0.36 0.33 -3.01 0.00
fpl300-400 age_group25-34 3.08 0.46 2.46 0.01
fpl300-400 age_group35-44 3.56 0.47 2.71 0.01
fpl300-400 age_group45-54 2.01 0.55 1.27 0.20
fpl300-400 age_group55-65 2.61 0.51 1.88 0.06
fpl400-500 (Intercept) 0.19 0.43 -3.84 0.00
fpl400-500 age_group25-34 6.16 0.53 3.41 0.00
fpl400-500 age_group35-44 4.18 0.57 2.50 0.01
fpl400-500 age_group45-54 2.78 0.65 1.58 0.11
fpl400-500 age_group55-65 1.93 0.68 0.97 0.33
fpl500-600 (Intercept) 0.04 0.90 -3.62 0.00
fpl500-600 age_group25-34 20.32 0.96 3.13 0.00
fpl500-600 age_group35-44 4.58 1.10 1.38 0.17
fpl500-600 age_group45-54 8.41 1.07 2.00 0.05
fpl500-600 age_group55-65 2.34 1.30 0.66 0.51
fpl600+ (Intercept) 0.88 0.25 -0.52 0.60
fpl600+ age_group25-34 4.40 0.36 4.13 0.00
fpl600+ age_group35-44 7.35 0.37 5.44 0.00
fpl600+ age_group45-54 9.60 0.39 5.79 0.00
fpl600+ age_group55-65 7.27 0.38 5.16 0.00

Model 3

inc_mlm3 <- multinom(income_cat ~ age_group + factor(race), 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  48 (35 variable)
## initial  value 1510.111738 
## iter  10 value 1168.693774
## iter  20 value 1128.622592
## iter  30 value 1126.846067
## iter  40 value 1126.569792
## iter  50 value 1126.565275
## final  value 1126.565256 
## converged
kable(tidy(inc_mlm3, exponentiate = T), digits = 2, 
      caption= "Model 3") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 3
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 2.89 0.55 1.93 0.05
fpl138-300 age_group25-34 2.08 0.35 2.11 0.04
fpl138-300 age_group35-44 0.90 0.40 -0.26 0.79
fpl138-300 age_group45-54 0.99 0.43 -0.03 0.98
fpl138-300 age_group55-65 2.03 0.39 1.83 0.07
fpl138-300 factor(race)H 0.50 0.61 -1.13 0.26
fpl138-300 factor(race)O 0.46 0.53 -1.46 0.15
fpl300-400 (Intercept) 0.58 0.68 -0.79 0.43
fpl300-400 age_group25-34 3.02 0.46 2.42 0.02
fpl300-400 age_group35-44 3.53 0.47 2.69 0.01
fpl300-400 age_group45-54 1.97 0.55 1.24 0.22
fpl300-400 age_group55-65 2.58 0.51 1.85 0.06
fpl300-400 factor(race)H 0.49 0.75 -0.96 0.34
fpl300-400 factor(race)O 0.63 0.63 -0.72 0.47
fpl400-500 (Intercept) 0.22 0.82 -1.85 0.06
fpl400-500 age_group25-34 6.24 0.53 3.43 0.00
fpl400-500 age_group35-44 4.16 0.57 2.50 0.01
fpl400-500 age_group45-54 2.84 0.65 1.61 0.11
fpl400-500 age_group55-65 2.03 0.68 1.04 0.30
fpl400-500 factor(race)H 1.17 0.82 0.19 0.85
fpl400-500 factor(race)O 0.78 0.73 -0.34 0.74
fpl500-600 (Intercept) 0.00 0.61 -25.46 0.00
fpl500-600 age_group25-34 21.47 0.96 3.18 0.00
fpl500-600 age_group35-44 4.64 1.10 1.39 0.16
fpl500-600 age_group45-54 8.93 1.07 2.05 0.04
fpl500-600 age_group55-65 2.52 1.30 0.71 0.48
fpl500-600 factor(race)H 347946.96 0.44 29.24 0.00
fpl500-600 factor(race)O 168177.03 0.38 31.32 0.00
fpl600+ (Intercept) 1.04 0.56 0.08 0.94
fpl600+ age_group25-34 4.29 0.36 4.04 0.00
fpl600+ age_group35-44 7.34 0.37 5.42 0.00
fpl600+ age_group45-54 9.30 0.39 5.69 0.00
fpl600+ age_group55-65 6.86 0.39 4.99 0.00
fpl600+ factor(race)H 0.40 0.61 -1.50 0.13
fpl600+ factor(race)O 0.93 0.53 -0.13 0.89

Model 4

inc_mlm4 <- multinom(income_cat ~ age_group + 
                       factor(race) + factor(region), 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  60 (45 variable)
## initial  value 1510.111738 
## iter  10 value 1141.915601
## iter  20 value 1097.041447
## iter  30 value 1094.935028
## iter  40 value 1094.415678
## iter  50 value 1094.348000
## final  value 1094.347252 
## converged
kable(tidy(inc_mlm4, exponentiate = T), digits = 2, 
      caption= "Model 4") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 4
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 1.75 0.64 0.87 0.38
fpl138-300 age_group25-34 2.07 0.35 2.09 0.04
fpl138-300 age_group35-44 0.92 0.40 -0.20 0.84
fpl138-300 age_group45-54 1.02 0.44 0.04 0.97
fpl138-300 age_group55-65 2.14 0.39 1.95 0.05
fpl138-300 factor(race)H 0.60 0.62 -0.83 0.41
fpl138-300 factor(race)O 0.52 0.54 -1.22 0.22
fpl138-300 factor(region)King 1.92 0.37 1.74 0.08
fpl138-300 factor(region)WesternWA 1.25 0.37 0.60 0.55
fpl300-400 (Intercept) 0.28 0.81 -1.56 0.12
fpl300-400 age_group25-34 3.02 0.46 2.40 0.02
fpl300-400 age_group35-44 3.63 0.47 2.73 0.01
fpl300-400 age_group45-54 2.01 0.55 1.26 0.21
fpl300-400 age_group55-65 2.72 0.51 1.95 0.05
fpl300-400 factor(race)H 0.61 0.75 -0.65 0.51
fpl300-400 factor(race)O 0.73 0.63 -0.50 0.62
fpl300-400 factor(region)King 2.38 0.49 1.78 0.08
fpl300-400 factor(region)WesternWA 1.61 0.48 0.99 0.32
fpl400-500 (Intercept) 0.08 0.98 -2.60 0.01
fpl400-500 age_group25-34 6.23 0.54 3.41 0.00
fpl400-500 age_group35-44 4.34 0.57 2.55 0.01
fpl400-500 age_group45-54 2.94 0.65 1.65 0.10
fpl400-500 age_group55-65 2.19 0.68 1.15 0.25
fpl400-500 factor(race)H 1.54 0.82 0.52 0.60
fpl400-500 factor(race)O 0.93 0.74 -0.10 0.92
fpl400-500 factor(region)King 3.40 0.58 2.10 0.04
fpl400-500 factor(region)WesternWA 1.95 0.59 1.13 0.26
fpl500-600 (Intercept) 0.00 0.73 -21.69 0.00
fpl500-600 age_group25-34 21.14 0.97 3.16 0.00
fpl500-600 age_group35-44 4.83 1.11 1.42 0.15
fpl500-600 age_group45-54 9.60 1.07 2.11 0.03
fpl500-600 age_group55-65 2.75 1.30 0.78 0.44
fpl500-600 factor(race)H 285573.47 0.47 26.66 0.00
fpl500-600 factor(race)O 127991.23 0.45 26.20 0.00
fpl500-600 factor(region)King 3.06 0.74 1.51 0.13
fpl500-600 factor(region)WesternWA 1.22 0.78 0.25 0.80
fpl600+ (Intercept) 0.21 0.67 -2.33 0.02
fpl600+ age_group25-34 4.30 0.37 3.92 0.00
fpl600+ age_group35-44 8.09 0.38 5.48 0.00
fpl600+ age_group45-54 10.74 0.41 5.83 0.00
fpl600+ age_group55-65 8.08 0.40 5.22 0.00
fpl600+ factor(race)H 0.58 0.63 -0.86 0.39
fpl600+ factor(race)O 1.24 0.54 0.40 0.69
fpl600+ factor(region)King 7.27 0.38 5.17 0.00
fpl600+ factor(region)WesternWA 1.70 0.38 1.38 0.17

Comparison of AICs

models <- get_formula("inc_mlm", 4)
aics <- get_aics("inc_mlm", 4, "mlm")

kable(data.frame(model = models, 
                 AIC = aics),
      digits = 0,
      caption= paste("Comparison of AICs; n =", dim(cDFi)[[1]])) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Comparison of AICs; n = 842
model AIC
income_cat ~ diag.status 2431
income_cat ~ age_group 2330
income_cat ~ age_group + factor(race) 2323
income_cat ~ age_group + factor(race) + factor(region) 2279
  • We will look at the predictions for model 4, including age + race + region.

Predicted distribution

# Construct and save output
expand_df <- expand.grid(
  age_group = factor(c(1:5), 
                     levels = c(1:5), 
                     labels = levels(wDF$age_group)), 
  race = factor(c("B", "H", "O")), 
  region = c("EasternWA", "King", "WesternWA"))

pred_inc <- cbind(expand_df, 
                  predict(inc_mlm4, 
                          newdata = expand_df, 
                          "probs"))

pred_inc <- pred_inc %>% 
  gather(income, prob, `fpl138`:`fpl600+`)

# lb & ub values are income as percent of fpl given HHdependents
income_levels <- data.frame(lvl = c(1:6), 
                            lb = c(0, 138, 300, 400, 500, 600), 
                            ub = c(137, 299, 399, 499, 599, 1000))

# Save output
save_out$pred_inc <- pred_inc
save_out$income_levels <- income_levels
ggplot(data = pred_inc, aes(x = income, y = prob, group = race)) +
  geom_line(aes(color = race)) +
  facet_wrap(~ region + age_group, ncol = 5) +
  ylim(0, 0.8) +
  labs(title = "FPL income group by race, region and age",
       y = "predicted distribution") + 
  theme(axis.text.x = element_text(size = 8, angle = 30, hjust = 0.5),
        strip.text.x = element_text(size = 8),
        legend.position = "bottom")

FPL guidelines

  • These are the federal poverty level (FPL) guidelines by the number of dependents in the household. Source here.
kable(adap_fpl_cut, 
      caption= "FPL by dependents") %>% 
  kable_styling(full_width=F, position="center") 
FPL by dependents
hhsize fpl
1 12490
2 16910
3 21330
4 25750
5 30170
6 34590
7 39010
8 43430

Insurance

Descriptive statistics

Overall

#with(cDF, table(insurance, useNA = "al"))

cDF <- wDF %>% filter(complete==1)

cDF %>%
  group_by(insurance) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "Distribution of insurance") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Distribution of insurance
insurance n n.wtd prop.wtd
emplyr 576 586.9 0.64
indvdl_othr 61 54.9 0.06
medicare_caid 134 120.9 0.13
othr_gov 54 56.2 0.06
uninsrd 42 47.3 0.05
Missing 56 57.5 0.06

HIV status

Table

tempDF <- cDF %>% 
  mutate(dx.status = ifelse(diag.status == 1, "HIV+", "HIV-")) %>%
  group_by(dx.status, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(dx.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = dx.status,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by Dx status; n =",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx status" = 2))
Distribution of insurance by Dx status; n = 923
Dx status
insurance HIV- HIV+
emplyr 0.64 0.60
indvdl_othr 0.06 0.04
medicare_caid 0.12 0.21
othr_gov 0.05 0.14
uninsrd 0.06 NA
Missing 0.07 0.01

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = dx.status, fill = dx.status)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))

title.plotly <-
  list(text = paste0('Insurance by HIV Dx',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Age

Table

tempDF <- cDF %>% 
  group_by(age_group, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by age group; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age Group" = 5))
Distribution of insurance by age group; n = 923
Age Group
insurance 15-24 25-34 35-44 45-54 55-65
emplyr 0.54 0.67 0.71 0.64 0.59
indvdl_othr 0.03 0.04 0.06 0.09 0.07
medicare_caid 0.13 0.12 0.10 0.10 0.22
othr_gov 0.05 0.07 0.06 0.06 0.06
uninsrd 0.11 0.05 0.04 0.05 0.02
Missing 0.15 0.05 0.03 0.06 0.03

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))

title.plotly <-
  list(text = paste0('Insurance distribution by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

Almost all of the uninsured are Black or Hispanic.

tempDF <- cDF %>% 
  group_by(race, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by race; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3))
Distribution of insurance by race; n = 923
Race
insurance B H O
emplyr 0.47 0.52 0.67
indvdl_othr 0.02 0.02 0.07
medicare_caid 0.13 0.15 0.13
othr_gov 0.18 0.12 0.04
uninsrd 0.19 0.13 0.03
Missing 0.02 0.07 0.07

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Insurance distribution by race',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

tempDF <- cDF %>% 
  group_by(region, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by region; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3))
Distribution of insurance by region; n = 923
Region
insurance EasternWA King WesternWA
emplyr 0.46 0.71 0.56
indvdl_othr 0.10 0.06 0.05
medicare_caid 0.22 0.08 0.19
othr_gov 0.04 0.04 0.10
uninsrd 0.08 0.05 0.05
Missing 0.10 0.06 0.05

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  ylab("wtd proportion") +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Insurance distribution by region',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

# We show the missing income cases in the table
tempDF <- cDF %>% 
  group_by(income_cat, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by FPL income category; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL income category" = 7))
Distribution of insurance by FPL income category; n = 923
FPL income category
insurance fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
emplyr 0.18 0.49 0.76 0.72 0.92 0.83 0.27
indvdl_othr 0.07 0.08 0.07 0.02 0.02 0.06 0.03
medicare_caid 0.56 0.19 0.08 0.10 NA 0.02 0.15
othr_gov 0.06 0.08 0.06 0.13 0.06 0.05 0.06
uninsrd 0.10 0.13 0.03 0.04 NA 0.02 0.05
Missing 0.05 0.03 0.01 NA NA 0.03 0.44

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Insurance distribution by FPL income category',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Prediction

Multinomial regressions

insDF <- cDF %>%
  mutate(age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"))

Model 1

ins_mlm1 <- multinom(insurance ~ age_group, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  36 (25 variable)
## initial  value 1655.050008 
## iter  10 value 1132.758539
## iter  20 value 1094.634600
## iter  30 value 1094.503716
## final  value 1094.503684 
## converged
kable(tidy(ins_mlm1, exponentiate = T), 
      caption= "Model 1", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 1
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.057 0.467 -6.117 0.000
indvdl_othr age_group25-34 1.122 0.570 0.203 0.839
indvdl_othr age_group35-44 1.539 0.555 0.776 0.438
indvdl_othr age_group45-54 2.419 0.543 1.627 0.104
indvdl_othr age_group55-65 2.189 0.552 1.420 0.156
medicare_caid (Intercept) 0.236 0.249 -5.797 0.000
medicare_caid age_group25-34 0.752 0.324 -0.879 0.379
medicare_caid age_group35-44 0.568 0.352 -1.605 0.109
medicare_caid age_group45-54 0.666 0.362 -1.124 0.261
medicare_caid age_group55-65 1.566 0.313 1.435 0.151
othr_gov (Intercept) 0.091 0.377 -6.359 0.000
othr_gov age_group25-34 1.099 0.462 0.205 0.838
othr_gov age_group35-44 0.966 0.483 -0.072 0.943
othr_gov age_group45-54 1.037 0.500 0.072 0.942
othr_gov age_group55-65 1.165 0.493 0.309 0.757
uninsrd (Intercept) 0.198 0.268 -6.050 0.000
uninsrd age_group25-34 0.393 0.401 -2.326 0.020
uninsrd age_group35-44 0.268 0.466 -2.820 0.005
uninsrd age_group45-54 0.393 0.448 -2.084 0.037
uninsrd age_group55-65 0.144 0.647 -2.990 0.003
Missing (Intercept) 0.286 0.231 -5.424 0.000
Missing age_group25-34 0.251 0.387 -3.576 0.000
Missing age_group35-44 0.148 0.484 -3.949 0.000
Missing age_group45-54 0.340 0.398 -2.710 0.007
Missing age_group55-65 0.205 0.477 -3.325 0.001

Model 2

ins_mlm2 <- multinom(insurance ~ age_group + race, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  48 (35 variable)
## initial  value 1655.050008 
## iter  10 value 1106.835938
## iter  20 value 1063.399354
## iter  30 value 1063.252499
## final  value 1063.252469 
## converged
kable(tidy(ins_mlm2, exponentiate = T), 
      caption= "Model 2", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 2
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.063 0.470 -5.893 0.000
indvdl_othr age_group25-34 1.124 0.570 0.205 0.837
indvdl_othr age_group35-44 1.541 0.556 0.778 0.437
indvdl_othr age_group45-54 2.358 0.544 1.578 0.115
indvdl_othr age_group55-65 2.108 0.553 1.349 0.177
indvdl_othr raceB 0.421 0.941 -0.918 0.359
indvdl_othr raceH 0.525 0.657 -0.981 0.327
medicare_caid (Intercept) 0.216 0.257 -5.972 0.000
medicare_caid age_group25-34 0.754 0.325 -0.868 0.385
medicare_caid age_group35-44 0.569 0.353 -1.595 0.111
medicare_caid age_group45-54 0.686 0.363 -1.038 0.299
medicare_caid age_group55-65 1.633 0.315 1.556 0.120
medicare_caid raceB 1.481 0.420 0.934 0.350
medicare_caid raceH 1.589 0.316 1.465 0.143
othr_gov (Intercept) 0.059 0.403 -7.010 0.000
othr_gov age_group25-34 1.058 0.470 0.119 0.905
othr_gov age_group35-44 0.940 0.491 -0.127 0.899
othr_gov age_group45-54 1.118 0.511 0.218 0.827
othr_gov age_group55-65 1.329 0.504 0.564 0.573
othr_gov raceB 6.259 0.394 4.655 0.000
othr_gov raceH 3.728 0.368 3.574 0.000
uninsrd (Intercept) 0.110 0.317 -6.964 0.000
uninsrd age_group25-34 0.370 0.419 -2.377 0.017
uninsrd age_group35-44 0.257 0.481 -2.822 0.005
uninsrd age_group45-54 0.427 0.467 -1.825 0.068
uninsrd age_group55-65 0.169 0.660 -2.692 0.007
uninsrd raceB 9.499 0.416 5.410 0.000
uninsrd raceH 4.841 0.385 4.100 0.000
Missing (Intercept) 0.287 0.241 -5.193 0.000
Missing age_group25-34 0.256 0.387 -3.520 0.000
Missing age_group35-44 0.150 0.484 -3.917 0.000
Missing age_group45-54 0.346 0.400 -2.657 0.008
Missing age_group55-65 0.208 0.479 -3.279 0.001
Missing raceB 0.372 1.045 -0.946 0.344
Missing raceH 1.153 0.444 0.320 0.749

Model 3

ins_mlm3 <- multinom(insurance ~ age_group + race + 
                       region, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  60 (45 variable)
## initial  value 1655.050008 
## iter  10 value 1111.068012
## iter  20 value 1039.600207
## iter  30 value 1038.912537
## final  value 1038.912285 
## converged
kable(tidy(ins_mlm3, exponentiate = T), 
      caption= "Model 3", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 3
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.053 0.492 -5.957 0.000
indvdl_othr age_group25-34 1.157 0.572 0.255 0.799
indvdl_othr age_group35-44 1.578 0.558 0.818 0.413
indvdl_othr age_group45-54 2.445 0.546 1.638 0.101
indvdl_othr age_group55-65 2.137 0.555 1.369 0.171
indvdl_othr raceB 0.458 0.943 -0.828 0.408
indvdl_othr raceH 0.480 0.660 -1.113 0.266
indvdl_othr regionEasternWA 2.795 0.418 2.457 0.014
indvdl_othr regionWesternWA 1.094 0.326 0.277 0.782
medicare_caid (Intercept) 0.120 0.292 -7.274 0.000
medicare_caid age_group25-34 0.810 0.331 -0.639 0.523
medicare_caid age_group35-44 0.596 0.359 -1.443 0.149
medicare_caid age_group45-54 0.722 0.369 -0.882 0.378
medicare_caid age_group55-65 1.649 0.321 1.557 0.119
medicare_caid raceB 1.754 0.428 1.312 0.189
medicare_caid raceH 1.467 0.325 1.178 0.239
medicare_caid regionEasternWA 3.963 0.321 4.286 0.000
medicare_caid regionWesternWA 2.764 0.224 4.545 0.000
othr_gov (Intercept) 0.034 0.444 -7.615 0.000
othr_gov age_group25-34 1.100 0.475 0.200 0.842
othr_gov age_group35-44 0.940 0.496 -0.124 0.901
othr_gov age_group45-54 1.091 0.515 0.170 0.865
othr_gov age_group55-65 1.294 0.508 0.507 0.612
othr_gov raceB 7.079 0.406 4.825 0.000
othr_gov raceH 3.816 0.376 3.564 0.000
othr_gov regionEasternWA 1.611 0.585 0.816 0.415
othr_gov regionWesternWA 3.274 0.304 3.899 0.000
uninsrd (Intercept) 0.088 0.354 -6.877 0.000
uninsrd age_group25-34 0.378 0.421 -2.311 0.021
uninsrd age_group35-44 0.266 0.482 -2.749 0.006
uninsrd age_group45-54 0.453 0.469 -1.691 0.091
uninsrd age_group55-65 0.172 0.661 -2.665 0.008
uninsrd raceB 10.244 0.422 5.513 0.000
uninsrd raceH 4.516 0.389 3.871 0.000
uninsrd regionEasternWA 2.604 0.477 2.005 0.045
uninsrd regionWesternWA 1.371 0.359 0.879 0.379
Missing (Intercept) 0.258 0.275 -4.921 0.000
Missing age_group25-34 0.260 0.389 -3.460 0.001
Missing age_group35-44 0.152 0.486 -3.877 0.000
Missing age_group45-54 0.356 0.401 -2.571 0.010
Missing age_group55-65 0.210 0.480 -3.257 0.001
Missing raceB 0.381 1.048 -0.921 0.357
Missing raceH 1.073 0.448 0.157 0.875
Missing regionEasternWA 2.277 0.422 1.949 0.051
Missing regionWesternWA 1.031 0.323 0.096 0.924

Model 4

ins_mlm4 <- multinom(insurance ~ age_group + race + 
                       region + diag.status, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  66 (50 variable)
## initial  value 1655.050008 
## iter  10 value 1104.083156
## iter  20 value 1029.355743
## iter  30 value 1027.875420
## final  value 1027.870580 
## converged
kable(tidy(ins_mlm4, exponentiate = T), 
      caption= "Model 4", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 4
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.054 0.492 -5.947 0.000
indvdl_othr age_group25-34 1.167 0.572 0.270 0.787
indvdl_othr age_group35-44 1.630 0.559 0.875 0.382
indvdl_othr age_group45-54 2.549 0.547 1.709 0.087
indvdl_othr age_group55-65 2.346 0.563 1.515 0.130
indvdl_othr raceB 0.459 0.943 -0.827 0.408
indvdl_othr raceH 0.486 0.661 -1.092 0.275
indvdl_othr regionEasternWA 2.754 0.419 2.417 0.016
indvdl_othr regionWesternWA 1.078 0.326 0.230 0.818
indvdl_othr diag.status 0.640 0.540 -0.827 0.408
medicare_caid (Intercept) 0.118 0.293 -7.298 0.000
medicare_caid age_group25-34 0.799 0.331 -0.679 0.497
medicare_caid age_group35-44 0.559 0.362 -1.604 0.109
medicare_caid age_group45-54 0.672 0.372 -1.067 0.286
medicare_caid age_group55-65 1.425 0.336 1.055 0.291
medicare_caid raceB 1.725 0.429 1.270 0.204
medicare_caid raceH 1.445 0.325 1.132 0.258
medicare_caid regionEasternWA 4.098 0.323 4.372 0.000
medicare_caid regionWesternWA 2.818 0.225 4.613 0.000
medicare_caid diag.status 1.679 0.312 1.661 0.097
othr_gov (Intercept) 0.034 0.444 -7.590 0.000
othr_gov age_group25-34 1.031 0.476 0.065 0.948
othr_gov age_group35-44 0.735 0.512 -0.600 0.548
othr_gov age_group45-54 0.895 0.523 -0.212 0.832
othr_gov age_group55-65 0.867 0.540 -0.264 0.792
othr_gov raceB 6.870 0.412 4.682 0.000
othr_gov raceH 3.499 0.380 3.293 0.001
othr_gov regionEasternWA 1.796 0.586 0.999 0.318
othr_gov regionWesternWA 3.333 0.307 3.925 0.000
othr_gov diag.status 3.020 0.394 2.807 0.005
uninsrd (Intercept) 0.087 0.354 -6.874 0.000
uninsrd age_group25-34 0.391 0.420 -2.236 0.025
uninsrd age_group35-44 0.306 0.484 -2.451 0.014
uninsrd age_group45-54 0.511 0.473 -1.418 0.156
uninsrd age_group55-65 0.233 0.664 -2.190 0.029
uninsrd raceB 10.117 0.423 5.467 0.000
uninsrd raceH 4.716 0.392 3.952 0.000
uninsrd regionEasternWA 2.464 0.479 1.884 0.060
uninsrd regionWesternWA 1.371 0.362 0.871 0.384
uninsrd diag.status 0.000 26.891 -0.291 0.771
Missing (Intercept) 0.260 0.275 -4.903 0.000
Missing age_group25-34 0.266 0.389 -3.406 0.001
Missing age_group35-44 0.165 0.487 -3.698 0.000
Missing age_group45-54 0.397 0.404 -2.290 0.022
Missing age_group55-65 0.263 0.486 -2.751 0.006
Missing raceB 0.377 1.048 -0.931 0.352
Missing raceH 1.100 0.450 0.213 0.831
Missing regionEasternWA 2.210 0.423 1.876 0.061
Missing regionWesternWA 1.012 0.324 0.037 0.971
Missing diag.status 0.177 1.193 -1.451 0.147

Model 5

We keep the missing income level as a predictor in the model

ins_mlm5 <- multinom(insurance ~ age_group + race + 
                       region + diag.status + income_cat, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  102 (80 variable)
## initial  value 1655.050008 
## iter  10 value 935.798013
## iter  20 value 852.656056
## iter  30 value 849.266973
## iter  40 value 849.164479
## iter  50 value 849.106145
## iter  60 value 849.099596
## iter  70 value 849.098351
## iter  80 value 849.097026
## iter  90 value 849.096417
## iter  90 value 849.096412
## iter  90 value 849.096412
## final  value 849.096412 
## converged
kable(tidy(ins_mlm5, exponentiate = T), 
      caption= "Model 5", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 5
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.022 0.560 -6.803000e+00 0.000
indvdl_othr age_group25-34 1.864 0.591 1.053000e+00 0.292
indvdl_othr age_group35-44 3.108 0.588 1.929000e+00 0.054
indvdl_othr age_group45-54 5.419 0.585 2.890000e+00 0.004
indvdl_othr age_group55-65 4.364 0.597 2.469000e+00 0.014
indvdl_othr raceB 0.423 0.947 -9.100000e-01 0.363
indvdl_othr raceH 0.464 0.671 -1.144000e+00 0.253
indvdl_othr regionEasternWA 2.087 0.438 1.678000e+00 0.093
indvdl_othr regionWesternWA 0.864 0.339 -4.320000e-01 0.666
indvdl_othr diag.status 0.579 0.552 -9.910000e-01 0.321
indvdl_othr income_catfpl138 8.891 0.555 3.940000e+00 0.000
indvdl_othr income_catfpl138-300 3.378 0.379 3.215000e+00 0.001
indvdl_othr income_catfpl300-400 1.713 0.510 1.055000e+00 0.291
indvdl_othr income_catfpl400-500 0.396 1.149 -8.060000e-01 0.420
indvdl_othr income_catfpl500-600 0.465 1.378 -5.550000e-01 0.579
indvdl_othr income_catMissing 2.257 0.708 1.151000e+00 0.250
medicare_caid (Intercept) 0.007 0.490 -1.026700e+01 0.000
medicare_caid age_group25-34 2.096 0.394 1.880000e+00 0.060
medicare_caid age_group35-44 1.975 0.439 1.549000e+00 0.121
medicare_caid age_group45-54 3.189 0.459 2.527000e+00 0.011
medicare_caid age_group55-65 6.008 0.420 4.268000e+00 0.000
medicare_caid raceB 1.442 0.516 7.100000e-01 0.478
medicare_caid raceH 1.335 0.378 7.640000e-01 0.445
medicare_caid regionEasternWA 2.066 0.386 1.880000e+00 0.060
medicare_caid regionWesternWA 1.599 0.269 1.742000e+00 0.081
medicare_caid diag.status 1.543 0.383 1.133000e+00 0.257
medicare_caid income_catfpl138 169.329 0.475 1.080500e+01 0.000
medicare_caid income_catfpl138-300 15.466 0.410 6.687000e+00 0.000
medicare_caid income_catfpl300-400 3.939 0.558 2.458000e+00 0.014
medicare_caid income_catfpl400-500 5.855 0.592 2.984000e+00 0.003
medicare_caid income_catfpl500-600 0.000 NaN NaN NaN
medicare_caid income_catMissing 22.859 0.516 6.070000e+00 0.000
othr_gov (Intercept) 0.016 0.529 -7.790000e+00 0.000
othr_gov age_group25-34 1.363 0.496 6.240000e-01 0.533
othr_gov age_group35-44 1.234 0.544 3.860000e-01 0.699
othr_gov age_group45-54 1.634 0.562 8.730000e-01 0.382
othr_gov age_group55-65 1.460 0.563 6.710000e-01 0.502
othr_gov raceB 6.706 0.421 4.516000e+00 0.000
othr_gov raceH 3.191 0.391 2.968000e+00 0.003
othr_gov regionEasternWA 1.425 0.596 5.950000e-01 0.552
othr_gov regionWesternWA 2.646 0.316 3.082000e+00 0.002
othr_gov diag.status 2.856 0.404 2.599000e+00 0.009
othr_gov income_catfpl138 6.477 0.589 3.170000e+00 0.002
othr_gov income_catfpl138-300 2.630 0.417 2.318000e+00 0.020
othr_gov income_catfpl300-400 1.154 0.576 2.490000e-01 0.803
othr_gov income_catfpl400-500 2.640 0.529 1.837000e+00 0.066
othr_gov income_catfpl500-600 1.243 0.917 2.370000e-01 0.813
othr_gov income_catMissing 3.511 0.603 2.081000e+00 0.037
uninsrd (Intercept) 0.010 0.614 -7.488000e+00 0.000
uninsrd age_group25-34 0.719 0.451 -7.330000e-01 0.464
uninsrd age_group35-44 1.030 0.530 5.500000e-02 0.956
uninsrd age_group45-54 2.315 0.542 1.550000e+00 0.121
uninsrd age_group55-65 0.536 0.708 -8.810000e-01 0.378
uninsrd raceB 10.074 0.461 5.006000e+00 0.000
uninsrd raceH 4.554 0.420 3.613000e+00 0.000
uninsrd regionEasternWA 1.556 0.510 8.670000e-01 0.386
uninsrd regionWesternWA 0.924 0.388 -2.040000e-01 0.839
uninsrd diag.status 0.000 477.953 -2.900000e-02 0.977
uninsrd income_catfpl138 34.953 0.663 5.361000e+00 0.000
uninsrd income_catfpl138-300 15.191 0.541 5.025000e+00 0.000
uninsrd income_catfpl300-400 2.038 0.828 8.600000e-01 0.390
uninsrd income_catfpl400-500 3.013 0.878 1.257000e+00 0.209
uninsrd income_catfpl500-600 0.000 NaN NaN NaN
uninsrd income_catMissing 11.353 0.750 3.238000e+00 0.001
Missing (Intercept) 0.060 0.465 -6.051000e+00 0.000
Missing age_group25-34 0.514 0.467 -1.423000e+00 0.155
Missing age_group35-44 0.415 0.566 -1.555000e+00 0.120
Missing age_group45-54 0.925 0.513 -1.530000e-01 0.879
Missing age_group55-65 0.531 0.566 -1.119000e+00 0.263
Missing raceB 0.510 1.079 -6.250000e-01 0.532
Missing raceH 1.065 0.547 1.150000e-01 0.909
Missing regionEasternWA 1.804 0.516 1.144000e+00 0.253
Missing regionWesternWA 0.698 0.386 -9.310000e-01 0.352
Missing diag.status 0.242 1.217 -1.164000e+00 0.244
Missing income_catfpl138 5.898 0.666 2.666000e+00 0.008
Missing income_catfpl138-300 1.802 0.555 1.062000e+00 0.288
Missing income_catfpl300-400 0.359 1.280 -7.990000e-01 0.424
Missing income_catfpl400-500 0.000 0.000 -2.950197e+53 0.000
Missing income_catfpl500-600 0.000 0.000 -2.915339e+148 0.000
Missing income_catMissing 42.398 0.442 8.478000e+00 0.000

Comparison of AICs

models <- get_formula("ins_mlm", 5)
aics <- get_aics("ins_mlm", 5, "mlm")

kable(data.frame(Model = models, 
                 AIC = aics), 
      caption= paste("Comparison of AICs; n =", dim(cDF)[[1]]),
      digits = 0) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Comparison of AICs; n = 923
Model AIC
insurance ~ age_group 2239
insurance ~ age_group + race 2197
insurance ~ age_group + race + region 2168
insurance ~ age_group + race + region + diag.status 2156
insurance ~ age_group + race + region + diag.status + income_cat 1858
  • Income is a powerful predictor, even after all other demographics are included.

Predicted distribution

  • The model is age + race + region + diag.status + income
#### Construct and save output
expand_df <- expand.grid(
  age_group = levels(cDF$age_group),
  race = c("B", "H", "O"), 
  region = c("EasternWA", "King", "WesternWA"), 
  diag.status = c(0, 1),
  income_cat = levels(cDF$income_cat)
  )

pred_ins <- cbind(expand_df, 
                  predict(ins_mlm4, 
                          newdata = expand_df, 
                          "probs")) 

# For some reason ZK included age_group in the model/prediction, but she
# averaged over it in the saved DF.  We don't do that here.  We also use
# pivot_longer in place of gather

pred_insl <- pred_ins %>% 
  mutate(diag.status = ifelse(diag.status == 0, "HIV-", "HIV+")) %>%
  pivot_longer(c(emplyr:Missing), 
               names_to = "insurance", 
               values_to = "prob")

# Save to output object
save_out$pred_ins <- pred_insl

By race, region and Dx

ggplot(data = pred_insl,
       aes(x = race, y = prob/6, # sums over omitted var age_group
           group = insurance, fill = insurance)) +
  geom_bar(stat="identity") +
  facet_wrap(diag.status ~ region) +
  labs(title = "Insurance distribution by race, Dx and region",
       caption = paste("Multinomial regression results based on n =",
                       dim(cDF)[[1]],
                       "; Dx HIV+ =",
                       scales::percent(proportions(table(cDF$diag.status))[[2]], 
                                       accuracy = 0.1),
                       "; assumes homogenous age distributions"),
       y = "predicted distribution") + 
  theme(strip.text = element_text(size = 10),
        axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12), 
        axis.text.x = element_text(size = 10, hjust = 0.5), 
        axis.text.y = element_text(size = 10), 
        legend.position = "bottom")

By age, region and Dx

ggplot(data = pred_insl,
       aes(x = age_group, y = prob/3, # sums over omitted var race 
           group = insurance, fill = insurance)) +
  geom_bar(stat="identity") +
  facet_wrap(diag.status ~ region) +
  labs(title = "Insurance distribution by Age, Dx and region",
       caption = paste("Multinomial regression results based on n =",
                       dim(cDF)[[1]],
                       "; Dx HIV+ =",
                       scales::percent(proportions(table(cDF$diag.status))[[2]], 
                                       accuracy = 0.1),
                       "; assumes homogenous race distributions"),
       y = "predicted distribution") + 
  theme(strip.text = element_text(size = 10),
        axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12), 
        axis.text.x = element_text(size = 10, hjust = 0.5), 
        axis.text.y = element_text(size = 10), 
        legend.position = "bottom")

Save out

These aren’t used, but I save them out just in case, in the Data/Descriptives folder.

descTable <- 
  tibble(Objects = names(save_out), 
         Description = c("Predicted income categories",
                         "Income category bounds (as %fpl)",
                         "Predicted insurance"),
         Method = c("Multinomial regression",
                    "Transformation",
                    "Multinomial regression"),
         Levels = c("age x race x region",
                    "6 categories",
                    "age x race x region x diag.status x income"),
         MakeFile = c("make_WhampIncIns",
                      "make_WhampIncIns",
                      "make_WhampIncIns"),
         DataSource = c("WHAMP survey",
                        "WHAMP survey",
                        "WHAMP survey"))

save_out <- c(save_out, list(descTable = descTable))

kable(descTable, 
      caption= "WHAMP Income and Insurance") %>% 
  kable_styling(position = "center", 
                bootstrap_options = c("striped"))
WHAMP Income and Insurance
Objects Description Method Levels MakeFile DataSource
pred_inc Predicted income categories Multinomial regression age x race x region make_WhampIncIns WHAMP survey
income_levels Income category bounds (as %fpl) Transformation 6 categories make_WhampIncIns WHAMP survey
pred_ins Predicted insurance Multinomial regression age x race x region x diag.status x income make_WhampIncIns WHAMP survey
### Params  
saveRDS(save_out, here::here("Data", "Descriptives", "WhampIncIns.RDS"))