rm(list = ls())
library(tidyr)
library(stringr)
library(skimr)
library(dplyr)
library(pander)
library(broom)
library(reshape2)
library(ggplot2)
library(ggpubr)
library(plotly)
library(gridExtra)
library(nnet)
library(kableExtra)
# Define a couple of functions for model comparison tables
# for formulas
get_formula <- function(prefix, n) {
forms <- eval(parse(text =
paste0("list(",
paste0(prefix,
1:n,
"$terms", collapse = ","),
")")))
out <- sapply(forms, FUN = format)
return(out)
}
# for aics, need to also specify model type because
# object structures returned from AIC() are different
get_aics <- function(prefix, n, type) {
mods <- eval(parse(text =
paste0("list(",
paste0(prefix,
1:n,
collapse = ","),
")")))
aics <- sapply(mods, FUN = AIC)
if(type == "mlm"){
out = aics
} else {
out = aics[2,]
}
return(out)
}
Introduction
This report explores the income and insurance distributions from the WHAMP survey data.
Income
Income is not used in the EpiModel analyses because we do not have a way to model how income changes over the lifecourse. There are two types of income effects of interest that we therefore can not capture.
DAP income thresholds – From discussion with WADOH, these thresholds are not currently implemented as barriers to eligibility. For ADAP, people are primarily linked to appropriate insurance programs. For PDAP, supply of subsidies has been sufficient to meet demand. Note: this may change in the future
Effects on other input parameters – We use models to estimate parameters for the key dynamics in the simulation: sexual behavior (via stergms), and the PrEP and ART continua (via survival models, logistic regressions and other standard statistical methods). For the latter, we do include income in our exploratory modeling to determine whether we are missing an important predictor. In most cases we have found little impact after including age, race, region and SNAP.
We examine income in this report in order to understand the characteristics of the the WHAMP survey sample population. The WHAMP survey asked about household income (in categories) and number of dependents. These are combined with the FPL to construct the household income as a fraction of the FPL. If income is ever to be used as a DAP entry criteria, this is the standardized value that would be employed.
Insurance
Insurance is used in the economic analyses, and influences the cost estimates of the two DAPs. But it is used in a mechanistic way, by reverse engineering the relationship between the DAP costs and insurance status.
As with income, we do not have a way to model how insurance changes over the life course. So we do not use insurance to predict DAP enrollment, we instead assign insurance after DAP enrollment, based on the distribution observed for DAP clients. Since the State does assist DAP applicants with finding insurance, this is not as crazy as it sounds.
Note: The data used for the insurance analysis is the WADOH DAP program data, not the WHAMP survey data. For that analysis, please see this report.
Data
All of the data in this report come from the WHAMP Survey.
# Either make or read in data
dataset <- "readin"
#dataset <- "makeit"
if(dataset == "makeit") {
source(here::here("MakeData", "MM", "Scripts",
"makeWideData.R"))
} else {
wDF <- readRDS(here::here("MakeData", "MM", "Data",
"wideDF.RDS"))
}
# make sure this is updated if changed in makeWideData.R
# fpl is from: https://aspe.hhs.gov/2019-poverty-guidelines
# we use the guideline, not the threshold here
adap_fpl_cut <- data.frame(hhsize = c(1:8),
fpl = c(12490, 16910, 21330, 25750,
30170, 34590, 39010, 43430))
Missing data
Missing values in the WHAMP Survey demographics come mostly from insurance and income.
Income is missing for rscales::percent(proportions(table(is.na(wDF$hhincome2)))[[2]])` of respondents, with no pattern by age.
Income is missing for rscales::percent(proportions(table(is.na(wDF$insurance)))[[2]])` of respondents, with higher rates for the youngest age group.
Overall distributions
table(wDF$hhincome2, useNA = "al") %>%
kable(caption= "Household Income (Original Variable)") %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Household Income (Original Variable)
|
Var1
|
Freq
|
|
$0-19,999
|
94
|
|
$20,000-29,999
|
89
|
|
$30,000-39,999
|
72
|
|
$40,000-49,999
|
73
|
|
$50,000-59,999
|
53
|
|
$60,000-69,999
|
47
|
|
$70,000-79,999
|
58
|
|
$80,000+
|
360
|
|
NA
|
81
|
table(wDF$insurance, useNA = "al") %>%
kable(caption= "Insurance (Combined Variable)") %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Insurance (Combined Variable)
|
Var1
|
Freq
|
|
emplyr
|
577
|
|
indvdl_othr
|
61
|
|
medicare_caid
|
136
|
|
othr_gov
|
54
|
|
uninsrd
|
43
|
|
NA
|
56
|
By age
proportions(table(wDF$age_group, is.na(wDF$hh_income)), 1) %>%
kable(caption= "Missing income by age", digits = 2) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Missing income by age
|
|
FALSE
|
TRUE
|
|
15-24
|
0.96
|
0.04
|
|
25-34
|
0.97
|
0.03
|
|
35-44
|
0.97
|
0.03
|
|
45-54
|
0.96
|
0.04
|
|
55-65
|
0.97
|
0.03
|
# but missingness is related to age for insurance
proportions(table(wDF$age_group, is.na(wDF$insurance)), 1) %>%
kable(caption= "Missing insurance by age", digits = 2) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Missing insurance by age
|
|
FALSE
|
TRUE
|
|
15-24
|
0.85
|
0.15
|
|
25-34
|
0.95
|
0.05
|
|
35-44
|
0.97
|
0.03
|
|
45-54
|
0.93
|
0.07
|
|
55-65
|
0.96
|
0.04
|
With explicit missing levels
Here we use income_cat for fpl adjusted
wDF <- wDF %>%
mutate(insurance = ifelse(is.na(insurance),
"Missing", insurance),
insurance = forcats::fct_relevel(insurance, "Missing",
after = Inf),
income_cat = forcats::fct_explicit_na(income_cat,
"Missing"),
income_cat = forcats::fct_relevel(income_cat, "Missing",
after = Inf))
proportions(table(wDF$age_group, wDF$insurance), 1) %>%
kable(caption= "Final insurance by age", digits = 2) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Final insurance by age
|
|
emplyr
|
indvdl_othr
|
medicare_caid
|
othr_gov
|
uninsrd
|
Missing
|
|
15-24
|
0.53
|
0.04
|
0.15
|
0.05
|
0.07
|
0.15
|
|
25-34
|
0.66
|
0.05
|
0.12
|
0.07
|
0.06
|
0.05
|
|
35-44
|
0.68
|
0.07
|
0.13
|
0.04
|
0.04
|
0.03
|
|
45-54
|
0.61
|
0.10
|
0.12
|
0.07
|
0.04
|
0.07
|
|
55-65
|
0.58
|
0.08
|
0.22
|
0.06
|
0.02
|
0.04
|
proportions(table(wDF$age_group, wDF$income_cat), 1) %>%
kable(caption= "Final income categories by age", digits = 2) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Final income categories by age
|
|
fpl138
|
fpl138-300
|
fpl300-400
|
fpl400-500
|
fpl500-600
|
fpl600+
|
Missing
|
|
15-24
|
0.25
|
0.27
|
0.08
|
0.04
|
0.01
|
0.16
|
0.19
|
|
25-34
|
0.09
|
0.27
|
0.10
|
0.09
|
0.07
|
0.31
|
0.08
|
|
35-44
|
0.10
|
0.13
|
0.11
|
0.07
|
0.02
|
0.51
|
0.06
|
|
45-54
|
0.09
|
0.13
|
0.06
|
0.05
|
0.03
|
0.57
|
0.08
|
|
55-65
|
0.09
|
0.22
|
0.08
|
0.04
|
0.01
|
0.49
|
0.08
|
# Create the output object
save_out <- list()
HH income and dependents
Income
Table
These are the weighted frequencies for income.
wDF %>% group_by(hhincome2) %>%
summarize(n = n(),
n.wtd = sum(ego.wawt)) %>%
mutate(prop.wtd = n.wtd / sum(n.wtd)) %>%
kable(caption = "Household Income", digits = c(1,0,1,2)) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Household Income
|
hhincome2
|
n
|
n.wtd
|
prop.wtd
|
|
$0-19,999
|
94
|
86.6
|
0.09
|
|
$20,000-29,999
|
89
|
95.0
|
0.10
|
|
$30,000-39,999
|
72
|
65.6
|
0.07
|
|
$40,000-49,999
|
73
|
69.9
|
0.08
|
|
$50,000-59,999
|
53
|
52.2
|
0.06
|
|
$60,000-69,999
|
47
|
42.0
|
0.05
|
|
$70,000-79,999
|
58
|
59.6
|
0.06
|
|
$80,000+
|
360
|
375.5
|
0.41
|
|
NA
|
81
|
80.7
|
0.09
|
Plot
wDF %>% group_by(hhincome2) %>%
summarize(n.wtd = sum(ego.wawt)) %>%
mutate(prop.wtd = n.wtd / sum(n.wtd)) %>%
# ggplot(aes(x = sjlabelled::as_label(hh_income), y = prop.wtd)) +
ggplot(aes(x = hhincome2, y = prop.wtd)) +
geom_bar(stat="identity", fill = "blue", alpha = 0.5) +
labs(title = "Household income",
x = "HH income category",
y = "Wtd proportion") +
theme(axis.text.x = element_text(size = 8, angle = 50,
vjust = 0.6))

Dependents
Overall
tempDF <- wDF %>%
group_by(DEPEND) %>%
summarise(n = n(),
n.wtd = round(sum(ego.wawt), 1)) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>%
kable(caption= paste("HH Dependents; n = ",
sum(tempDF$n)),
digits = 2) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
HH Dependents; n = 927
|
DEPEND
|
n
|
n.wtd
|
prop.wtd
|
|
1
|
416
|
431.6
|
0.47
|
|
2
|
311
|
304.3
|
0.33
|
|
3
|
49
|
45.2
|
0.05
|
|
4
|
41
|
37.0
|
0.04
|
|
5
|
17
|
15.1
|
0.02
|
|
6
|
8
|
9.4
|
0.01
|
|
7
|
2
|
1.7
|
0.00
|
|
NA
|
83
|
82.7
|
0.09
|
By Income
Single person HHs show a distinct pattern, with more low income and fewer high income earners. Roughly a third of these HH are in the bottom two categories, and just over 25% are in the top category
With 2+ in the HH, the majority (over 60% of HH) are in the top income category, with the rest relatively evenly distributed in the lower categories.
For the 5+ HH, there are slightly fewer in the top category, and slightly more in the middle.
tempDF <- wDF %>%
filter(hhincome2 != 88 & !is.na(DEPEND)) %>%
mutate(depend = factor(if_else(DEPEND > 5, 5, DEPEND),
levels = c(1:5),
labels = c(as.character(1:4), "5+"))) %>%
group_by(depend, hhincome2) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(depend) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>%
ggplot(aes(x = hhincome2, y = prop.wtd,
group = depend, color = depend)) +
geom_line(lwd=1) +
labs(title = "Income distribution by dependents",
x = "HH income",
y = "wtd proportion",
color = "Dependents") +
scale_color_brewer(palette = "Spectral") +
#theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust=1))

FPL adjusted income
The adjustment procedure:
Match the FPL by the number of dependents for each respondent.
Divide the original income bounds by the FPL. This resulted in an income interval with a lower bound and an upper bound of FPL of each individual.
Roughly classify individuals’ income into 5 income categories <138% FPL, 138%-300% FPL, 300%-400% FPL, 500%-600% FPL, and >600% FPL. Then calculate the overlapping proportion of the interval in step 2 and the intervals defined in this step. If the overlapping proportion is over 50% in an income category, we assign the individual to this category.
If a respondent originally said that their income was above $80,000 per year, the respondent was assigned in the income category >600% FPL no matter how many dependents this respondent had.
Descriptives
Overall
- In general, individuals with lower income shifted their income categories after adjusting for the number of dependents.
tempDF <- wDF %>%
filter(!is.na(income_cat)) %>%
group_by(income_cat) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
ungroup() %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
p_income <- ggplot(tempDF,
aes(x = income_cat, y = prop.wtd,
text = prop.wtd)) +
geom_bar(fill = "blue", alpha = 0.5,
stat="identity",
position = position_dodge2(width = 0.9)) +
labs(title = "Income distribution in FPL categories",
sub = "Not adjusted for dependents",
caption = "Weighted WHAMP survey data",
x= "income FPL category",
y = "weighted proportion") +
ylim(0, 0.5)
title.plotly <-
list(text = paste0('Income distribution in FPL categories',
'<br>',
'<sup>',
'Not adjusted for dependents','</sup>'))
fig <- ggplotly(p_income , tooltip = "text",
width = 600, height = 400) %>%
layout(hoverlabel = list(font=list(color="white")),
title = title.plotly)
fig
Age
Table
tempDF <- wDF %>%
filter(!is.na(income_cat)) %>%
mutate(income_cat = factor(income_cat,
levels = sort(unique(income_cat)))) %>%
group_by(age_group, income_cat) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(age_group) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = age_group,
values_from = prop.wtd) %>%
kable(caption= paste("Distribution of income by age; n = ",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "Age group" = 5))
Distribution of income by age; n = 927
|
|
Age group
|
|
income_cat
|
15-24
|
25-34
|
35-44
|
45-54
|
55-65
|
|
fpl138
|
0.21
|
0.08
|
0.09
|
0.08
|
0.08
|
|
fpl138-300
|
0.30
|
0.26
|
0.11
|
0.11
|
0.22
|
|
fpl300-400
|
0.08
|
0.09
|
0.11
|
0.06
|
0.08
|
|
fpl400-500
|
0.04
|
0.10
|
0.07
|
0.04
|
0.03
|
|
fpl500-600
|
0.01
|
0.07
|
0.01
|
0.02
|
0.01
|
|
fpl600+
|
0.19
|
0.33
|
0.55
|
0.63
|
0.50
|
|
Missing
|
0.18
|
0.08
|
0.06
|
0.06
|
0.09
|
Plot
mycol = RColorBrewer::brewer.pal(length(unique(wDF$age_group))+1,
'Oranges')[-1]
ggplot(tempDF,
aes(x = income_cat, y = prop.wtd,
group = age_group, color = age_group)) +
geom_line(size = 1) +
geom_point() +
labs(title = "FPL adjusted income group by age",
x = "FPL adjusted income group",
color = "Age") +
scale_color_manual(values=mycol) +
theme(axis.text.x = element_text(angle = 45, hjust=1))

Race
Table
tempDF <- wDF %>%
group_by(race, income_cat) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(race) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = race,
values_from = prop.wtd) %>%
arrange(income_cat) %>%
kable(caption= paste("Distribution of income by race; n = ",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "Race" = 3))
Distribution of income by race; n = 927
|
|
Race
|
|
income_cat
|
B
|
H
|
O
|
|
fpl138
|
0.08
|
0.14
|
0.10
|
|
fpl138-300
|
0.30
|
0.25
|
0.18
|
|
fpl300-400
|
0.10
|
0.08
|
0.08
|
|
fpl400-500
|
0.06
|
0.10
|
0.05
|
|
fpl500-600
|
NA
|
0.06
|
0.02
|
|
fpl600+
|
0.39
|
0.25
|
0.47
|
|
Missing
|
0.07
|
0.11
|
0.09
|
Plot
ggplot(tempDF,
aes(x = income_cat, y = prop.wtd,
group = race, color = race)) +
geom_line(size = 1, alpha = 0.7) +
geom_point() +
labs(title = "FPL adjusted income group by age",
x = "FPL adjusted income group",
color = "Age") +
# scale_color_manual(values=mycol) +
theme(axis.text.x = element_text(angle = 45, hjust=1))

Region
Table
tempDF <- wDF %>%
group_by(region, income_cat) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(region) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = region,
values_from = prop.wtd) %>%
arrange(income_cat) %>%
kable(caption= paste("Distribution of income by region; n = ",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "region" = 3))
Distribution of income by region; n = 927
|
|
region
|
|
income_cat
|
EasternWA
|
King
|
WesternWA
|
|
fpl138
|
0.20
|
0.06
|
0.14
|
|
fpl138-300
|
0.27
|
0.17
|
0.23
|
|
fpl300-400
|
0.09
|
0.07
|
0.11
|
|
fpl400-500
|
0.05
|
0.05
|
0.07
|
|
fpl500-600
|
0.03
|
0.03
|
0.02
|
|
fpl600+
|
0.24
|
0.54
|
0.32
|
|
Missing
|
0.11
|
0.07
|
0.11
|
Plot
ggplot(tempDF,
aes(x = income_cat, y = prop.wtd,
group = region, color = region)) +
geom_line(size = 1, alpha = 0.7) +
geom_point() +
labs(title = "FPL adjusted income group by age",
x = "FPL adjusted income group",
color = "Age") +
# scale_color_manual(values=mycol) +
theme(axis.text.x = element_text(angle = 45, hjust=1))

Prediction: Adjusted Income
Multinomial regressions
- The coefficients are relative risk ratios.
Model 1
# multinom only uses complete cases, and inconsistently handles missing
# cases which breaks other functions like summary()
cDFi <- wDF %>% filter(complete == 1 & income_cat != "Missing")
inc_mlm1 <- multinom(income_cat ~ diag.status,
data = cDFi,
weights = ego.wawt)
## # weights: 18 (10 variable)
## initial value 1510.111738
## iter 10 value 1207.124161
## final value 1205.269549
## converged
#summary(inc_mlm1)
kable(tidy(inc_mlm1, exponentiate = T), digits = 2,
caption = "Model 1") %>%
kable_styling(full_width = F,
position="center",
bootstrap_options = c("striped"))
Model 1
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
fpl138-300
|
(Intercept)
|
1.92
|
0.13
|
4.91
|
0.00
|
|
fpl138-300
|
diag.status
|
1.01
|
0.42
|
0.03
|
0.97
|
|
fpl300-400
|
(Intercept)
|
0.81
|
0.16
|
-1.34
|
0.18
|
|
fpl300-400
|
diag.status
|
1.10
|
0.50
|
0.20
|
0.84
|
|
fpl400-500
|
(Intercept)
|
0.55
|
0.18
|
-3.30
|
0.00
|
|
fpl400-500
|
diag.status
|
1.19
|
0.54
|
0.31
|
0.75
|
|
fpl500-600
|
(Intercept)
|
0.27
|
0.23
|
-5.57
|
0.00
|
|
fpl500-600
|
diag.status
|
0.40
|
1.05
|
-0.88
|
0.38
|
|
fpl600+
|
(Intercept)
|
4.15
|
0.12
|
11.87
|
0.00
|
|
fpl600+
|
diag.status
|
1.20
|
0.37
|
0.48
|
0.63
|
Model 2
inc_mlm2 <- multinom(income_cat ~ age_group,
data = cDFi,
weights = ego.wawt)
## # weights: 36 (25 variable)
## initial value 1510.111738
## iter 10 value 1170.486371
## iter 20 value 1142.577044
## iter 30 value 1139.946598
## final value 1139.943809
## converged
kable(tidy(inc_mlm2, exponentiate = T), digits = 2,
caption= "Model 2") %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 2
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
fpl138-300
|
(Intercept)
|
1.42
|
0.23
|
1.55
|
0.12
|
|
fpl138-300
|
age_group25-34
|
2.12
|
0.35
|
2.17
|
0.03
|
|
fpl138-300
|
age_group35-44
|
0.91
|
0.40
|
-0.23
|
0.82
|
|
fpl138-300
|
age_group45-54
|
1.00
|
0.43
|
-0.01
|
0.99
|
|
fpl138-300
|
age_group55-65
|
1.98
|
0.39
|
1.77
|
0.08
|
|
fpl300-400
|
(Intercept)
|
0.36
|
0.33
|
-3.01
|
0.00
|
|
fpl300-400
|
age_group25-34
|
3.08
|
0.46
|
2.46
|
0.01
|
|
fpl300-400
|
age_group35-44
|
3.56
|
0.47
|
2.71
|
0.01
|
|
fpl300-400
|
age_group45-54
|
2.01
|
0.55
|
1.27
|
0.20
|
|
fpl300-400
|
age_group55-65
|
2.61
|
0.51
|
1.88
|
0.06
|
|
fpl400-500
|
(Intercept)
|
0.19
|
0.43
|
-3.84
|
0.00
|
|
fpl400-500
|
age_group25-34
|
6.16
|
0.53
|
3.41
|
0.00
|
|
fpl400-500
|
age_group35-44
|
4.18
|
0.57
|
2.50
|
0.01
|
|
fpl400-500
|
age_group45-54
|
2.78
|
0.65
|
1.58
|
0.11
|
|
fpl400-500
|
age_group55-65
|
1.93
|
0.68
|
0.97
|
0.33
|
|
fpl500-600
|
(Intercept)
|
0.04
|
0.90
|
-3.62
|
0.00
|
|
fpl500-600
|
age_group25-34
|
20.32
|
0.96
|
3.13
|
0.00
|
|
fpl500-600
|
age_group35-44
|
4.58
|
1.10
|
1.38
|
0.17
|
|
fpl500-600
|
age_group45-54
|
8.41
|
1.07
|
2.00
|
0.05
|
|
fpl500-600
|
age_group55-65
|
2.34
|
1.30
|
0.66
|
0.51
|
|
fpl600+
|
(Intercept)
|
0.88
|
0.25
|
-0.52
|
0.60
|
|
fpl600+
|
age_group25-34
|
4.40
|
0.36
|
4.13
|
0.00
|
|
fpl600+
|
age_group35-44
|
7.35
|
0.37
|
5.44
|
0.00
|
|
fpl600+
|
age_group45-54
|
9.60
|
0.39
|
5.79
|
0.00
|
|
fpl600+
|
age_group55-65
|
7.27
|
0.38
|
5.16
|
0.00
|
Model 3
inc_mlm3 <- multinom(income_cat ~ age_group + factor(race),
data = cDFi,
weights = ego.wawt)
## # weights: 48 (35 variable)
## initial value 1510.111738
## iter 10 value 1168.693774
## iter 20 value 1128.622592
## iter 30 value 1126.846067
## iter 40 value 1126.569792
## iter 50 value 1126.565275
## final value 1126.565256
## converged
kable(tidy(inc_mlm3, exponentiate = T), digits = 2,
caption= "Model 3") %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 3
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
fpl138-300
|
(Intercept)
|
2.89
|
0.55
|
1.93
|
0.05
|
|
fpl138-300
|
age_group25-34
|
2.08
|
0.35
|
2.11
|
0.04
|
|
fpl138-300
|
age_group35-44
|
0.90
|
0.40
|
-0.26
|
0.79
|
|
fpl138-300
|
age_group45-54
|
0.99
|
0.43
|
-0.03
|
0.98
|
|
fpl138-300
|
age_group55-65
|
2.03
|
0.39
|
1.83
|
0.07
|
|
fpl138-300
|
factor(race)H
|
0.50
|
0.61
|
-1.13
|
0.26
|
|
fpl138-300
|
factor(race)O
|
0.46
|
0.53
|
-1.46
|
0.15
|
|
fpl300-400
|
(Intercept)
|
0.58
|
0.68
|
-0.79
|
0.43
|
|
fpl300-400
|
age_group25-34
|
3.02
|
0.46
|
2.42
|
0.02
|
|
fpl300-400
|
age_group35-44
|
3.53
|
0.47
|
2.69
|
0.01
|
|
fpl300-400
|
age_group45-54
|
1.97
|
0.55
|
1.24
|
0.22
|
|
fpl300-400
|
age_group55-65
|
2.58
|
0.51
|
1.85
|
0.06
|
|
fpl300-400
|
factor(race)H
|
0.49
|
0.75
|
-0.96
|
0.34
|
|
fpl300-400
|
factor(race)O
|
0.63
|
0.63
|
-0.72
|
0.47
|
|
fpl400-500
|
(Intercept)
|
0.22
|
0.82
|
-1.85
|
0.06
|
|
fpl400-500
|
age_group25-34
|
6.24
|
0.53
|
3.43
|
0.00
|
|
fpl400-500
|
age_group35-44
|
4.16
|
0.57
|
2.50
|
0.01
|
|
fpl400-500
|
age_group45-54
|
2.84
|
0.65
|
1.61
|
0.11
|
|
fpl400-500
|
age_group55-65
|
2.03
|
0.68
|
1.04
|
0.30
|
|
fpl400-500
|
factor(race)H
|
1.17
|
0.82
|
0.19
|
0.85
|
|
fpl400-500
|
factor(race)O
|
0.78
|
0.73
|
-0.34
|
0.74
|
|
fpl500-600
|
(Intercept)
|
0.00
|
0.61
|
-25.46
|
0.00
|
|
fpl500-600
|
age_group25-34
|
21.47
|
0.96
|
3.18
|
0.00
|
|
fpl500-600
|
age_group35-44
|
4.64
|
1.10
|
1.39
|
0.16
|
|
fpl500-600
|
age_group45-54
|
8.93
|
1.07
|
2.05
|
0.04
|
|
fpl500-600
|
age_group55-65
|
2.52
|
1.30
|
0.71
|
0.48
|
|
fpl500-600
|
factor(race)H
|
347946.96
|
0.44
|
29.24
|
0.00
|
|
fpl500-600
|
factor(race)O
|
168177.03
|
0.38
|
31.32
|
0.00
|
|
fpl600+
|
(Intercept)
|
1.04
|
0.56
|
0.08
|
0.94
|
|
fpl600+
|
age_group25-34
|
4.29
|
0.36
|
4.04
|
0.00
|
|
fpl600+
|
age_group35-44
|
7.34
|
0.37
|
5.42
|
0.00
|
|
fpl600+
|
age_group45-54
|
9.30
|
0.39
|
5.69
|
0.00
|
|
fpl600+
|
age_group55-65
|
6.86
|
0.39
|
4.99
|
0.00
|
|
fpl600+
|
factor(race)H
|
0.40
|
0.61
|
-1.50
|
0.13
|
|
fpl600+
|
factor(race)O
|
0.93
|
0.53
|
-0.13
|
0.89
|
Model 4
inc_mlm4 <- multinom(income_cat ~ age_group +
factor(race) + factor(region),
data = cDFi,
weights = ego.wawt)
## # weights: 60 (45 variable)
## initial value 1510.111738
## iter 10 value 1141.915601
## iter 20 value 1097.041447
## iter 30 value 1094.935028
## iter 40 value 1094.415678
## iter 50 value 1094.348000
## final value 1094.347252
## converged
kable(tidy(inc_mlm4, exponentiate = T), digits = 2,
caption= "Model 4") %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 4
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
fpl138-300
|
(Intercept)
|
1.75
|
0.64
|
0.87
|
0.38
|
|
fpl138-300
|
age_group25-34
|
2.07
|
0.35
|
2.09
|
0.04
|
|
fpl138-300
|
age_group35-44
|
0.92
|
0.40
|
-0.20
|
0.84
|
|
fpl138-300
|
age_group45-54
|
1.02
|
0.44
|
0.04
|
0.97
|
|
fpl138-300
|
age_group55-65
|
2.14
|
0.39
|
1.95
|
0.05
|
|
fpl138-300
|
factor(race)H
|
0.60
|
0.62
|
-0.83
|
0.41
|
|
fpl138-300
|
factor(race)O
|
0.52
|
0.54
|
-1.22
|
0.22
|
|
fpl138-300
|
factor(region)King
|
1.92
|
0.37
|
1.74
|
0.08
|
|
fpl138-300
|
factor(region)WesternWA
|
1.25
|
0.37
|
0.60
|
0.55
|
|
fpl300-400
|
(Intercept)
|
0.28
|
0.81
|
-1.56
|
0.12
|
|
fpl300-400
|
age_group25-34
|
3.02
|
0.46
|
2.40
|
0.02
|
|
fpl300-400
|
age_group35-44
|
3.63
|
0.47
|
2.73
|
0.01
|
|
fpl300-400
|
age_group45-54
|
2.01
|
0.55
|
1.26
|
0.21
|
|
fpl300-400
|
age_group55-65
|
2.72
|
0.51
|
1.95
|
0.05
|
|
fpl300-400
|
factor(race)H
|
0.61
|
0.75
|
-0.65
|
0.51
|
|
fpl300-400
|
factor(race)O
|
0.73
|
0.63
|
-0.50
|
0.62
|
|
fpl300-400
|
factor(region)King
|
2.38
|
0.49
|
1.78
|
0.08
|
|
fpl300-400
|
factor(region)WesternWA
|
1.61
|
0.48
|
0.99
|
0.32
|
|
fpl400-500
|
(Intercept)
|
0.08
|
0.98
|
-2.60
|
0.01
|
|
fpl400-500
|
age_group25-34
|
6.23
|
0.54
|
3.41
|
0.00
|
|
fpl400-500
|
age_group35-44
|
4.34
|
0.57
|
2.55
|
0.01
|
|
fpl400-500
|
age_group45-54
|
2.94
|
0.65
|
1.65
|
0.10
|
|
fpl400-500
|
age_group55-65
|
2.19
|
0.68
|
1.15
|
0.25
|
|
fpl400-500
|
factor(race)H
|
1.54
|
0.82
|
0.52
|
0.60
|
|
fpl400-500
|
factor(race)O
|
0.93
|
0.74
|
-0.10
|
0.92
|
|
fpl400-500
|
factor(region)King
|
3.40
|
0.58
|
2.10
|
0.04
|
|
fpl400-500
|
factor(region)WesternWA
|
1.95
|
0.59
|
1.13
|
0.26
|
|
fpl500-600
|
(Intercept)
|
0.00
|
0.73
|
-21.69
|
0.00
|
|
fpl500-600
|
age_group25-34
|
21.14
|
0.97
|
3.16
|
0.00
|
|
fpl500-600
|
age_group35-44
|
4.83
|
1.11
|
1.42
|
0.15
|
|
fpl500-600
|
age_group45-54
|
9.60
|
1.07
|
2.11
|
0.03
|
|
fpl500-600
|
age_group55-65
|
2.75
|
1.30
|
0.78
|
0.44
|
|
fpl500-600
|
factor(race)H
|
285573.47
|
0.47
|
26.66
|
0.00
|
|
fpl500-600
|
factor(race)O
|
127991.23
|
0.45
|
26.20
|
0.00
|
|
fpl500-600
|
factor(region)King
|
3.06
|
0.74
|
1.51
|
0.13
|
|
fpl500-600
|
factor(region)WesternWA
|
1.22
|
0.78
|
0.25
|
0.80
|
|
fpl600+
|
(Intercept)
|
0.21
|
0.67
|
-2.33
|
0.02
|
|
fpl600+
|
age_group25-34
|
4.30
|
0.37
|
3.92
|
0.00
|
|
fpl600+
|
age_group35-44
|
8.09
|
0.38
|
5.48
|
0.00
|
|
fpl600+
|
age_group45-54
|
10.74
|
0.41
|
5.83
|
0.00
|
|
fpl600+
|
age_group55-65
|
8.08
|
0.40
|
5.22
|
0.00
|
|
fpl600+
|
factor(race)H
|
0.58
|
0.63
|
-0.86
|
0.39
|
|
fpl600+
|
factor(race)O
|
1.24
|
0.54
|
0.40
|
0.69
|
|
fpl600+
|
factor(region)King
|
7.27
|
0.38
|
5.17
|
0.00
|
|
fpl600+
|
factor(region)WesternWA
|
1.70
|
0.38
|
1.38
|
0.17
|
Comparison of AICs
models <- get_formula("inc_mlm", 4)
aics <- get_aics("inc_mlm", 4, "mlm")
kable(data.frame(model = models,
AIC = aics),
digits = 0,
caption= paste("Comparison of AICs; n =", dim(cDFi)[[1]])) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Comparison of AICs; n = 842
|
model
|
AIC
|
|
income_cat ~ diag.status
|
2431
|
|
income_cat ~ age_group
|
2330
|
|
income_cat ~ age_group + factor(race)
|
2323
|
|
income_cat ~ age_group + factor(race) + factor(region)
|
2279
|
- We will look at the predictions for model 4, including age + race + region.
Predicted distribution
# Construct and save output
expand_df <- expand.grid(
age_group = factor(c(1:5),
levels = c(1:5),
labels = levels(wDF$age_group)),
race = factor(c("B", "H", "O")),
region = c("EasternWA", "King", "WesternWA"))
pred_inc <- cbind(expand_df,
predict(inc_mlm4,
newdata = expand_df,
"probs"))
pred_inc <- pred_inc %>%
gather(income, prob, `fpl138`:`fpl600+`)
# lb & ub values are income as percent of fpl given HHdependents
income_levels <- data.frame(lvl = c(1:6),
lb = c(0, 138, 300, 400, 500, 600),
ub = c(137, 299, 399, 499, 599, 1000))
# Save output
save_out$pred_inc <- pred_inc
save_out$income_levels <- income_levels
ggplot(data = pred_inc, aes(x = income, y = prob, group = race)) +
geom_line(aes(color = race)) +
facet_wrap(~ region + age_group, ncol = 5) +
ylim(0, 0.8) +
labs(title = "FPL income group by race, region and age",
y = "predicted distribution") +
theme(axis.text.x = element_text(size = 8, angle = 30, hjust = 0.5),
strip.text.x = element_text(size = 8),
legend.position = "bottom")

FPL guidelines
- These are the federal poverty level (FPL) guidelines by the number of dependents in the household. Source here.
kable(adap_fpl_cut,
caption= "FPL by dependents") %>%
kable_styling(full_width=F, position="center")
FPL by dependents
|
hhsize
|
fpl
|
|
1
|
12490
|
|
2
|
16910
|
|
3
|
21330
|
|
4
|
25750
|
|
5
|
30170
|
|
6
|
34590
|
|
7
|
39010
|
|
8
|
43430
|
Insurance
Descriptive statistics
Overall
#with(cDF, table(insurance, useNA = "al"))
cDF <- wDF %>% filter(complete==1)
cDF %>%
group_by(insurance) %>%
summarise(n = n(),
n.wtd = round(sum(ego.wawt), 1)) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
kable(caption= "Distribution of insurance") %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Distribution of insurance
|
insurance
|
n
|
n.wtd
|
prop.wtd
|
|
emplyr
|
576
|
586.9
|
0.64
|
|
indvdl_othr
|
61
|
54.9
|
0.06
|
|
medicare_caid
|
134
|
120.9
|
0.13
|
|
othr_gov
|
54
|
56.2
|
0.06
|
|
uninsrd
|
42
|
47.3
|
0.05
|
|
Missing
|
56
|
57.5
|
0.06
|
HIV status
Table
tempDF <- cDF %>%
mutate(dx.status = ifelse(diag.status == 1, "HIV+", "HIV-")) %>%
group_by(dx.status, insurance) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(dx.status) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = dx.status,
values_from = prop.wtd) %>%
kable(caption= paste("Distribution of insurance by Dx status; n =",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "Dx status" = 2))
Distribution of insurance by Dx status; n = 923
|
|
Dx status
|
|
insurance
|
HIV-
|
HIV+
|
|
emplyr
|
0.64
|
0.60
|
|
indvdl_othr
|
0.06
|
0.04
|
|
medicare_caid
|
0.12
|
0.21
|
|
othr_gov
|
0.05
|
0.14
|
|
uninsrd
|
0.06
|
NA
|
|
Missing
|
0.07
|
0.01
|
Plot
fig <- ggplot(tempDF,
aes(x = insurance, y = prop.wtd, text = prop.wtd,
group = dx.status, fill = dx.status)) +
geom_bar(stat="identity", position = "dodge",
color = "darkgrey",
alpha = 0.7, ) +
theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))
title.plotly <-
list(text = paste0('Insurance by HIV Dx',
'<br>',
'<sup>',
'n = ',
sum(tempDF$n),
'</sup>'))
ggplotly(fig , tooltip = "text") %>%
layout(#hoverlabel = list(font=list(color="black")),
title = title.plotly)
Age
Table
tempDF <- cDF %>%
group_by(age_group, insurance) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(age_group) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = age_group,
values_from = prop.wtd) %>%
kable(caption= paste("Distribution of insurance by age group; n = ",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "Age Group" = 5))
Distribution of insurance by age group; n = 923
|
|
Age Group
|
|
insurance
|
15-24
|
25-34
|
35-44
|
45-54
|
55-65
|
|
emplyr
|
0.54
|
0.67
|
0.71
|
0.64
|
0.59
|
|
indvdl_othr
|
0.03
|
0.04
|
0.06
|
0.09
|
0.07
|
|
medicare_caid
|
0.13
|
0.12
|
0.10
|
0.10
|
0.22
|
|
othr_gov
|
0.05
|
0.07
|
0.06
|
0.06
|
0.06
|
|
uninsrd
|
0.11
|
0.05
|
0.04
|
0.05
|
0.02
|
|
Missing
|
0.15
|
0.05
|
0.03
|
0.06
|
0.03
|
Plot
fig <- ggplot(tempDF,
aes(x = insurance, y = prop.wtd, text = prop.wtd,
group = age_group, fill = age_group)) +
geom_bar(stat="identity", position = "dodge",
color = "darkgrey",
alpha = 0.7, ) +
theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))
title.plotly <-
list(text = paste0('Insurance distribution by age group',
'<br>',
'<sup>',
'n = ',
sum(tempDF$n),
'</sup>'))
ggplotly(fig , tooltip = "text") %>%
layout(#hoverlabel = list(font=list(color="black")),
title = title.plotly)
Race
Table
Almost all of the uninsured are Black or Hispanic.
tempDF <- cDF %>%
group_by(race, insurance) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(race) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = race,
values_from = prop.wtd) %>%
kable(caption= paste("Distribution of insurance by race; n = ",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "Race" = 3))
Distribution of insurance by race; n = 923
|
|
Race
|
|
insurance
|
B
|
H
|
O
|
|
emplyr
|
0.47
|
0.52
|
0.67
|
|
indvdl_othr
|
0.02
|
0.02
|
0.07
|
|
medicare_caid
|
0.13
|
0.15
|
0.13
|
|
othr_gov
|
0.18
|
0.12
|
0.04
|
|
uninsrd
|
0.19
|
0.13
|
0.03
|
|
Missing
|
0.02
|
0.07
|
0.07
|
Plot
fig <- ggplot(tempDF,
aes(x = insurance, y = prop.wtd, text = prop.wtd,
group = race, fill = race)) +
geom_bar(stat="identity", position = "dodge", color = "darkgrey",
alpha = 0.7, ) +
theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
plot.margin = margin(t=20,r=5,b=5,l=0))
title.plotly <-
list(text = paste0('Insurance distribution by race',
'<br>',
'<sup>',
'n = ',
sum(tempDF$n),
'</sup>'))
ggplotly(fig , tooltip = "text") %>%
layout(#hoverlabel = list(font=list(color="black")),
title = title.plotly)
Region
Table
tempDF <- cDF %>%
group_by(region, insurance) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(region) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = region,
values_from = prop.wtd) %>%
kable(caption= paste("Distribution of insurance by region; n = ",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "Region" = 3))
Distribution of insurance by region; n = 923
|
|
Region
|
|
insurance
|
EasternWA
|
King
|
WesternWA
|
|
emplyr
|
0.46
|
0.71
|
0.56
|
|
indvdl_othr
|
0.10
|
0.06
|
0.05
|
|
medicare_caid
|
0.22
|
0.08
|
0.19
|
|
othr_gov
|
0.04
|
0.04
|
0.10
|
|
uninsrd
|
0.08
|
0.05
|
0.05
|
|
Missing
|
0.10
|
0.06
|
0.05
|
Plot
fig <- ggplot(tempDF,
aes(x = insurance, y = prop.wtd, text = prop.wtd,
group = region, fill = region)) +
geom_bar(stat="identity", position = "dodge", color = "darkgrey",
alpha = 0.7, ) +
ylab("wtd proportion") +
theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
plot.margin = margin(t=20,r=5,b=5,l=0))
title.plotly <-
list(text = paste0('Insurance distribution by region',
'<br>',
'<sup>',
'n = ',
sum(tempDF$n),
'</sup>'))
ggplotly(fig , tooltip = "text") %>%
layout(#hoverlabel = list(font=list(color="black")),
title = title.plotly)
Income
Table
# We show the missing income cases in the table
tempDF <- cDF %>%
group_by(income_cat, insurance) %>%
summarise(n = n(),
n.wtd = sum(ego.wawt)) %>%
group_by(income_cat) %>%
mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))
tempDF %>% select(-c(n, n.wtd)) %>%
pivot_wider(names_from = income_cat,
values_from = prop.wtd) %>%
kable(caption= paste("Distribution of insurance by FPL income category; n = ",
sum(tempDF$n))) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped")) %>%
add_header_above(c("", "FPL income category" = 7))
Distribution of insurance by FPL income category; n = 923
|
|
FPL income category
|
|
insurance
|
fpl138
|
fpl138-300
|
fpl300-400
|
fpl400-500
|
fpl500-600
|
fpl600+
|
Missing
|
|
emplyr
|
0.18
|
0.49
|
0.76
|
0.72
|
0.92
|
0.83
|
0.27
|
|
indvdl_othr
|
0.07
|
0.08
|
0.07
|
0.02
|
0.02
|
0.06
|
0.03
|
|
medicare_caid
|
0.56
|
0.19
|
0.08
|
0.10
|
NA
|
0.02
|
0.15
|
|
othr_gov
|
0.06
|
0.08
|
0.06
|
0.13
|
0.06
|
0.05
|
0.06
|
|
uninsrd
|
0.10
|
0.13
|
0.03
|
0.04
|
NA
|
0.02
|
0.05
|
|
Missing
|
0.05
|
0.03
|
0.01
|
NA
|
NA
|
0.03
|
0.44
|
Plot
fig <- ggplot(tempDF,
aes(x = insurance, y = prop.wtd, text = prop.wtd,
group = income_cat, fill = income_cat)) +
geom_bar(stat="identity", position = "dodge", color = "darkgrey",
alpha = 0.7, ) +
theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
plot.margin = margin(t=20,r=5,b=5,l=0))
title.plotly <-
list(text = paste0('Insurance distribution by FPL income category',
'<br>',
'<sup>',
'n = ',
sum(tempDF$n),
'</sup>'))
ggplotly(fig , tooltip = "text") %>%
layout(#hoverlabel = list(font=list(color="black")),
title = title.plotly)
Prediction
Multinomial regressions
insDF <- cDF %>%
mutate(age_group = relevel(factor(age_group), ref = "15-24"),
race = relevel(factor(race), ref = "O"),
region = relevel(factor(region), ref = "King"),
income_cat = relevel(factor(income_cat), ref = "fpl600+"))
Model 1
ins_mlm1 <- multinom(insurance ~ age_group,
data = insDF,
weights = ego.wawt)
## # weights: 36 (25 variable)
## initial value 1655.050008
## iter 10 value 1132.758539
## iter 20 value 1094.634600
## iter 30 value 1094.503716
## final value 1094.503684
## converged
kable(tidy(ins_mlm1, exponentiate = T),
caption= "Model 1", digits = 3) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 1
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
indvdl_othr
|
(Intercept)
|
0.057
|
0.467
|
-6.117
|
0.000
|
|
indvdl_othr
|
age_group25-34
|
1.122
|
0.570
|
0.203
|
0.839
|
|
indvdl_othr
|
age_group35-44
|
1.539
|
0.555
|
0.776
|
0.438
|
|
indvdl_othr
|
age_group45-54
|
2.419
|
0.543
|
1.627
|
0.104
|
|
indvdl_othr
|
age_group55-65
|
2.189
|
0.552
|
1.420
|
0.156
|
|
medicare_caid
|
(Intercept)
|
0.236
|
0.249
|
-5.797
|
0.000
|
|
medicare_caid
|
age_group25-34
|
0.752
|
0.324
|
-0.879
|
0.379
|
|
medicare_caid
|
age_group35-44
|
0.568
|
0.352
|
-1.605
|
0.109
|
|
medicare_caid
|
age_group45-54
|
0.666
|
0.362
|
-1.124
|
0.261
|
|
medicare_caid
|
age_group55-65
|
1.566
|
0.313
|
1.435
|
0.151
|
|
othr_gov
|
(Intercept)
|
0.091
|
0.377
|
-6.359
|
0.000
|
|
othr_gov
|
age_group25-34
|
1.099
|
0.462
|
0.205
|
0.838
|
|
othr_gov
|
age_group35-44
|
0.966
|
0.483
|
-0.072
|
0.943
|
|
othr_gov
|
age_group45-54
|
1.037
|
0.500
|
0.072
|
0.942
|
|
othr_gov
|
age_group55-65
|
1.165
|
0.493
|
0.309
|
0.757
|
|
uninsrd
|
(Intercept)
|
0.198
|
0.268
|
-6.050
|
0.000
|
|
uninsrd
|
age_group25-34
|
0.393
|
0.401
|
-2.326
|
0.020
|
|
uninsrd
|
age_group35-44
|
0.268
|
0.466
|
-2.820
|
0.005
|
|
uninsrd
|
age_group45-54
|
0.393
|
0.448
|
-2.084
|
0.037
|
|
uninsrd
|
age_group55-65
|
0.144
|
0.647
|
-2.990
|
0.003
|
|
Missing
|
(Intercept)
|
0.286
|
0.231
|
-5.424
|
0.000
|
|
Missing
|
age_group25-34
|
0.251
|
0.387
|
-3.576
|
0.000
|
|
Missing
|
age_group35-44
|
0.148
|
0.484
|
-3.949
|
0.000
|
|
Missing
|
age_group45-54
|
0.340
|
0.398
|
-2.710
|
0.007
|
|
Missing
|
age_group55-65
|
0.205
|
0.477
|
-3.325
|
0.001
|
Model 2
ins_mlm2 <- multinom(insurance ~ age_group + race,
data = insDF,
weights = ego.wawt)
## # weights: 48 (35 variable)
## initial value 1655.050008
## iter 10 value 1106.835938
## iter 20 value 1063.399354
## iter 30 value 1063.252499
## final value 1063.252469
## converged
kable(tidy(ins_mlm2, exponentiate = T),
caption= "Model 2", digits = 3) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 2
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
indvdl_othr
|
(Intercept)
|
0.063
|
0.470
|
-5.893
|
0.000
|
|
indvdl_othr
|
age_group25-34
|
1.124
|
0.570
|
0.205
|
0.837
|
|
indvdl_othr
|
age_group35-44
|
1.541
|
0.556
|
0.778
|
0.437
|
|
indvdl_othr
|
age_group45-54
|
2.358
|
0.544
|
1.578
|
0.115
|
|
indvdl_othr
|
age_group55-65
|
2.108
|
0.553
|
1.349
|
0.177
|
|
indvdl_othr
|
raceB
|
0.421
|
0.941
|
-0.918
|
0.359
|
|
indvdl_othr
|
raceH
|
0.525
|
0.657
|
-0.981
|
0.327
|
|
medicare_caid
|
(Intercept)
|
0.216
|
0.257
|
-5.972
|
0.000
|
|
medicare_caid
|
age_group25-34
|
0.754
|
0.325
|
-0.868
|
0.385
|
|
medicare_caid
|
age_group35-44
|
0.569
|
0.353
|
-1.595
|
0.111
|
|
medicare_caid
|
age_group45-54
|
0.686
|
0.363
|
-1.038
|
0.299
|
|
medicare_caid
|
age_group55-65
|
1.633
|
0.315
|
1.556
|
0.120
|
|
medicare_caid
|
raceB
|
1.481
|
0.420
|
0.934
|
0.350
|
|
medicare_caid
|
raceH
|
1.589
|
0.316
|
1.465
|
0.143
|
|
othr_gov
|
(Intercept)
|
0.059
|
0.403
|
-7.010
|
0.000
|
|
othr_gov
|
age_group25-34
|
1.058
|
0.470
|
0.119
|
0.905
|
|
othr_gov
|
age_group35-44
|
0.940
|
0.491
|
-0.127
|
0.899
|
|
othr_gov
|
age_group45-54
|
1.118
|
0.511
|
0.218
|
0.827
|
|
othr_gov
|
age_group55-65
|
1.329
|
0.504
|
0.564
|
0.573
|
|
othr_gov
|
raceB
|
6.259
|
0.394
|
4.655
|
0.000
|
|
othr_gov
|
raceH
|
3.728
|
0.368
|
3.574
|
0.000
|
|
uninsrd
|
(Intercept)
|
0.110
|
0.317
|
-6.964
|
0.000
|
|
uninsrd
|
age_group25-34
|
0.370
|
0.419
|
-2.377
|
0.017
|
|
uninsrd
|
age_group35-44
|
0.257
|
0.481
|
-2.822
|
0.005
|
|
uninsrd
|
age_group45-54
|
0.427
|
0.467
|
-1.825
|
0.068
|
|
uninsrd
|
age_group55-65
|
0.169
|
0.660
|
-2.692
|
0.007
|
|
uninsrd
|
raceB
|
9.499
|
0.416
|
5.410
|
0.000
|
|
uninsrd
|
raceH
|
4.841
|
0.385
|
4.100
|
0.000
|
|
Missing
|
(Intercept)
|
0.287
|
0.241
|
-5.193
|
0.000
|
|
Missing
|
age_group25-34
|
0.256
|
0.387
|
-3.520
|
0.000
|
|
Missing
|
age_group35-44
|
0.150
|
0.484
|
-3.917
|
0.000
|
|
Missing
|
age_group45-54
|
0.346
|
0.400
|
-2.657
|
0.008
|
|
Missing
|
age_group55-65
|
0.208
|
0.479
|
-3.279
|
0.001
|
|
Missing
|
raceB
|
0.372
|
1.045
|
-0.946
|
0.344
|
|
Missing
|
raceH
|
1.153
|
0.444
|
0.320
|
0.749
|
Model 3
ins_mlm3 <- multinom(insurance ~ age_group + race +
region,
data = insDF,
weights = ego.wawt)
## # weights: 60 (45 variable)
## initial value 1655.050008
## iter 10 value 1111.068012
## iter 20 value 1039.600207
## iter 30 value 1038.912537
## final value 1038.912285
## converged
kable(tidy(ins_mlm3, exponentiate = T),
caption= "Model 3", digits = 3) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 3
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
indvdl_othr
|
(Intercept)
|
0.053
|
0.492
|
-5.957
|
0.000
|
|
indvdl_othr
|
age_group25-34
|
1.157
|
0.572
|
0.255
|
0.799
|
|
indvdl_othr
|
age_group35-44
|
1.578
|
0.558
|
0.818
|
0.413
|
|
indvdl_othr
|
age_group45-54
|
2.445
|
0.546
|
1.638
|
0.101
|
|
indvdl_othr
|
age_group55-65
|
2.137
|
0.555
|
1.369
|
0.171
|
|
indvdl_othr
|
raceB
|
0.458
|
0.943
|
-0.828
|
0.408
|
|
indvdl_othr
|
raceH
|
0.480
|
0.660
|
-1.113
|
0.266
|
|
indvdl_othr
|
regionEasternWA
|
2.795
|
0.418
|
2.457
|
0.014
|
|
indvdl_othr
|
regionWesternWA
|
1.094
|
0.326
|
0.277
|
0.782
|
|
medicare_caid
|
(Intercept)
|
0.120
|
0.292
|
-7.274
|
0.000
|
|
medicare_caid
|
age_group25-34
|
0.810
|
0.331
|
-0.639
|
0.523
|
|
medicare_caid
|
age_group35-44
|
0.596
|
0.359
|
-1.443
|
0.149
|
|
medicare_caid
|
age_group45-54
|
0.722
|
0.369
|
-0.882
|
0.378
|
|
medicare_caid
|
age_group55-65
|
1.649
|
0.321
|
1.557
|
0.119
|
|
medicare_caid
|
raceB
|
1.754
|
0.428
|
1.312
|
0.189
|
|
medicare_caid
|
raceH
|
1.467
|
0.325
|
1.178
|
0.239
|
|
medicare_caid
|
regionEasternWA
|
3.963
|
0.321
|
4.286
|
0.000
|
|
medicare_caid
|
regionWesternWA
|
2.764
|
0.224
|
4.545
|
0.000
|
|
othr_gov
|
(Intercept)
|
0.034
|
0.444
|
-7.615
|
0.000
|
|
othr_gov
|
age_group25-34
|
1.100
|
0.475
|
0.200
|
0.842
|
|
othr_gov
|
age_group35-44
|
0.940
|
0.496
|
-0.124
|
0.901
|
|
othr_gov
|
age_group45-54
|
1.091
|
0.515
|
0.170
|
0.865
|
|
othr_gov
|
age_group55-65
|
1.294
|
0.508
|
0.507
|
0.612
|
|
othr_gov
|
raceB
|
7.079
|
0.406
|
4.825
|
0.000
|
|
othr_gov
|
raceH
|
3.816
|
0.376
|
3.564
|
0.000
|
|
othr_gov
|
regionEasternWA
|
1.611
|
0.585
|
0.816
|
0.415
|
|
othr_gov
|
regionWesternWA
|
3.274
|
0.304
|
3.899
|
0.000
|
|
uninsrd
|
(Intercept)
|
0.088
|
0.354
|
-6.877
|
0.000
|
|
uninsrd
|
age_group25-34
|
0.378
|
0.421
|
-2.311
|
0.021
|
|
uninsrd
|
age_group35-44
|
0.266
|
0.482
|
-2.749
|
0.006
|
|
uninsrd
|
age_group45-54
|
0.453
|
0.469
|
-1.691
|
0.091
|
|
uninsrd
|
age_group55-65
|
0.172
|
0.661
|
-2.665
|
0.008
|
|
uninsrd
|
raceB
|
10.244
|
0.422
|
5.513
|
0.000
|
|
uninsrd
|
raceH
|
4.516
|
0.389
|
3.871
|
0.000
|
|
uninsrd
|
regionEasternWA
|
2.604
|
0.477
|
2.005
|
0.045
|
|
uninsrd
|
regionWesternWA
|
1.371
|
0.359
|
0.879
|
0.379
|
|
Missing
|
(Intercept)
|
0.258
|
0.275
|
-4.921
|
0.000
|
|
Missing
|
age_group25-34
|
0.260
|
0.389
|
-3.460
|
0.001
|
|
Missing
|
age_group35-44
|
0.152
|
0.486
|
-3.877
|
0.000
|
|
Missing
|
age_group45-54
|
0.356
|
0.401
|
-2.571
|
0.010
|
|
Missing
|
age_group55-65
|
0.210
|
0.480
|
-3.257
|
0.001
|
|
Missing
|
raceB
|
0.381
|
1.048
|
-0.921
|
0.357
|
|
Missing
|
raceH
|
1.073
|
0.448
|
0.157
|
0.875
|
|
Missing
|
regionEasternWA
|
2.277
|
0.422
|
1.949
|
0.051
|
|
Missing
|
regionWesternWA
|
1.031
|
0.323
|
0.096
|
0.924
|
Model 4
ins_mlm4 <- multinom(insurance ~ age_group + race +
region + diag.status,
data = insDF,
weights = ego.wawt)
## # weights: 66 (50 variable)
## initial value 1655.050008
## iter 10 value 1104.083156
## iter 20 value 1029.355743
## iter 30 value 1027.875420
## final value 1027.870580
## converged
kable(tidy(ins_mlm4, exponentiate = T),
caption= "Model 4", digits = 3) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 4
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
indvdl_othr
|
(Intercept)
|
0.054
|
0.492
|
-5.947
|
0.000
|
|
indvdl_othr
|
age_group25-34
|
1.167
|
0.572
|
0.270
|
0.787
|
|
indvdl_othr
|
age_group35-44
|
1.630
|
0.559
|
0.875
|
0.382
|
|
indvdl_othr
|
age_group45-54
|
2.549
|
0.547
|
1.709
|
0.087
|
|
indvdl_othr
|
age_group55-65
|
2.346
|
0.563
|
1.515
|
0.130
|
|
indvdl_othr
|
raceB
|
0.459
|
0.943
|
-0.827
|
0.408
|
|
indvdl_othr
|
raceH
|
0.486
|
0.661
|
-1.092
|
0.275
|
|
indvdl_othr
|
regionEasternWA
|
2.754
|
0.419
|
2.417
|
0.016
|
|
indvdl_othr
|
regionWesternWA
|
1.078
|
0.326
|
0.230
|
0.818
|
|
indvdl_othr
|
diag.status
|
0.640
|
0.540
|
-0.827
|
0.408
|
|
medicare_caid
|
(Intercept)
|
0.118
|
0.293
|
-7.298
|
0.000
|
|
medicare_caid
|
age_group25-34
|
0.799
|
0.331
|
-0.679
|
0.497
|
|
medicare_caid
|
age_group35-44
|
0.559
|
0.362
|
-1.604
|
0.109
|
|
medicare_caid
|
age_group45-54
|
0.672
|
0.372
|
-1.067
|
0.286
|
|
medicare_caid
|
age_group55-65
|
1.425
|
0.336
|
1.055
|
0.291
|
|
medicare_caid
|
raceB
|
1.725
|
0.429
|
1.270
|
0.204
|
|
medicare_caid
|
raceH
|
1.445
|
0.325
|
1.132
|
0.258
|
|
medicare_caid
|
regionEasternWA
|
4.098
|
0.323
|
4.372
|
0.000
|
|
medicare_caid
|
regionWesternWA
|
2.818
|
0.225
|
4.613
|
0.000
|
|
medicare_caid
|
diag.status
|
1.679
|
0.312
|
1.661
|
0.097
|
|
othr_gov
|
(Intercept)
|
0.034
|
0.444
|
-7.590
|
0.000
|
|
othr_gov
|
age_group25-34
|
1.031
|
0.476
|
0.065
|
0.948
|
|
othr_gov
|
age_group35-44
|
0.735
|
0.512
|
-0.600
|
0.548
|
|
othr_gov
|
age_group45-54
|
0.895
|
0.523
|
-0.212
|
0.832
|
|
othr_gov
|
age_group55-65
|
0.867
|
0.540
|
-0.264
|
0.792
|
|
othr_gov
|
raceB
|
6.870
|
0.412
|
4.682
|
0.000
|
|
othr_gov
|
raceH
|
3.499
|
0.380
|
3.293
|
0.001
|
|
othr_gov
|
regionEasternWA
|
1.796
|
0.586
|
0.999
|
0.318
|
|
othr_gov
|
regionWesternWA
|
3.333
|
0.307
|
3.925
|
0.000
|
|
othr_gov
|
diag.status
|
3.020
|
0.394
|
2.807
|
0.005
|
|
uninsrd
|
(Intercept)
|
0.087
|
0.354
|
-6.874
|
0.000
|
|
uninsrd
|
age_group25-34
|
0.391
|
0.420
|
-2.236
|
0.025
|
|
uninsrd
|
age_group35-44
|
0.306
|
0.484
|
-2.451
|
0.014
|
|
uninsrd
|
age_group45-54
|
0.511
|
0.473
|
-1.418
|
0.156
|
|
uninsrd
|
age_group55-65
|
0.233
|
0.664
|
-2.190
|
0.029
|
|
uninsrd
|
raceB
|
10.117
|
0.423
|
5.467
|
0.000
|
|
uninsrd
|
raceH
|
4.716
|
0.392
|
3.952
|
0.000
|
|
uninsrd
|
regionEasternWA
|
2.464
|
0.479
|
1.884
|
0.060
|
|
uninsrd
|
regionWesternWA
|
1.371
|
0.362
|
0.871
|
0.384
|
|
uninsrd
|
diag.status
|
0.000
|
26.891
|
-0.291
|
0.771
|
|
Missing
|
(Intercept)
|
0.260
|
0.275
|
-4.903
|
0.000
|
|
Missing
|
age_group25-34
|
0.266
|
0.389
|
-3.406
|
0.001
|
|
Missing
|
age_group35-44
|
0.165
|
0.487
|
-3.698
|
0.000
|
|
Missing
|
age_group45-54
|
0.397
|
0.404
|
-2.290
|
0.022
|
|
Missing
|
age_group55-65
|
0.263
|
0.486
|
-2.751
|
0.006
|
|
Missing
|
raceB
|
0.377
|
1.048
|
-0.931
|
0.352
|
|
Missing
|
raceH
|
1.100
|
0.450
|
0.213
|
0.831
|
|
Missing
|
regionEasternWA
|
2.210
|
0.423
|
1.876
|
0.061
|
|
Missing
|
regionWesternWA
|
1.012
|
0.324
|
0.037
|
0.971
|
|
Missing
|
diag.status
|
0.177
|
1.193
|
-1.451
|
0.147
|
Model 5
We keep the missing income level as a predictor in the model
ins_mlm5 <- multinom(insurance ~ age_group + race +
region + diag.status + income_cat,
data = insDF,
weights = ego.wawt)
## # weights: 102 (80 variable)
## initial value 1655.050008
## iter 10 value 935.798013
## iter 20 value 852.656056
## iter 30 value 849.266973
## iter 40 value 849.164479
## iter 50 value 849.106145
## iter 60 value 849.099596
## iter 70 value 849.098351
## iter 80 value 849.097026
## iter 90 value 849.096417
## iter 90 value 849.096412
## iter 90 value 849.096412
## final value 849.096412
## converged
kable(tidy(ins_mlm5, exponentiate = T),
caption= "Model 5", digits = 3) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Model 5
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
indvdl_othr
|
(Intercept)
|
0.022
|
0.560
|
-6.803000e+00
|
0.000
|
|
indvdl_othr
|
age_group25-34
|
1.864
|
0.591
|
1.053000e+00
|
0.292
|
|
indvdl_othr
|
age_group35-44
|
3.108
|
0.588
|
1.929000e+00
|
0.054
|
|
indvdl_othr
|
age_group45-54
|
5.419
|
0.585
|
2.890000e+00
|
0.004
|
|
indvdl_othr
|
age_group55-65
|
4.364
|
0.597
|
2.469000e+00
|
0.014
|
|
indvdl_othr
|
raceB
|
0.423
|
0.947
|
-9.100000e-01
|
0.363
|
|
indvdl_othr
|
raceH
|
0.464
|
0.671
|
-1.144000e+00
|
0.253
|
|
indvdl_othr
|
regionEasternWA
|
2.087
|
0.438
|
1.678000e+00
|
0.093
|
|
indvdl_othr
|
regionWesternWA
|
0.864
|
0.339
|
-4.320000e-01
|
0.666
|
|
indvdl_othr
|
diag.status
|
0.579
|
0.552
|
-9.910000e-01
|
0.321
|
|
indvdl_othr
|
income_catfpl138
|
8.891
|
0.555
|
3.940000e+00
|
0.000
|
|
indvdl_othr
|
income_catfpl138-300
|
3.378
|
0.379
|
3.215000e+00
|
0.001
|
|
indvdl_othr
|
income_catfpl300-400
|
1.713
|
0.510
|
1.055000e+00
|
0.291
|
|
indvdl_othr
|
income_catfpl400-500
|
0.396
|
1.149
|
-8.060000e-01
|
0.420
|
|
indvdl_othr
|
income_catfpl500-600
|
0.465
|
1.378
|
-5.550000e-01
|
0.579
|
|
indvdl_othr
|
income_catMissing
|
2.257
|
0.708
|
1.151000e+00
|
0.250
|
|
medicare_caid
|
(Intercept)
|
0.007
|
0.490
|
-1.026700e+01
|
0.000
|
|
medicare_caid
|
age_group25-34
|
2.096
|
0.394
|
1.880000e+00
|
0.060
|
|
medicare_caid
|
age_group35-44
|
1.975
|
0.439
|
1.549000e+00
|
0.121
|
|
medicare_caid
|
age_group45-54
|
3.189
|
0.459
|
2.527000e+00
|
0.011
|
|
medicare_caid
|
age_group55-65
|
6.008
|
0.420
|
4.268000e+00
|
0.000
|
|
medicare_caid
|
raceB
|
1.442
|
0.516
|
7.100000e-01
|
0.478
|
|
medicare_caid
|
raceH
|
1.335
|
0.378
|
7.640000e-01
|
0.445
|
|
medicare_caid
|
regionEasternWA
|
2.066
|
0.386
|
1.880000e+00
|
0.060
|
|
medicare_caid
|
regionWesternWA
|
1.599
|
0.269
|
1.742000e+00
|
0.081
|
|
medicare_caid
|
diag.status
|
1.543
|
0.383
|
1.133000e+00
|
0.257
|
|
medicare_caid
|
income_catfpl138
|
169.329
|
0.475
|
1.080500e+01
|
0.000
|
|
medicare_caid
|
income_catfpl138-300
|
15.466
|
0.410
|
6.687000e+00
|
0.000
|
|
medicare_caid
|
income_catfpl300-400
|
3.939
|
0.558
|
2.458000e+00
|
0.014
|
|
medicare_caid
|
income_catfpl400-500
|
5.855
|
0.592
|
2.984000e+00
|
0.003
|
|
medicare_caid
|
income_catfpl500-600
|
0.000
|
NaN
|
NaN
|
NaN
|
|
medicare_caid
|
income_catMissing
|
22.859
|
0.516
|
6.070000e+00
|
0.000
|
|
othr_gov
|
(Intercept)
|
0.016
|
0.529
|
-7.790000e+00
|
0.000
|
|
othr_gov
|
age_group25-34
|
1.363
|
0.496
|
6.240000e-01
|
0.533
|
|
othr_gov
|
age_group35-44
|
1.234
|
0.544
|
3.860000e-01
|
0.699
|
|
othr_gov
|
age_group45-54
|
1.634
|
0.562
|
8.730000e-01
|
0.382
|
|
othr_gov
|
age_group55-65
|
1.460
|
0.563
|
6.710000e-01
|
0.502
|
|
othr_gov
|
raceB
|
6.706
|
0.421
|
4.516000e+00
|
0.000
|
|
othr_gov
|
raceH
|
3.191
|
0.391
|
2.968000e+00
|
0.003
|
|
othr_gov
|
regionEasternWA
|
1.425
|
0.596
|
5.950000e-01
|
0.552
|
|
othr_gov
|
regionWesternWA
|
2.646
|
0.316
|
3.082000e+00
|
0.002
|
|
othr_gov
|
diag.status
|
2.856
|
0.404
|
2.599000e+00
|
0.009
|
|
othr_gov
|
income_catfpl138
|
6.477
|
0.589
|
3.170000e+00
|
0.002
|
|
othr_gov
|
income_catfpl138-300
|
2.630
|
0.417
|
2.318000e+00
|
0.020
|
|
othr_gov
|
income_catfpl300-400
|
1.154
|
0.576
|
2.490000e-01
|
0.803
|
|
othr_gov
|
income_catfpl400-500
|
2.640
|
0.529
|
1.837000e+00
|
0.066
|
|
othr_gov
|
income_catfpl500-600
|
1.243
|
0.917
|
2.370000e-01
|
0.813
|
|
othr_gov
|
income_catMissing
|
3.511
|
0.603
|
2.081000e+00
|
0.037
|
|
uninsrd
|
(Intercept)
|
0.010
|
0.614
|
-7.488000e+00
|
0.000
|
|
uninsrd
|
age_group25-34
|
0.719
|
0.451
|
-7.330000e-01
|
0.464
|
|
uninsrd
|
age_group35-44
|
1.030
|
0.530
|
5.500000e-02
|
0.956
|
|
uninsrd
|
age_group45-54
|
2.315
|
0.542
|
1.550000e+00
|
0.121
|
|
uninsrd
|
age_group55-65
|
0.536
|
0.708
|
-8.810000e-01
|
0.378
|
|
uninsrd
|
raceB
|
10.074
|
0.461
|
5.006000e+00
|
0.000
|
|
uninsrd
|
raceH
|
4.554
|
0.420
|
3.613000e+00
|
0.000
|
|
uninsrd
|
regionEasternWA
|
1.556
|
0.510
|
8.670000e-01
|
0.386
|
|
uninsrd
|
regionWesternWA
|
0.924
|
0.388
|
-2.040000e-01
|
0.839
|
|
uninsrd
|
diag.status
|
0.000
|
477.953
|
-2.900000e-02
|
0.977
|
|
uninsrd
|
income_catfpl138
|
34.953
|
0.663
|
5.361000e+00
|
0.000
|
|
uninsrd
|
income_catfpl138-300
|
15.191
|
0.541
|
5.025000e+00
|
0.000
|
|
uninsrd
|
income_catfpl300-400
|
2.038
|
0.828
|
8.600000e-01
|
0.390
|
|
uninsrd
|
income_catfpl400-500
|
3.013
|
0.878
|
1.257000e+00
|
0.209
|
|
uninsrd
|
income_catfpl500-600
|
0.000
|
NaN
|
NaN
|
NaN
|
|
uninsrd
|
income_catMissing
|
11.353
|
0.750
|
3.238000e+00
|
0.001
|
|
Missing
|
(Intercept)
|
0.060
|
0.465
|
-6.051000e+00
|
0.000
|
|
Missing
|
age_group25-34
|
0.514
|
0.467
|
-1.423000e+00
|
0.155
|
|
Missing
|
age_group35-44
|
0.415
|
0.566
|
-1.555000e+00
|
0.120
|
|
Missing
|
age_group45-54
|
0.925
|
0.513
|
-1.530000e-01
|
0.879
|
|
Missing
|
age_group55-65
|
0.531
|
0.566
|
-1.119000e+00
|
0.263
|
|
Missing
|
raceB
|
0.510
|
1.079
|
-6.250000e-01
|
0.532
|
|
Missing
|
raceH
|
1.065
|
0.547
|
1.150000e-01
|
0.909
|
|
Missing
|
regionEasternWA
|
1.804
|
0.516
|
1.144000e+00
|
0.253
|
|
Missing
|
regionWesternWA
|
0.698
|
0.386
|
-9.310000e-01
|
0.352
|
|
Missing
|
diag.status
|
0.242
|
1.217
|
-1.164000e+00
|
0.244
|
|
Missing
|
income_catfpl138
|
5.898
|
0.666
|
2.666000e+00
|
0.008
|
|
Missing
|
income_catfpl138-300
|
1.802
|
0.555
|
1.062000e+00
|
0.288
|
|
Missing
|
income_catfpl300-400
|
0.359
|
1.280
|
-7.990000e-01
|
0.424
|
|
Missing
|
income_catfpl400-500
|
0.000
|
0.000
|
-2.950197e+53
|
0.000
|
|
Missing
|
income_catfpl500-600
|
0.000
|
0.000
|
-2.915339e+148
|
0.000
|
|
Missing
|
income_catMissing
|
42.398
|
0.442
|
8.478000e+00
|
0.000
|
Comparison of AICs
models <- get_formula("ins_mlm", 5)
aics <- get_aics("ins_mlm", 5, "mlm")
kable(data.frame(Model = models,
AIC = aics),
caption= paste("Comparison of AICs; n =", dim(cDF)[[1]]),
digits = 0) %>%
kable_styling(full_width=F, position="center",
bootstrap_options = c("striped"))
Comparison of AICs; n = 923
|
Model
|
AIC
|
|
insurance ~ age_group
|
2239
|
|
insurance ~ age_group + race
|
2197
|
|
insurance ~ age_group + race + region
|
2168
|
|
insurance ~ age_group + race + region + diag.status
|
2156
|
|
insurance ~ age_group + race + region + diag.status + income_cat
|
1858
|
- Income is a powerful predictor, even after all other demographics are included.
Predicted distribution
- The model is age + race + region + diag.status + income
#### Construct and save output
expand_df <- expand.grid(
age_group = levels(cDF$age_group),
race = c("B", "H", "O"),
region = c("EasternWA", "King", "WesternWA"),
diag.status = c(0, 1),
income_cat = levels(cDF$income_cat)
)
pred_ins <- cbind(expand_df,
predict(ins_mlm4,
newdata = expand_df,
"probs"))
# For some reason ZK included age_group in the model/prediction, but she
# averaged over it in the saved DF. We don't do that here. We also use
# pivot_longer in place of gather
pred_insl <- pred_ins %>%
mutate(diag.status = ifelse(diag.status == 0, "HIV-", "HIV+")) %>%
pivot_longer(c(emplyr:Missing),
names_to = "insurance",
values_to = "prob")
# Save to output object
save_out$pred_ins <- pred_insl
By race, region and Dx
ggplot(data = pred_insl,
aes(x = race, y = prob/6, # sums over omitted var age_group
group = insurance, fill = insurance)) +
geom_bar(stat="identity") +
facet_wrap(diag.status ~ region) +
labs(title = "Insurance distribution by race, Dx and region",
caption = paste("Multinomial regression results based on n =",
dim(cDF)[[1]],
"; Dx HIV+ =",
scales::percent(proportions(table(cDF$diag.status))[[2]],
accuracy = 0.1),
"; assumes homogenous age distributions"),
y = "predicted distribution") +
theme(strip.text = element_text(size = 10),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10, hjust = 0.5),
axis.text.y = element_text(size = 10),
legend.position = "bottom")

By age, region and Dx
ggplot(data = pred_insl,
aes(x = age_group, y = prob/3, # sums over omitted var race
group = insurance, fill = insurance)) +
geom_bar(stat="identity") +
facet_wrap(diag.status ~ region) +
labs(title = "Insurance distribution by Age, Dx and region",
caption = paste("Multinomial regression results based on n =",
dim(cDF)[[1]],
"; Dx HIV+ =",
scales::percent(proportions(table(cDF$diag.status))[[2]],
accuracy = 0.1),
"; assumes homogenous race distributions"),
y = "predicted distribution") +
theme(strip.text = element_text(size = 10),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10, hjust = 0.5),
axis.text.y = element_text(size = 10),
legend.position = "bottom")

Save out
These aren’t used, but I save them out just in case, in the Data/Descriptives folder.
descTable <-
tibble(Objects = names(save_out),
Description = c("Predicted income categories",
"Income category bounds (as %fpl)",
"Predicted insurance"),
Method = c("Multinomial regression",
"Transformation",
"Multinomial regression"),
Levels = c("age x race x region",
"6 categories",
"age x race x region x diag.status x income"),
MakeFile = c("make_WhampIncIns",
"make_WhampIncIns",
"make_WhampIncIns"),
DataSource = c("WHAMP survey",
"WHAMP survey",
"WHAMP survey"))
save_out <- c(save_out, list(descTable = descTable))
kable(descTable,
caption= "WHAMP Income and Insurance") %>%
kable_styling(position = "center",
bootstrap_options = c("striped"))
WHAMP Income and Insurance
|
Objects
|
Description
|
Method
|
Levels
|
MakeFile
|
DataSource
|
|
pred_inc
|
Predicted income categories
|
Multinomial regression
|
age x race x region
|
make_WhampIncIns
|
WHAMP survey
|
|
income_levels
|
Income category bounds (as %fpl)
|
Transformation
|
6 categories
|
make_WhampIncIns
|
WHAMP survey
|
|
pred_ins
|
Predicted insurance
|
Multinomial regression
|
age x race x region x diag.status x income
|
make_WhampIncIns
|
WHAMP survey
|
### Params
saveRDS(save_out, here::here("Data", "Descriptives", "WhampIncIns.RDS"))