Data management

All of the data in this report come from the WHAMP Survey. The survey was conducted over a three month period using social media (primarily FaceBook, with a small number of Grindr and Growlr respondents) from Sep 11 - Dec 11 2019.

wDF <- WHAMPData::whamp_wide
origwDF <- wDF

# egodata only used for numpart and PrEP indication
# note that only respondents who
# answered the summary SNAP question and had anal sex partners
# after 2010 are in egoDF: 682 WHAMP respondents, 150 ARTnet
# see: Github/WHAMP2/Reports/WHAMPsurvey/Reports/00_data_flow.html#sample-exclusions-summary
egoDF <- WHAMPData::whampArtnetWA_egodata 


income_cat <- attr(wDF$HHINCOME, "labels")
income_lab <- gsub(",", "", names(income_cat))
income_lab <- do.call(rbind, lapply(str_extract_all(income_lab, "[0-9]+"), 
                                    function(x) as.numeric(x[1:2])))
income_lab <- data.frame(income_lab)
names(income_lab) <- c("income_lb", "income_ub")
income_lab$income_cat <- income_cat

income_lab <- data.frame(income_lab)
income_lab$income_ub[income_lab$income_lb == 80000] <- Inf
income_lab <- income_lab %>% filter(income_cat < 10)

prepfreq_lab <- names(attr(wDF$PREPCHKFREQ_CURR, "labels"))


# Get PrEP indication and number of ongoing partners from the ego data:
# PrEP is recommended for those with ongoing HIV positive partner;
# so we get
# (1) the number of ongoing sex partners, and 
# (2) the number of + ongoing

partDF <- bind_rows(egoDF$main$alters, 
                    egoDF$casl$alters) %>%
  arrange(ego.id, type) %>%
  filter(ongoing == 1) %>%
  group_by(ego.id) %>%
  summarise(n_part = n(),
            n_pos_part = sum(hiv),
            prepRecIndicated = max(hiv))


# fpl is from: https://aspe.hhs.gov/2019-poverty-guidelines
# we use the guideline, not the threshold here

adap_fpl_cut <- data.frame(hhsize = c(1:8), 
                           fpl = c(12490, 16910, 21330, 25750,
                                   30170, 34590, 39010, 43430))


# Pull variables into DF

wDF <- wDF %>% 
  select(Vrid, ego.wawt, AGE, age.grp, diag.status, 
         HHINCOME, DEPEND, race.cat, race, 
         region, region,
         
         # Insurance type
         TYP_INSA, TYP_INSA2, TYP_INSG1, TYP_INSG2,
         TYP_INSH, TYP_INSD, TYP_INSE,
         TYP_INSJ, TYP_INSF, TYP_INSI, TYP_INSC,
         TYP_INSB, TYP_INSF_OTHER,
         
         # PrEP related variables
         PREP_AWARE, PREP_EVER, PREP_CURRENT,
         PDAPA, PDAPB, PDAPC, PDAPD, PDAPE, 
         PDAPF, PDAPG, PDAPH, PDAPI, PDAPD_OTHER,
         PREPCHKFREQ_CURR,
         
         # ART related variables
         ART_CURRENT, ADAP, 
         
         # STI: GC, CT, Syphilis
         BSTIA, BSTIB, BSTIC,

         # Risk behavior
         snap, deg.main, deg.casl, deg.tot, 
         count.oo.part, risk.grp) %>%
  
  rename(race.cat4 = race.cat) %>%
  
  mutate(ego.id = Vrid, 
         PREPCHKFREQ_CURR = factor(
           PREPCHKFREQ_CURR, 
           levels = c(1, 2, 3, 4, 5, 6, 7, 8, 88, 99), 
           labels = prepfreq_lab), 
         PREP_CURRENT = ifelse(!is.na(PREP_AWARE) &
                                 is.na(PREP_CURRENT), 
                               0, PREP_CURRENT)) %>%
  left_join(partDF, by = "ego.id") 


wDF <- wDF %>% 
  mutate(hh_income = HHINCOME,
         HHINCOME = ifelse(HHINCOME %in% c(88, 99), NA, HHINCOME), 
         DEPEND = ifelse(is.na(HHINCOME), NA, DEPEND)) %>%
  left_join(. , adap_fpl_cut %>% select(hhsize, fpl), 
            by = c("DEPEND" = "hhsize")) %>%
  left_join(., income_lab, 
            by = c("HHINCOME" = "income_cat")) %>%
  mutate(fpl_lb = round(income_lb / fpl, 2), 
         fpl_ub = round(income_ub / fpl, 2), 
         compare = ifelse(floor(fpl_ub - 0.001) > floor(fpl_lb), 
                          floor(fpl_ub - 0.001), floor(fpl_lb))) %>%
  mutate(compare = 
           ifelse(fpl_ub > 1.38 & fpl_lb < 1.38 | fpl_ub < 1.38, 1.38,
                  ifelse(is.infinite(compare), 
                         floor(fpl_lb), compare)), 
         prop = ifelse(floor(fpl_ub - 0.001) > floor(fpl_lb), 
                       (compare - fpl_lb) / (fpl_ub - fpl_lb), 1)) %>%
  mutate(compare = ifelse(compare == 1, 2, compare), 
         prop = ifelse(compare > fpl_ub | is.infinite(fpl_ub), 1, prop)) %>%
  mutate(prop = ifelse(compare == 1.38 & fpl_ub > 1.38, 
                       (1.38 - fpl_lb) / (fpl_ub - fpl_lb), prop), 
         tmp_class = ifelse(compare > floor(fpl_lb) & prop > 0.5 & compare > 2, 
                            compare - 1, 
                            ifelse(compare == 1.38 & prop < 0.5, 2, compare))) %>%
  mutate(income_cat = as.numeric(factor(tmp_class))) %>%
  mutate(income_cat = ifelse(income_cat >= 6, 6, 
                             ifelse(is.infinite(fpl_ub), 6, income_cat))) %>%
  mutate(income_cat = factor(income_cat, 
                             levels = c(1:6), 
                             labels = c("fpl138", "fpl138-300", "fpl300-400", 
                                        "fpl400-500", "fpl500-600", "fpl600+"))) %>%
  mutate(fpl_lb = round(income_lb / 12490, 2) * 100, 
         fpl_ub = round(income_ub / 12490, 2) * 100) %>%
  mutate(tmp_class = paste0("fpl", fpl_lb, "-", fpl_ub)) %>% 
  mutate(income_cat_o = ifelse(tmp_class == "fpl641-Inf", "fpl641+", 
                               ifelse(tmp_class == "fplNA-NA", NA, tmp_class))) %>%
  mutate(hhincome2 = paste0(income_lb, "-", income_ub)) %>%
  mutate(hhincome2 = ifelse(hhincome2 == "NA-NA", NA, 
                            ifelse(hhincome2 == "80000-Inf", "80000+", hhincome2)))


ins_names <- c("ins_employ", "ins_individual", "ins_medicaid", 
               "ins_medicare", "ins_assistance", 
               "ins_tricare", "ins_VA", "ins_indian", "ins_other", 
               "ins_uninsure", 
               "ins_na1", "ins_na2", "ins_other2")

colnames(wDF)[grep("TYP_INS", colnames(wDF))] <- ins_names

sumInsType <- wDF %>% select(starts_with("ins_"), -c("ins_na1", "ins_na2")) %>%
  mutate(ins_other2 = ifelse(!is.na(ins_other2), 1, 0)) %>%
  mutate(sumInsType = rowSums(.)) %>% count(sumInsType)


wDF <- wDF %>% 
  select(-c("ins_na1", "ins_na2", "ins_other2")) %>%
  mutate(ins_individual = replace(ins_individual, ins_individual == 1, 2),
         ins_other = replace(ins_other, ins_other == 1, 2),
         ins_employ = replace(ins_employ, ins_employ == 1, 3),
         ins_assistance = replace(ins_assistance, ins_assistance == 1, 4),
         ins_tricare = replace(ins_tricare, ins_tricare == 1, 4),
         ins_VA = replace(ins_VA, ins_VA == 1, 4),
         ins_indian = replace(ins_indian, ins_VA == 1, 4),
         ins_medicaid = replace(ins_medicaid, ins_medicaid == 1, 5), 
         ins_medicare = replace(ins_medicare, ins_medicare == 1, 5)) %>%
  mutate(max = pmax(ins_uninsure, ins_individual, ins_other, ins_employ,
                    ins_assistance, ins_tricare, ins_VA, ins_indian, 
                    ins_medicaid, ins_medicare)) %>%
  mutate(insurance = case_when(max == 5 ~ "medicare_caid",
                               max == 4 ~ "othr_gov",
                               max == 3 ~ "emplyr",
                               max == 2 ~ "indvdl_othr",
                               max == 1 ~ "uninsrd", 
                               TRUE ~ NA_character_))

# age group: maximum age 
wDF <- wDF %>% 
  mutate(age_group = factor(age.grp, 
                            levels = c(1:5), 
                            labels = c("15-24", "25-34", "35-44", 
                                       "45-54", "55-65")))

Missing data

Missing values in the demographics come mostly from insurance and income_cat. We group the missing insurance with the uninsured, based on exploration of their patterns. We use the missing income cases as a factor level in the analyses, so we don’t lose them. For all other demographic attributes (including HIV Dx), the missing values are very small, so we use the complete case subset.

Missing values later in the survey, for ART and PrEP use and STI incidence come from attrition.

table(wDF$age_group, is.na(wDF$insurance)) %>% 
  kable(caption= "Missing insurance by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Missing insurance by age
	FALSE	TRUE
15-24	105	19
25-34	248	12
35-44	181	6
45-54	161	12
55-65	176	7

table(wDF$age_group, is.na(wDF$income_cat)) %>%
  kable(caption= "Missing income by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Missing income by age
	FALSE	TRUE
15-24	100	24
25-34	240	20
35-44	176	11
45-54	160	13
55-65	168	15

# assign missing insurance to the uninsured/NA category
wDF <- wDF %>%
  mutate(insurance = ifelse(is.na(insurance) | insurance == "uninsrd",
                            "uninsrd_NA", insurance),
         income_cat = forcats::fct_explicit_na(income_cat, "Missing"))


table(wDF$age_group, wDF$insurance) %>% 
  kable(caption= "Final insurance by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Final insurance by age
	emplyr	indvdl_othr	medicare_caid	othr_gov	uninsrd_NA
15-24	66	5	19	6	28
25-34	172	12	32	17	27
35-44	128	13	24	8	14
45-54	105	17	20	12	19
55-65	106	14	41	11	11

table(wDF$age_group, wDF$income_cat) %>% 
  kable(caption= "Final income categories by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Final income categories by age
	fpl138	fpl138-300	fpl300-400	fpl400-500	fpl500-600	fpl600+	Missing
15-24	31	33	10	5	1	20	24
25-34	23	70	25	24	17	81	20
35-44	19	25	21	13	3	95	11
45-54	15	22	11	8	5	99	13
55-65	16	40	15	7	1	89	15

# complete case DF for attrs & diag.status
wDF$complete <- with(wDF, complete.cases(age_group, race, region, diag.status))
table(wDF$complete)

## 
## FALSE  TRUE 
##     4   923

cDF <- wDF %>% filter(complete == 1)

# Create the output object
save_out <- list()

HH income and dependents

Income

Table

These are the weighted frequencies

# hh_income is the raw variable
inc_cats <- data.frame(orig = names(attr(wDF$hh_income, "labels")))  %>%
  mutate(labs = gsub(" a.*", "", orig),
         labs = gsub("Im not sure", "DK", labs),
         labs = gsub("I prefer not to", "RF", labs))

names(attr(wDF$hh_income, "labels")) <- inc_cats$labs

sjmisc::frq(wDF$hh_income, weights = ego.wawt)

## 
## What was your household income last year from all sources before taxes? (x) <numeric>
## # total N=927  valid N=898  mean=10.59  sd=20.93
## 
## Value |              Label |   N | Raw % | Valid % | Cum. %
## -----------------------------------------------------------
##     1 |      $0 to $19,999 |  94 | 10.14 |   10.47 |  10.47
##     2 | $20,000 to $29,999 |  89 |  9.60 |    9.91 |  20.38
##     3 | $30,000 to $39,999 |  72 |  7.77 |    8.02 |  28.40
##     4 | $40,000 to $49,999 |  73 |  7.87 |    8.13 |  36.53
##     5 | $50,000 to $59,999 |  53 |  5.72 |    5.90 |  42.43
##     6 | $60,000 to $69,999 |  47 |  5.07 |    5.23 |  47.66
##     7 | $70,000 to $79,999 |  58 |  6.26 |    6.46 |  54.12
##     8 |    $80,000 or more | 360 | 38.83 |   40.09 |  94.21
##    88 |                 DK |  23 |  2.48 |    2.56 |  96.77
##    99 |                 RF |  29 |  3.13 |    3.23 | 100.00
##  <NA> |               <NA> |  29 |  3.13 |    <NA> |   <NA>

Plot

wDF %>% group_by(hh_income) %>%
  summarize(n.wtd = sum(ego.wawt)) %>%
  mutate(prop.wtd = n.wtd / sum(n.wtd)) %>%
  ggplot(aes(x = sjlabelled::as_label(hh_income), y = prop.wtd)) +
  geom_bar(stat="identity", color = "blue", alpha = 0.7) +
  labs(title = "HH income",
       x = "HH income category",
       y = "Wtd proportion") +
  theme(axis.text.x = element_text(size = 8, angle = 50, 
                                   vjust = 0.6))

Dependents

Overall

tempDF <- wDF %>% 
  group_by(DEPEND) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% 
  kable(caption= paste("HH Dependents; n = ",
                       sum(tempDF$n)),
        digits = 2) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

HH Dependents; n = 927
DEPEND	n	n.wtd	prop.wtd
1	416	431.6	0.47
2	311	304.3	0.33
3	49	45.2	0.05
4	41	37.0	0.04
5	17	15.1	0.02
6	8	9.4	0.01
7	2	1.7	0.00
NA	83	82.7	0.09

By Income

tempDF <- wDF %>% 
  filter(hh_income < 88 & !is.na(DEPEND)) %>% 
  group_by(DEPEND, hh_income) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(DEPEND) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = DEPEND,
              values_from = prop.wtd) %>%
  tibble::add_column(income = names(attr(wDF$hh_income, "labels"))[1:8],
                    .before=1) %>%
  select(-hh_income) %>%
  kable(caption= paste("Distribution of income by #Dependents; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dependents" = 7))

Distribution of income by #Dependents; n = 844
	Dependents
income	1	2	3	4	5	6	7
$0 to $19,999	0.16	0.03	0.05	0.06	0.04	NA	0.6
$20,000 to $29,999	0.18	0.04	0.06	0.02	NA	0.07	NA
$30,000 to $39,999	0.10	0.05	0.07	0.05	0.05	0.17	NA
$40,000 to $49,999	0.10	0.06	0.07	0.04	0.06	0.28	NA
$50,000 to $59,999	0.08	0.05	0.04	0.04	NA	NA	NA
$60,000 to $69,999	0.06	0.05	0.03	0.02	NA	NA	0.4
$70,000 to $79,999	0.07	0.07	0.10	0.05	0.11	NA	NA
$80,000 or more	0.25	0.65	0.59	0.72	0.73	0.49	NA

Single person HH show a distinct pattern, with more low income and fewer high income earners. Roughly a third of these HH are in the bottom two categories, and just over 25% are in the top category
With 2+ in the HH, the majority (over 60% of HH) are in the top income category, with the rest relatively evenly distributed in the lower categories.
For the 5+ HH, there are slightly fewer in the top category, and slightly more in the middle.

FPL guidelines

These are the federal poverty level (FPL) guidelines by the number of dependents in the household. Source here.

kable(adap_fpl_cut, 
      caption= "FPL by dependents") %>% 
  kable_styling(full_width=F, position="center")

FPL by dependents
hhsize	fpl
1	12490
2	16910
3	21330
4	25750
5	30170
6	34590
7	39010
8	43430

FPL adjusted income

Match the FPL by the number of dependents for each respondent.
Divide the original income bounds by the FPL. This resulted in an income interval with a lower bound and an upper bound of FPL of each individual.
Roughly classify individuals’ income into 5 income categories <138% FPL, 138%-300% FPL, 300%-400% FPL, 500%-600% FPL, and >600% FPL. Then we calculated the overlapping proportion of the interval in step 2 and the intervals defined in this step. If the overlapping proportion is over 50% in an income category, we assign the individual to this category.
If a respondent originally said that their income was above $80,000 per year, the respondent was assigned in the income category >600% FPL no matter how many dependents this respondent had.

Descriptives

Overall

In general, individuals with lower income shifted their income categories after adjusting for the number of dependents.

tempDF <- wDF %>%
  filter(!is.na(income_cat)) %>%
  group_by(income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  ungroup() %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) 

p_income <- ggplot(tempDF, 
                   aes(x = income_cat, y = prop.wtd, 
                       text = prop.wtd)) +
  geom_bar(fill = "blue", alpha = 0.5,
           stat="identity",
           position = position_dodge2(width = 0.9)) +
  labs(title = "Income distribution in FPL categories",
       sub = "Not adjusted for dependents",
       caption = "Weighted WHAMP survey data",
       x= "income FPL category",
       y = "weighted proportion") +
  ylim(0, 0.5)

title.plotly <-
  list(text = paste0('Income distribution in FPL categories',
                     '<br>',
                     '<sup>',
                     'Not adjusted for dependents','</sup>'))

fig <- ggplotly(p_income , tooltip = "text",
                width = 600, height = 400) %>%
  layout(hoverlabel = list(font=list(color="white")),
         title = title.plotly)

fig

Age

Table

tempDF <- wDF %>% 
  filter(!is.na(income_cat)) %>% 
  mutate(income_cat = factor(income_cat,
                             levels = sort(unique(income_cat)))) %>%
  group_by(age_group, income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of income by age; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5))

Distribution of income by age; n = 927
	Age group
income_cat	15-24	25-34	35-44	45-54	55-65
fpl138	0.21	0.08	0.09	0.08	0.08
fpl138-300	0.30	0.26	0.11	0.11	0.22
fpl300-400	0.08	0.09	0.11	0.06	0.08
fpl400-500	0.04	0.10	0.07	0.04	0.03
fpl500-600	0.01	0.07	0.01	0.02	0.01
fpl600+	0.19	0.33	0.55	0.63	0.50
Missing	0.18	0.08	0.06	0.06	0.09

Plot

mycol = RColorBrewer::brewer.pal(length(unique(wDF$age_group))+1,
                                 'Oranges')[-1]

ggplot(tempDF, 
       aes(x = income_cat, y = prop.wtd,
           group = age_group, color = age_group)) +
  geom_line(size = 1) +
  geom_point() +
  scale_color_manual(values=mycol)

Prediction: Adjusted Income

Multinomial regressions

The coefficients are relative risk ratios.

Model 1

# multinom only uses complete cases, and inconsistently handles missing
# cases which breaks other functions like summary()

cDFi <- cDF %>% filter(income_cat != "Missing")

inc_mlm1 <- multinom(income_cat ~ diag.status, 
                     data = cDFi, 
                     weights = ego.wawt)

## # weights:  18 (10 variable)
## initial  value 1510.111738 
## iter  10 value 1207.124161
## final  value 1205.269549 
## converged

#summary(inc_mlm1)
kable(tidy(inc_mlm1, exponentiate = T), digits = 2, 
      caption = "Model 1") %>% 
  kable_styling(full_width = F, 
                position="center", 
                bootstrap_options = c("striped"))

Model 1
y.level	term	estimate	std.error	statistic	p.value
fpl138-300	(Intercept)	1.92	0.13	4.91	0.00
fpl138-300	diag.status	1.01	0.42	0.03	0.97
fpl300-400	(Intercept)	0.81	0.16	-1.34	0.18
fpl300-400	diag.status	1.10	0.50	0.20	0.84
fpl400-500	(Intercept)	0.55	0.18	-3.30	0.00
fpl400-500	diag.status	1.19	0.54	0.31	0.75
fpl500-600	(Intercept)	0.27	0.23	-5.57	0.00
fpl500-600	diag.status	0.40	1.05	-0.88	0.38
fpl600+	(Intercept)	4.15	0.12	11.87	0.00
fpl600+	diag.status	1.20	0.37	0.48	0.63

Model 2

inc_mlm2 <- multinom(income_cat ~ age_group, 
                     data = cDFi, 
                     weights = ego.wawt)

## # weights:  36 (25 variable)
## initial  value 1510.111738 
## iter  10 value 1170.486371
## iter  20 value 1142.577044
## iter  30 value 1139.946598
## final  value 1139.943809 
## converged

kable(tidy(inc_mlm2, exponentiate = T), digits = 2, 
      caption= "Model 2") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 2
y.level	term	estimate	std.error	statistic	p.value
fpl138-300	(Intercept)	1.42	0.23	1.55	0.12
fpl138-300	age_group25-34	2.12	0.35	2.17	0.03
fpl138-300	age_group35-44	0.91	0.40	-0.23	0.82
fpl138-300	age_group45-54	1.00	0.43	-0.01	0.99
fpl138-300	age_group55-65	1.98	0.39	1.77	0.08
fpl300-400	(Intercept)	0.36	0.33	-3.01	0.00
fpl300-400	age_group25-34	3.08	0.46	2.46	0.01
fpl300-400	age_group35-44	3.56	0.47	2.71	0.01
fpl300-400	age_group45-54	2.01	0.55	1.27	0.20
fpl300-400	age_group55-65	2.61	0.51	1.88	0.06
fpl400-500	(Intercept)	0.19	0.43	-3.84	0.00
fpl400-500	age_group25-34	6.16	0.53	3.41	0.00
fpl400-500	age_group35-44	4.18	0.57	2.50	0.01
fpl400-500	age_group45-54	2.78	0.65	1.58	0.11
fpl400-500	age_group55-65	1.93	0.68	0.97	0.33
fpl500-600	(Intercept)	0.04	0.90	-3.62	0.00
fpl500-600	age_group25-34	20.32	0.96	3.13	0.00
fpl500-600	age_group35-44	4.58	1.10	1.38	0.17
fpl500-600	age_group45-54	8.41	1.07	2.00	0.05
fpl500-600	age_group55-65	2.34	1.30	0.66	0.51
fpl600+	(Intercept)	0.88	0.25	-0.52	0.60
fpl600+	age_group25-34	4.40	0.36	4.13	0.00
fpl600+	age_group35-44	7.35	0.37	5.44	0.00
fpl600+	age_group45-54	9.60	0.39	5.79	0.00
fpl600+	age_group55-65	7.27	0.38	5.16	0.00

Model 3

inc_mlm3 <- multinom(income_cat ~ age_group + factor(race), 
                     data = cDFi, 
                     weights = ego.wawt)

## # weights:  48 (35 variable)
## initial  value 1510.111738 
## iter  10 value 1168.693774
## iter  20 value 1128.622592
## iter  30 value 1126.846067
## iter  40 value 1126.569792
## iter  50 value 1126.565275
## final  value 1126.565256 
## converged

kable(tidy(inc_mlm3, exponentiate = T), digits = 2, 
      caption= "Model 3") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 3
y.level	term	estimate	std.error	statistic	p.value
fpl138-300	(Intercept)	2.89	0.55	1.93	0.05
fpl138-300	age_group25-34	2.08	0.35	2.11	0.04
fpl138-300	age_group35-44	0.90	0.40	-0.26	0.79
fpl138-300	age_group45-54	0.99	0.43	-0.03	0.98
fpl138-300	age_group55-65	2.03	0.39	1.83	0.07
fpl138-300	factor(race)H	0.50	0.61	-1.13	0.26
fpl138-300	factor(race)O	0.46	0.53	-1.46	0.15
fpl300-400	(Intercept)	0.58	0.68	-0.79	0.43
fpl300-400	age_group25-34	3.02	0.46	2.42	0.02
fpl300-400	age_group35-44	3.53	0.47	2.69	0.01
fpl300-400	age_group45-54	1.97	0.55	1.24	0.22
fpl300-400	age_group55-65	2.58	0.51	1.85	0.06
fpl300-400	factor(race)H	0.49	0.75	-0.96	0.34
fpl300-400	factor(race)O	0.63	0.63	-0.72	0.47
fpl400-500	(Intercept)	0.22	0.82	-1.85	0.06
fpl400-500	age_group25-34	6.24	0.53	3.43	0.00
fpl400-500	age_group35-44	4.16	0.57	2.50	0.01
fpl400-500	age_group45-54	2.84	0.65	1.61	0.11
fpl400-500	age_group55-65	2.03	0.68	1.04	0.30
fpl400-500	factor(race)H	1.17	0.82	0.19	0.85
fpl400-500	factor(race)O	0.78	0.73	-0.34	0.74
fpl500-600	(Intercept)	0.00	0.61	-25.46	0.00
fpl500-600	age_group25-34	21.47	0.96	3.18	0.00
fpl500-600	age_group35-44	4.64	1.10	1.39	0.16
fpl500-600	age_group45-54	8.93	1.07	2.05	0.04
fpl500-600	age_group55-65	2.52	1.30	0.71	0.48
fpl500-600	factor(race)H	347946.96	0.44	29.24	0.00
fpl500-600	factor(race)O	168177.03	0.38	31.32	0.00
fpl600+	(Intercept)	1.04	0.56	0.08	0.94
fpl600+	age_group25-34	4.29	0.36	4.04	0.00
fpl600+	age_group35-44	7.34	0.37	5.42	0.00
fpl600+	age_group45-54	9.30	0.39	5.69	0.00
fpl600+	age_group55-65	6.86	0.39	4.99	0.00
fpl600+	factor(race)H	0.40	0.61	-1.50	0.13
fpl600+	factor(race)O	0.93	0.53	-0.13	0.89

Model 4

inc_mlm4 <- multinom(income_cat ~ age_group + 
                       factor(race) + factor(region), 
                     data = cDFi, 
                     weights = ego.wawt)

## # weights:  60 (45 variable)
## initial  value 1510.111738 
## iter  10 value 1141.915601
## iter  20 value 1097.041447
## iter  30 value 1094.935028
## iter  40 value 1094.415678
## iter  50 value 1094.348000
## final  value 1094.347252 
## converged

kable(tidy(inc_mlm4, exponentiate = T), digits = 2, 
      caption= "Model 4") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 4
y.level	term	estimate	std.error	statistic	p.value
fpl138-300	(Intercept)	1.75	0.64	0.87	0.38
fpl138-300	age_group25-34	2.07	0.35	2.09	0.04
fpl138-300	age_group35-44	0.92	0.40	-0.20	0.84
fpl138-300	age_group45-54	1.02	0.44	0.04	0.97
fpl138-300	age_group55-65	2.14	0.39	1.95	0.05
fpl138-300	factor(race)H	0.60	0.62	-0.83	0.41
fpl138-300	factor(race)O	0.52	0.54	-1.22	0.22
fpl138-300	factor(region)King	1.92	0.37	1.74	0.08
fpl138-300	factor(region)WesternWA	1.25	0.37	0.60	0.55
fpl300-400	(Intercept)	0.28	0.81	-1.56	0.12
fpl300-400	age_group25-34	3.02	0.46	2.40	0.02
fpl300-400	age_group35-44	3.63	0.47	2.73	0.01
fpl300-400	age_group45-54	2.01	0.55	1.26	0.21
fpl300-400	age_group55-65	2.72	0.51	1.95	0.05
fpl300-400	factor(race)H	0.61	0.75	-0.65	0.51
fpl300-400	factor(race)O	0.73	0.63	-0.50	0.62
fpl300-400	factor(region)King	2.38	0.49	1.78	0.08
fpl300-400	factor(region)WesternWA	1.61	0.48	0.99	0.32
fpl400-500	(Intercept)	0.08	0.98	-2.60	0.01
fpl400-500	age_group25-34	6.23	0.54	3.41	0.00
fpl400-500	age_group35-44	4.34	0.57	2.55	0.01
fpl400-500	age_group45-54	2.94	0.65	1.65	0.10
fpl400-500	age_group55-65	2.19	0.68	1.15	0.25
fpl400-500	factor(race)H	1.54	0.82	0.52	0.60
fpl400-500	factor(race)O	0.93	0.74	-0.10	0.92
fpl400-500	factor(region)King	3.40	0.58	2.10	0.04
fpl400-500	factor(region)WesternWA	1.95	0.59	1.13	0.26
fpl500-600	(Intercept)	0.00	0.73	-21.69	0.00
fpl500-600	age_group25-34	21.14	0.97	3.16	0.00
fpl500-600	age_group35-44	4.83	1.11	1.42	0.15
fpl500-600	age_group45-54	9.60	1.07	2.11	0.03
fpl500-600	age_group55-65	2.75	1.30	0.78	0.44
fpl500-600	factor(race)H	285573.47	0.47	26.66	0.00
fpl500-600	factor(race)O	127991.23	0.45	26.20	0.00
fpl500-600	factor(region)King	3.06	0.74	1.51	0.13
fpl500-600	factor(region)WesternWA	1.22	0.78	0.25	0.80
fpl600+	(Intercept)	0.21	0.67	-2.33	0.02
fpl600+	age_group25-34	4.30	0.37	3.92	0.00
fpl600+	age_group35-44	8.09	0.38	5.48	0.00
fpl600+	age_group45-54	10.74	0.41	5.83	0.00
fpl600+	age_group55-65	8.08	0.40	5.22	0.00
fpl600+	factor(race)H	0.58	0.63	-0.86	0.39
fpl600+	factor(race)O	1.24	0.54	0.40	0.69
fpl600+	factor(region)King	7.27	0.38	5.17	0.00
fpl600+	factor(region)WesternWA	1.70	0.38	1.38	0.17

Comparison of AICs

models <- get_formula("inc_mlm", 4)
aics <- get_aics("inc_mlm", 4, "mlm")

kable(data.frame(model = models, 
                 AIC = aics),
      digits = 0,
      caption= paste("Comparison of AICs; n =", dim(cDFi)[[1]])) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Comparison of AICs; n = 842
model	AIC
income_cat ~ diag.status	2431
income_cat ~ age_group	2330
income_cat ~ age_group + factor(race)	2323
income_cat ~ age_group + factor(race) + factor(region)	2279

The final model is model 4, including age + race + region.

Predicted distribution

# Construct and save output
expand_df <- expand.grid(
  age_group = factor(c(1:5), 
                     levels = c(1:5), 
                     labels = levels(wDF$age_group)), 
  race = factor(c("B", "H", "O")), 
  region = c("EasternWA", "King", "WesternWA"))

pred_inc <- cbind(expand_df, 
                  predict(inc_mlm4, 
                          newdata = expand_df, 
                          "probs"))

pred_inc <- pred_inc %>% 
  gather(income, prob, `fpl138`:`fpl600+`)

income_levels <- data.frame(lvl = c(1:6), 
                            lb = c(0, 138, 300, 400, 500, 600), 
                            ub = c(137, 299, 399, 499, 599, 1000))

# Save output
save_out$pred_inc <- pred_inc
save_out$income_levels <- income_levels

ggplot(data = pred_inc, aes(x = income, y = prob, group = race)) +
  geom_line(aes(color = race)) +
  facet_wrap(~ region + age_group, ncol = 5) +
  ylim(0, 0.8) +
  labs(title = "FPL income category by race, region and age",
       y = "predicted distribution") + 
  theme(axis.text.x = element_text(size = 8, angle = 30, hjust = 0.5),
        strip.text.x = element_text(size = 8),
        legend.position = "bottom")

Insurance

Descriptive statistics

Overall

#with(cDF, table(insurance, useNA = "al"))

cDF %>%
  group_by(insurance) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "Distribution of insurance") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Distribution of insurance
insurance	n	n.wtd	prop.wtd
emplyr	576	586.9	0.64
indvdl_othr	61	54.9	0.06
medicare_caid	134	120.9	0.13
othr_gov	54	56.2	0.06
uninsrd_NA	98	104.8	0.11

HIV status

Table

tempDF <- cDF %>% 
  mutate(dx.status = ifelse(diag.status == 1, "HIV+", "HIV-")) %>%
  group_by(dx.status, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(dx.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = dx.status,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by Dx status; n =",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx status" = 2))

Distribution of insurance by Dx status; n = 923
	Dx status
insurance	HIV-	HIV+
emplyr	0.64	0.60
indvdl_othr	0.06	0.04
medicare_caid	0.12	0.21
othr_gov	0.05	0.14
uninsrd_NA	0.13	0.01

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = dx.status, fill = dx.status)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))

title.plotly <-
  list(text = paste0('Insurance by HIV Dx',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Age

Table

tempDF <- cDF %>% 
  group_by(age_group, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by age group; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age Group" = 5))

Distribution of insurance by age group; n = 923
	Age Group
insurance	15-24	25-34	35-44	45-54	55-65
emplyr	0.54	0.67	0.71	0.64	0.59
indvdl_othr	0.03	0.04	0.06	0.09	0.07
medicare_caid	0.13	0.12	0.10	0.10	0.22
othr_gov	0.05	0.07	0.06	0.06	0.06
uninsrd_NA	0.26	0.10	0.07	0.11	0.05

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))

title.plotly <-
  list(text = paste0('Insurance distribution by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

Almost all of the uninsured are Black or Hispanic.

tempDF <- cDF %>% 
  group_by(race, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by race; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3))

Distribution of insurance by race; n = 923
	Race
insurance	B	H	O
emplyr	0.47	0.52	0.67
indvdl_othr	0.02	0.02	0.07
medicare_caid	0.13	0.15	0.13
othr_gov	0.18	0.12	0.04
uninsrd_NA	0.20	0.19	0.10

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Insurance distribution by race',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

tempDF <- cDF %>% 
  group_by(region, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by region; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3))

Distribution of insurance by region; n = 923
	Region
insurance	EasternWA	King	WesternWA
emplyr	0.46	0.71	0.56
indvdl_othr	0.10	0.06	0.05
medicare_caid	0.22	0.08	0.19
othr_gov	0.04	0.04	0.10
uninsrd_NA	0.18	0.11	0.10

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  ylab("wtd proportion") +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Insurance distribution by region',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

# We show the missing income cases in the table
tempDF <- cDF %>% 
  group_by(income_cat, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by FPL income category; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL income category" = 7))

Distribution of insurance by FPL income category; n = 923
	FPL income category
insurance	fpl138	fpl138-300	fpl300-400	fpl400-500	fpl500-600	fpl600+	Missing
emplyr	0.18	0.49	0.76	0.72	0.92	0.83	0.27
indvdl_othr	0.07	0.08	0.07	0.02	0.02	0.06	0.03
medicare_caid	0.56	0.19	0.08	0.10	NA	0.02	0.15
othr_gov	0.06	0.08	0.06	0.13	0.06	0.05	0.06
uninsrd_NA	0.14	0.16	0.04	0.04	NA	0.04	0.48

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Insurance distribution by FPL income category',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Prediction

Multinomial regressions

insDF <- cDF %>%
  mutate(age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"))

Model 1

ins_mlm1 <- multinom(insurance ~ age_group, 
                     data = insDF, 
                     weights = ego.wawt)

## # weights:  30 (20 variable)
## initial  value 1486.639404 
## iter  10 value 1047.234884
## iter  20 value 1023.345868
## final  value 1023.317376 
## converged

kable(tidy(ins_mlm1, exponentiate = T), 
      caption= "Model 1", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 1
y.level	term	estimate	std.error	statistic	p.value
indvdl_othr	(Intercept)	0.057	0.467	-6.117	0.000
indvdl_othr	age_group25-34	1.122	0.570	0.203	0.839
indvdl_othr	age_group35-44	1.539	0.555	0.776	0.438
indvdl_othr	age_group45-54	2.418	0.543	1.627	0.104
indvdl_othr	age_group55-65	2.189	0.552	1.420	0.156
medicare_caid	(Intercept)	0.236	0.249	-5.797	0.000
medicare_caid	age_group25-34	0.752	0.324	-0.880	0.379
medicare_caid	age_group35-44	0.568	0.352	-1.605	0.109
medicare_caid	age_group45-54	0.666	0.362	-1.123	0.261
medicare_caid	age_group55-65	1.566	0.313	1.435	0.151
othr_gov	(Intercept)	0.091	0.377	-6.359	0.000
othr_gov	age_group25-34	1.099	0.462	0.205	0.838
othr_gov	age_group35-44	0.966	0.483	-0.072	0.943
othr_gov	age_group45-54	1.037	0.500	0.072	0.942
othr_gov	age_group55-65	1.165	0.493	0.309	0.757
uninsrd_NA	(Intercept)	0.484	0.191	-3.811	0.000
uninsrd_NA	age_group25-34	0.309	0.293	-4.007	0.000
uninsrd_NA	age_group35-44	0.197	0.347	-4.674	0.000
uninsrd_NA	age_group45-54	0.362	0.315	-3.234	0.001
uninsrd_NA	age_group55-65	0.180	0.396	-4.332	0.000

Model 2

ins_mlm2 <- multinom(insurance ~ age_group + race, 
                     data = insDF, 
                     weights = ego.wawt)

## # weights:  40 (28 variable)
## initial  value 1486.639404 
## iter  10 value 1018.895977
## iter  20 value 1002.876330
## iter  30 value 1002.839331
## final  value 1002.839301 
## converged

kable(tidy(ins_mlm2, exponentiate = T), 
      caption= "Model 2", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 2
y.level	term	estimate	std.error	statistic	p.value
indvdl_othr	(Intercept)	0.062	0.470	-5.900	0.000
indvdl_othr	age_group25-34	1.128	0.570	0.212	0.832
indvdl_othr	age_group35-44	1.547	0.556	0.785	0.433
indvdl_othr	age_group45-54	2.363	0.544	1.582	0.114
indvdl_othr	age_group55-65	2.111	0.553	1.352	0.176
indvdl_othr	raceB	0.422	0.942	-0.917	0.359
indvdl_othr	raceH	0.524	0.657	-0.982	0.326
medicare_caid	(Intercept)	0.216	0.257	-5.974	0.000
medicare_caid	age_group25-34	0.753	0.325	-0.875	0.381
medicare_caid	age_group35-44	0.568	0.353	-1.602	0.109
medicare_caid	age_group45-54	0.685	0.363	-1.043	0.297
medicare_caid	age_group55-65	1.631	0.315	1.554	0.120
medicare_caid	raceB	1.492	0.421	0.951	0.342
medicare_caid	raceH	1.594	0.316	1.475	0.140
othr_gov	(Intercept)	0.060	0.403	-7.004	0.000
othr_gov	age_group25-34	1.044	0.470	0.091	0.927
othr_gov	age_group35-44	0.926	0.491	-0.156	0.876
othr_gov	age_group45-54	1.107	0.510	0.199	0.842
othr_gov	age_group55-65	1.323	0.504	0.555	0.579
othr_gov	raceB	6.275	0.394	4.659	0.000
othr_gov	raceH	3.730	0.368	3.575	0.000
uninsrd_NA	(Intercept)	0.388	0.205	-4.618	0.000
uninsrd_NA	age_group25-34	0.301	0.298	-4.031	0.000
uninsrd_NA	age_group35-44	0.193	0.351	-4.681	0.000
uninsrd_NA	age_group45-54	0.377	0.319	-3.058	0.002
uninsrd_NA	age_group55-65	0.194	0.400	-4.102	0.000
uninsrd_NA	raceB	3.324	0.370	3.249	0.001
uninsrd_NA	raceH	2.322	0.302	2.785	0.005

Model 3

ins_mlm3 <- multinom(insurance ~ age_group + race + 
                       region, 
                     data = insDF, 
                     weights = ego.wawt)

## # weights:  50 (36 variable)
## initial  value 1486.639404 
## iter  10 value 992.679394
## iter  20 value 978.742810
## iter  30 value 978.700528
## final  value 978.700493 
## converged

kable(tidy(ins_mlm3, exponentiate = T), 
      caption= "Model 3", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 3
y.level	term	estimate	std.error	statistic	p.value
indvdl_othr	(Intercept)	0.053	0.492	-5.967	0.000
indvdl_othr	age_group25-34	1.160	0.572	0.260	0.795
indvdl_othr	age_group35-44	1.583	0.558	0.824	0.410
indvdl_othr	age_group45-54	2.449	0.546	1.642	0.101
indvdl_othr	age_group55-65	2.140	0.554	1.372	0.170
indvdl_othr	raceB	0.458	0.943	-0.829	0.407
indvdl_othr	raceH	0.479	0.660	-1.115	0.265
indvdl_othr	regionEasternWA	2.798	0.418	2.459	0.014
indvdl_othr	regionWesternWA	1.096	0.325	0.281	0.779
medicare_caid	(Intercept)	0.120	0.292	-7.272	0.000
medicare_caid	age_group25-34	0.807	0.331	-0.650	0.516
medicare_caid	age_group35-44	0.594	0.359	-1.454	0.146
medicare_caid	age_group45-54	0.721	0.369	-0.889	0.374
medicare_caid	age_group55-65	1.647	0.321	1.553	0.120
medicare_caid	raceB	1.750	0.428	1.308	0.191
medicare_caid	raceH	1.467	0.325	1.180	0.238
medicare_caid	regionEasternWA	3.958	0.321	4.283	0.000
medicare_caid	regionWesternWA	2.761	0.224	4.541	0.000
othr_gov	(Intercept)	0.035	0.443	-7.597	0.000
othr_gov	age_group25-34	1.084	0.475	0.171	0.864
othr_gov	age_group35-44	0.926	0.496	-0.156	0.876
othr_gov	age_group45-54	1.080	0.515	0.149	0.882
othr_gov	age_group55-65	1.289	0.508	0.499	0.618
othr_gov	raceB	7.024	0.405	4.809	0.000
othr_gov	raceH	3.805	0.376	3.557	0.000
othr_gov	regionEasternWA	1.601	0.585	0.806	0.421
othr_gov	regionWesternWA	3.247	0.304	3.878	0.000
uninsrd_NA	(Intercept)	0.331	0.230	-4.805	0.000
uninsrd_NA	age_group25-34	0.308	0.300	-3.927	0.000
uninsrd_NA	age_group35-44	0.198	0.353	-4.591	0.000
uninsrd_NA	age_group45-54	0.394	0.321	-2.902	0.004
uninsrd_NA	age_group55-65	0.197	0.401	-4.056	0.000
uninsrd_NA	raceB	3.492	0.373	3.357	0.001
uninsrd_NA	raceH	2.161	0.306	2.515	0.012
uninsrd_NA	regionEasternWA	2.424	0.340	2.605	0.009
uninsrd_NA	regionWesternWA	1.174	0.250	0.641	0.522

Model 4

ins_mlm4 <- multinom(insurance ~ age_group + race + 
                       region + diag.status, 
                     data = insDF, 
                     weights = ego.wawt)

## # weights:  55 (40 variable)
## initial  value 1486.639404 
## iter  10 value 983.360636
## iter  20 value 968.193265
## iter  30 value 968.135699
## final  value 968.135623 
## converged

kable(tidy(ins_mlm4, exponentiate = T), 
      caption= "Model 4", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 4
y.level	term	estimate	std.error	statistic	p.value
indvdl_othr	(Intercept)	0.053	0.492	-5.957	0.000
indvdl_othr	age_group25-34	1.171	0.572	0.276	0.783
indvdl_othr	age_group35-44	1.636	0.559	0.881	0.378
indvdl_othr	age_group45-54	2.557	0.547	1.715	0.086
indvdl_othr	age_group55-65	2.351	0.563	1.519	0.129
indvdl_othr	raceB	0.457	0.943	-0.830	0.407
indvdl_othr	raceH	0.485	0.661	-1.094	0.274
indvdl_othr	regionEasternWA	2.757	0.419	2.420	0.016
indvdl_othr	regionWesternWA	1.079	0.326	0.234	0.815
indvdl_othr	diag.status	0.639	0.540	-0.828	0.407
medicare_caid	(Intercept)	0.118	0.292	-7.297	0.000
medicare_caid	age_group25-34	0.796	0.331	-0.690	0.490
medicare_caid	age_group35-44	0.558	0.362	-1.613	0.107
medicare_caid	age_group45-54	0.671	0.372	-1.073	0.283
medicare_caid	age_group55-65	1.423	0.336	1.052	0.293
medicare_caid	raceB	1.724	0.429	1.270	0.204
medicare_caid	raceH	1.445	0.325	1.134	0.257
medicare_caid	regionEasternWA	4.094	0.323	4.370	0.000
medicare_caid	regionWesternWA	2.815	0.225	4.609	0.000
medicare_caid	diag.status	1.680	0.312	1.662	0.096
othr_gov	(Intercept)	0.035	0.443	-7.574	0.000
othr_gov	age_group25-34	1.017	0.476	0.036	0.971
othr_gov	age_group35-44	0.725	0.512	-0.627	0.530
othr_gov	age_group45-54	0.884	0.523	-0.236	0.813
othr_gov	age_group55-65	0.860	0.539	-0.279	0.781
othr_gov	raceB	6.845	0.411	4.676	0.000
othr_gov	raceH	3.493	0.380	3.290	0.001
othr_gov	regionEasternWA	1.787	0.586	0.991	0.322
othr_gov	regionWesternWA	3.314	0.306	3.911	0.000
othr_gov	diag.status	3.037	0.394	2.822	0.005
uninsrd_NA	(Intercept)	0.332	0.230	-4.783	0.000
uninsrd_NA	age_group25-34	0.316	0.300	-3.844	0.000
uninsrd_NA	age_group35-44	0.220	0.354	-4.281	0.000
uninsrd_NA	age_group45-54	0.442	0.323	-2.523	0.012
uninsrd_NA	age_group55-65	0.256	0.406	-3.361	0.001
uninsrd_NA	raceB	3.451	0.374	3.314	0.001
uninsrd_NA	raceH	2.232	0.309	2.600	0.009
uninsrd_NA	regionEasternWA	2.327	0.341	2.479	0.013
uninsrd_NA	regionWesternWA	1.154	0.251	0.571	0.568
uninsrd_NA	diag.status	0.097	1.186	-1.967	0.049

Model 5

We keep the missing income level as a predictor in the model

ins_mlm5 <- multinom(insurance ~ age_group + race + 
                       region + diag.status + income_cat, 
                     data = insDF, 
                     weights = ego.wawt)

## # weights:  85 (64 variable)
## initial  value 1486.639404 
## iter  10 value 846.422845
## iter  20 value 810.564439
## iter  30 value 810.111301
## iter  40 value 809.984911
## iter  50 value 809.966462
## iter  60 value 809.964342
## final  value 809.964118 
## converged

kable(tidy(ins_mlm5, exponentiate = T), 
      caption= "Model 5", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 5
y.level	term	estimate	std.error	statistic	p.value
indvdl_othr	(Intercept)	0.022	0.559	-6.790	0.000
indvdl_othr	age_group25-34	1.842	0.591	1.034	0.301
indvdl_othr	age_group35-44	3.048	0.587	1.900	0.057
indvdl_othr	age_group45-54	5.316	0.583	2.865	0.004
indvdl_othr	age_group55-65	4.293	0.596	2.446	0.014
indvdl_othr	raceB	0.422	0.947	-0.912	0.362
indvdl_othr	raceH	0.452	0.671	-1.183	0.237
indvdl_othr	regionEasternWA	2.089	0.438	1.680	0.093
indvdl_othr	regionWesternWA	0.859	0.340	-0.446	0.656
indvdl_othr	diag.status	0.580	0.552	-0.988	0.323
indvdl_othr	income_catfpl138	8.817	0.553	3.933	0.000
indvdl_othr	income_catfpl138-300	3.382	0.378	3.221	0.001
indvdl_othr	income_catfpl300-400	1.716	0.510	1.059	0.290
indvdl_othr	income_catfpl400-500	0.397	1.149	-0.803	0.422
indvdl_othr	income_catfpl500-600	0.466	1.378	-0.554	0.580
indvdl_othr	income_catMissing	2.254	0.707	1.149	0.251
medicare_caid	(Intercept)	0.007	0.489	-10.252	0.000
medicare_caid	age_group25-34	2.052	0.392	1.834	0.067
medicare_caid	age_group35-44	1.900	0.438	1.465	0.143
medicare_caid	age_group45-54	3.068	0.457	2.453	0.014
medicare_caid	age_group55-65	5.930	0.418	4.255	0.000
medicare_caid	raceB	1.497	0.511	0.791	0.429
medicare_caid	raceH	1.286	0.379	0.664	0.507
medicare_caid	regionEasternWA	2.080	0.386	1.896	0.058
medicare_caid	regionWesternWA	1.603	0.269	1.752	0.080
medicare_caid	diag.status	1.556	0.382	1.157	0.247
medicare_caid	income_catfpl138	167.038	0.474	10.796	0.000
medicare_caid	income_catfpl138-300	15.340	0.409	6.668	0.000
medicare_caid	income_catfpl300-400	3.927	0.558	2.451	0.014
medicare_caid	income_catfpl400-500	5.874	0.593	2.988	0.003
medicare_caid	income_catfpl500-600	0.000	358.988	-0.031	0.975
medicare_caid	income_catMissing	23.694	0.515	6.150	0.000
othr_gov	(Intercept)	0.016	0.528	-7.819	0.000
othr_gov	age_group25-34	1.360	0.496	0.620	0.535
othr_gov	age_group35-44	1.223	0.543	0.371	0.711
othr_gov	age_group45-54	1.620	0.561	0.860	0.390
othr_gov	age_group55-65	1.500	0.561	0.723	0.470
othr_gov	raceB	6.852	0.421	4.572	0.000
othr_gov	raceH	3.181	0.391	2.960	0.003
othr_gov	regionEasternWA	1.439	0.595	0.611	0.541
othr_gov	regionWesternWA	2.663	0.316	3.103	0.002
othr_gov	diag.status	2.857	0.404	2.601	0.009
othr_gov	income_catfpl138	6.317	0.588	3.134	0.002
othr_gov	income_catfpl138-300	2.549	0.417	2.245	0.025
othr_gov	income_catfpl300-400	1.130	0.577	0.212	0.832
othr_gov	income_catfpl400-500	2.615	0.529	1.818	0.069
othr_gov	income_catfpl500-600	1.247	0.917	0.241	0.810
othr_gov	income_catMissing	3.902	0.598	2.275	0.023
uninsrd_NA	(Intercept)	0.058	0.383	-7.430	0.000
uninsrd_NA	age_group25-34	0.616	0.341	-1.419	0.156
uninsrd_NA	age_group35-44	0.627	0.404	-1.155	0.248
uninsrd_NA	age_group45-54	1.379	0.395	0.812	0.417
uninsrd_NA	age_group55-65	0.508	0.465	-1.457	0.145
uninsrd_NA	raceB	4.036	0.404	3.454	0.001
uninsrd_NA	raceH	2.461	0.354	2.544	0.011
uninsrd_NA	regionEasternWA	1.668	0.392	1.306	0.192
uninsrd_NA	regionWesternWA	0.777	0.285	-0.887	0.375
uninsrd_NA	diag.status	0.105	1.199	-1.882	0.060
uninsrd_NA	income_catfpl138	13.954	0.486	5.426	0.000
uninsrd_NA	income_catfpl138-300	5.617	0.366	4.712	0.000
uninsrd_NA	income_catfpl300-400	0.932	0.664	-0.106	0.915
uninsrd_NA	income_catfpl400-500	0.925	0.787	-0.099	0.921
uninsrd_NA	income_catfpl500-600	0.000	525.987	-0.024	0.980
uninsrd_NA	income_catMissing	35.040	0.400	8.899	0.000

Comparison of AICs

models <- get_formula("ins_mlm", 5)
aics <- get_aics("ins_mlm", 5, "mlm")

kable(data.frame(Model = models, 
                 AIC = aics), 
      caption= paste("Comparison of AICs; n =", dim(cDF)[[1]]),
      digits = 0) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Comparison of AICs; n = 923
Model	AIC
insurance ~ age_group	2087
insurance ~ age_group + race	2062
insurance ~ age_group + race + region	2029
insurance ~ age_group + race + region + diag.status	2016
insurance ~ age_group + race + region + diag.status + income_cat	1748

Income is a powerful predictor, but we can’t use it in simulation, so we will use model 4.

Predicted distribution

The final model includes age, race, region and diagnostic status.

#### Construct and save output
expand_df <- expand.grid(
  age_group = names(table(wDF$age_group)),
  race = c("B", "H", "O"), 
  region = c("EasternWA", "King", "WesternWA"), 
  diag.status = c(0, 1)
  )

pred_ins <- cbind(expand_df, 
                  predict(ins_mlm4, 
                          newdata = expand_df, 
                          "probs")) 

# For some reason ZK included age_group in the model/prediction, but she
# averaged over it in the saved DF.  We don't do that here.  We also use
# pivot_longer in place of gathers

pred_insl <- pred_ins %>% 
  mutate(diag.status = ifelse(diag.status == 0, "HIV-", "HIV+")) %>%
  pivot_longer(c(emplyr:uninsrd_NA), 
               names_to = "insurance", 
               values_to = "prob")

# Save to output object
save_out$pred_ins <- pred_insl

By race, region and Dx

ggplot(data = pred_insl,
       aes(x = race, y = prob/6, # sums over omitted var age_group
           group = insurance, fill = insurance)) +
  geom_bar(stat="identity") +
  facet_wrap(diag.status ~ region) +
  labs(title = "Insurance distribution by race, Dx and region",
       caption = paste("Multinomial regression results based on n =",
                       dim(cDF)[[1]],
                       "; Dx HIV+ =",
                       scales::percent(proportions(table(cDF$diag.status))[[2]], 
                                       accuracy = 0.1),
                       "; assumes homogenous age distributions"),
       y = "predicted distribution") + 
  theme(strip.text = element_text(size = 10),
        axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12), 
        axis.text.x = element_text(size = 10, hjust = 0.5), 
        axis.text.y = element_text(size = 10), 
        legend.position = "bottom")

By age, region and Dx

ggplot(data = pred_insl,
       aes(x = age_group, y = prob/3, # sums over omitted var race 
           group = insurance, fill = insurance)) +
  geom_bar(stat="identity") +
  facet_wrap(diag.status ~ region) +
  labs(title = "Insurance distribution by Age, Dx and region",
       caption = paste("Multinomial regression results based on n =",
                       dim(cDF)[[1]],
                       "; Dx HIV+ =",
                       scales::percent(proportions(table(cDF$diag.status))[[2]], 
                                       accuracy = 0.1),
                       "; assumes homogenous race distributions"),
       y = "predicted distribution") + 
  theme(strip.text = element_text(size = 10),
        axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12), 
        axis.text.x = element_text(size = 10, hjust = 0.5), 
        axis.text.y = element_text(size = 10), 
        legend.position = "bottom")

PrEP aware HIV-

Descriptive statistics

Overall

Very few HIV- are not aware of PrEP.

#with(wDF, table(PREP_AWARE, useNA = "al")

baseDF <- cDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) 

tempDF <- baseDF %>%
  group_by(PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of PrEP awareness") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))  %>%
  footnote(paste("HIV- ; n = ",
                 sum(tempDF$n)))

Distribution of PrEP awareness
PREP_AWARE	n	n.wtd	prop.wtd
aware	697	691.1	0.93
unaware	52	49.8	0.07
Note:
HIV- ; n = 749

Age

Table

tempDF <- baseDF %>%
  group_by(age_group, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= "Distribution of PrEP awareness by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))

Distribution of PrEP awareness by age
	Age group
PREP_AWARE	15-24	25-34	35-44	45-54	55-65
aware	0.9	0.96	0.95	0.95	0.88
unaware	0.1	0.04	0.05	0.05	0.12
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) +
  scale_colour_brewer(palette = "YlOrRd")

title.plotly <-
  list(text = paste0('PrEP awareness by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

tempDF <- baseDF %>%
  group_by(race, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))

Distribution of PrEP awareness by race
	Race
PREP_AWARE	B	H	O
aware	0.93	0.79	0.95
unaware	0.07	0.21	0.05
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

tempDF <- wDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) %>%
  group_by(region, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))

Distribution of PrEP awareness by region
	Region
PREP_AWARE	EasternWA	King	WesternWA
aware	0.8	0.97	0.91
unaware	0.2	0.03	0.09
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- wDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) %>%
  group_by(insurance, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))

Distribution of PrEP awareness by insurance
	Insurance
PREP_AWARE	emplyr	indvdl_othr	medicare_caid	othr_gov	uninsrd_NA
aware	0.96	0.99	0.89	0.91	0.75
unaware	0.04	0.01	0.11	0.09	0.25
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- wDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) %>%
  group_by(income_cat, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Income" = 7))  %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))

Distribution of PrEP awareness by FPL Income Category
	Income
PREP_AWARE	fpl138	fpl138-300	fpl300-400	fpl400-500	fpl500-600	fpl600+	Missing
aware	0.8	0.93	0.95	0.94	0.96	0.96	0.89
unaware	0.2	0.07	0.05	0.06	0.04	0.04	0.11
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Prediction

Logistic regression

Model 1

library(survey)

prepDF <- cDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2) %>%
  mutate(age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"),
         insurance = relevel(factor(insurance), ref = "emplyr"))

design <- svydesign(id = ~Vrid, 
                    data = prepDF, 
                    weights = ~ego.wawt)

logitm1 <- svyglm(PREP_AWARE ~ age_group,
                  data = prepDF, 
                  family = quasibinomial(), 
                  design)

kable(tidy(logitm1, exponentiate = T), 
      caption= "Model 1", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 1
term	estimate	std.error	statistic	p.value
(Intercept)	8.83	0.32	6.89	0.00
age_group25-34	2.45	0.45	1.97	0.05
age_group35-44	2.34	0.54	1.58	0.11
age_group45-54	2.01	0.47	1.47	0.14
age_group55-65	0.86	0.46	-0.34	0.74

Model 2

logitm2 <- svyglm(PREP_AWARE ~ race,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm2, exponentiate = T), 
      caption= "Model 2", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 2
term	estimate	std.error	statistic	p.value
(Intercept)	20.17	0.18	16.27	0.00
raceB	0.63	0.71	-0.64	0.52
raceH	0.19	0.37	-4.59	0.00

Model 3

logitm3 <- svyglm(PREP_AWARE ~ region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm3, exponentiate = T), 
      caption= "Model 3", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 3
term	estimate	std.error	statistic	p.value
(Intercept)	35.08	0.35	10.29	0.0000
regionEasternWA	0.11	0.46	-4.74	0.0000
regionWesternWA	0.28	0.41	-3.11	0.0019

Model 4

logitm4 <- svyglm(PREP_AWARE ~ insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm4, exponentiate = T), 
      caption= "Model 4", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 4
term	estimate	std.error	statistic	p.value
(Intercept)	23.25	0.24	12.85	0.0000
insuranceindvdl_othr	3.03	1.04	1.06	0.2874
insurancemedicare_caid	0.34	0.42	-2.55	0.0109
insuranceothr_gov	0.46	0.59	-1.31	0.1902
insuranceuninsrd_NA	0.13	0.41	-4.94	0.0000

Model 5

logitm5 <- svyglm(PREP_AWARE ~ race + region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 5
term	estimate	std.error	statistic	p.value
(Intercept)	49.61	0.38	10.19	0.0000
raceB	0.54	0.72	-0.84	0.4023
raceH	0.21	0.36	-4.42	0.0000
regionEasternWA	0.13	0.45	-4.59	0.0000
regionWesternWA	0.27	0.42	-3.12	0.0019

Model 6

logitm6 <- svyglm(PREP_AWARE ~ race + region + insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm6, exponentiate = T), 
      caption= "Model 6", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 6
term	estimate	std.error	statistic	p.value
(Intercept)	62.86	0.41	10.03	0.0000
raceB	0.62	0.71	-0.69	0.4930
raceH	0.26	0.42	-3.25	0.0012
regionEasternWA	0.14	0.49	-3.99	0.0001
regionWesternWA	0.30	0.42	-2.88	0.0041
insuranceindvdl_othr	3.17	1.03	1.12	0.2629
insurancemedicare_caid	0.50	0.42	-1.62	0.1051
insuranceothr_gov	0.98	0.74	-0.03	0.9744
insuranceuninsrd_NA	0.21	0.44	-3.63	0.0003

Comparison of AICs

models <- get_formula("logitm", 6)
aics <- get_aics("logitm", 6, "logit")

kable(data.frame(Model = models, 
                 AIC = aics), 
      caption= "AIC comparison", digits = 1) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

AIC comparison
Model	AIC
PREP_AWARE ~ age_group	368.5
PREP_AWARE ~ race	351.6
PREP_AWARE ~ region	343.2
PREP_AWARE ~ insurance	345.4
PREP_AWARE ~ race + region	328.8
PREP_AWARE ~ race + region + insurance	319.3

Insurance is a strong predictor, but we can’t simulate with it, so we will use model 5 with race + region.

Predicted distribution

#### Construct and save output
expand_df <- expand.grid(
  race = levels(prepDF$race),
  region = levels(prepDF$region))

pred_df <- cbind(expand_df, 
                 prob = predict(logitm5, 
                                newdata = expand_df, 
                                type = "response", 
                                npop = 1000))
pred_df <- pred_df %>% 
  select(-prob.SE) %>%
  rename(prob = prob.response)

## Save objects
save_out$pred_prep_aware <- pred_df

fig <- ggplot(data = pred_df, 
              aes(x = region, y = prob, text = round(prob,2),
                  group = race, fill = race)) +
  geom_bar(stat = "identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(plot.margin = margin(t=30,r=20,b=20,l=20),
        legend.position = "bottom")

title.plotly <-
  list(text = paste0('Predicted PrEP Awareness by race and region',
                     '<br>',
                     '<sup>',
                     'n = ',
                     dim(prepDF)[[1]],
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Ever used PrEP HIV-

For PrEP aware respondents.

Descriptive statistics

Overall

#with(wDF, table(diag.status, PREP_EVER, useNA ="al"))
# there are a handful of HIV+ who have ever used prep, we exclude them

baseDF <- cDF %>%
  filter(diag.status == 0 & PREP_EVER < 2) %>%
  mutate(PREP_EVER = ifelse(PREP_EVER== 1, 
                            "ever used prep",
                            "never used prep")) 

tempDF <- baseDF %>%
  group_by(PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of ever PrEP (HIV-)") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Distribution of ever PrEP (HIV-)
PREP_EVER	n	n.wtd	prop.wtd
ever used prep	222	225.2	0.37
never used prep	395	387.8	0.63

Age

Table

tempDF <- baseDF %>%
  group_by(age_group, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= "Distribution of Ever PrEP use by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))

Distribution of Ever PrEP use by age
	Age group
PREP_EVER	15-24	25-34	35-44	45-54	55-65
ever used prep	0.21	0.42	0.4	0.41	0.28
never used prep	0.79	0.58	0.6	0.59	0.72
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Ever PrEP use by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

tempDF <- baseDF %>%
  group_by(race, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))

Distribution of Ever PrEP use by race
	Race
PREP_EVER	B	H	O
ever used prep	0.44	0.29	0.37
never used prep	0.56	0.71	0.63
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

tempDF <- baseDF %>%
  group_by(region, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))

Distribution of Ever PrEP use by region
	Region
PREP_EVER	EasternWA	King	WesternWA
ever used prep	0.21	0.42	0.31
never used prep	0.79	0.58	0.69
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- baseDF %>%
  group_by(insurance, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))

Distribution of Ever PrEP use by insurance
	Insurance
PREP_EVER	emplyr	indvdl_othr	medicare_caid	othr_gov	uninsrd_NA
ever used prep	0.38	0.4	0.35	0.41	0.21
never used prep	0.62	0.6	0.65	0.59	0.79
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- baseDF %>%
  group_by(income_cat, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL Income category" = 7)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))

Distribution of Ever PrEP use by FPL Income Category
	FPL Income category
PREP_EVER	fpl138	fpl138-300	fpl300-400	fpl400-500	fpl500-600	fpl600+	Missing
ever used prep	0.16	0.3	0.45	0.42	0.61	0.41	0.23
never used prep	0.84	0.7	0.55	0.58	0.39	0.59	0.77
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Prediction

Logistic regression

Model 1

prepDF <- cDF %>%
  filter(diag.status == 0 & PREP_EVER < 2) %>%
  mutate(age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"),
         insurance = relevel(factor(insurance), ref = "emplyr"))

design <- svydesign(id = ~Vrid, 
                    data = prepDF, 
                    weights = ~ego.wawt)

logitm1 <- svyglm(PREP_EVER ~ age_group,
                  data = prepDF, 
                  family = quasibinomial(), 
                  design)

kable(tidy(logitm1, exponentiate = T), 
      caption= "Model 1", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 1
term	estimate	std.error	statistic	p.value
(Intercept)	0.27	0.32	-4.15	0.00
age_group25-34	2.74	0.35	2.88	0.00
age_group35-44	2.51	0.39	2.37	0.02
age_group45-54	2.61	0.38	2.56	0.01
age_group55-65	1.42	0.40	0.88	0.38

Model 2

logitm2 <- svyglm(PREP_EVER ~ race,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm2, exponentiate = T), 
      caption= "Model 2", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 2
term	estimate	std.error	statistic	p.value
(Intercept)	0.59	0.09	-5.77	0.00
raceB	1.35	0.66	0.45	0.65
raceH	0.70	0.32	-1.12	0.26

Model 3

logitm3 <- svyglm(PREP_EVER ~ region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm3, exponentiate = T), 
      caption= "Model 3", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 3
term	estimate	std.error	statistic	p.value
(Intercept)	0.72	0.13	-2.53	0.0117
regionEasternWA	0.37	0.33	-3.05	0.0024
regionWesternWA	0.63	0.19	-2.38	0.0176

Model 4

logitm4 <- svyglm(PREP_EVER ~ insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm4, exponentiate = T), 
      caption= "Model 4", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 4
term	estimate	std.error	statistic	p.value
(Intercept)	0.60	0.11	-4.45	0.0000
insuranceindvdl_othr	1.12	0.34	0.34	0.7377
insurancemedicare_caid	0.89	0.28	-0.41	0.6802
insuranceothr_gov	1.16	0.38	0.39	0.6998
insuranceuninsrd_NA	0.45	0.47	-1.69	0.0907

Model 5

logitm5 <- svyglm(PREP_EVER ~ income_cat,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 5
term	estimate	std.error	statistic	p.value
(Intercept)	0.68	0.14	-2.68	0.0076
income_catfpl138	0.27	0.38	-3.37	0.0008
income_catfpl138-300	0.61	0.25	-1.96	0.0500
income_catfpl300-400	1.18	0.31	0.54	0.5891
income_catfpl400-500	1.08	0.36	0.21	0.8366
income_catfpl500-600	2.26	0.50	1.64	0.1015
income_catMissing	0.45	0.54	-1.49	0.1363

Model 6

logitm6 <- svyglm(PREP_EVER ~ age_group + region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm6, exponentiate = T), 
      caption= "Model 6", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 6
term	estimate	std.error	statistic	p.value
(Intercept)	0.34	0.33	-3.23	0.0013
age_group25-34	2.63	0.35	2.73	0.0064
age_group35-44	2.37	0.39	2.23	0.0263
age_group45-54	2.48	0.38	2.38	0.0177
age_group55-65	1.39	0.40	0.82	0.4115
regionEasternWA	0.38	0.33	-3.01	0.0028
regionWesternWA	0.68	0.19	-2.01	0.0450

Model 7

logitm7 <- svyglm(PREP_EVER ~ age_group + region + income_cat,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm7, exponentiate = T), 
      caption= "Model 7", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 7
term	estimate	std.error	statistic	p.value
(Intercept)	0.41	0.36	-2.47	0.0138
age_group25-34	2.21	0.36	2.21	0.0278
age_group35-44	1.96	0.39	1.73	0.0848
age_group45-54	2.05	0.39	1.84	0.0665
age_group55-65	1.27	0.41	0.58	0.5620
regionEasternWA	0.39	0.33	-2.81	0.0051
regionWesternWA	0.72	0.20	-1.65	0.0990
income_catfpl138	0.38	0.39	-2.51	0.0124
income_catfpl138-300	0.74	0.25	-1.19	0.2345
income_catfpl300-400	1.41	0.31	1.09	0.2741
income_catfpl400-500	1.13	0.36	0.35	0.7296
income_catfpl500-600	2.34	0.52	1.63	0.1030
income_catMissing	0.63	0.56	-0.84	0.4031

Comparison of AICs

models <- get_formula("logitm", 7)
aics <- get_aics("logitm", 7, "logit")

kable(data.frame(Model = models, 
                 AIC = aics),
      caption= "AIC comparison", digits = 1) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

AIC comparison
Model	AIC
PREP_EVER ~ age_group	804.7
PREP_EVER ~ race	819.1
PREP_EVER ~ region	803.0
PREP_EVER ~ insurance	815.9
PREP_EVER ~ income_cat	800.1
PREP_EVER ~ age_group + region	798.0
PREP_EVER ~ age_group + region + income_cat	795.9

Income has moderate predictive value, but we can’t use it in simulation, so we use model 6 with age and region.

Predicted distribution

Construct and save output

expand_df <- expand.grid(
  age_group = levels(prepDF$age_group),
  region = levels(prepDF$region))

pred_df <- cbind(expand_df, 
                 prob = predict(logitm6, 
                                newdata = expand_df, 
                                type = "response", 
                                npop = 1000))
pred_df <- pred_df %>% 
  select(-prob.SE) %>%
  rename(prob = prob.response)

## Save objects
save_out$pred_prep_ever <- pred_df

Plot

fig <- ggplot(data = pred_df, 
              aes(x = age_group, y = prob, text = round(prob,2),
                  group = region, color = region)) +
  geom_line(alpha = 0.7) +
  geom_point() +
  labs(x = "age group", y = "predicted probability") +
  theme(plot.margin = margin(t=30,r=20,b=20,l=20),
        legend.position = "bottom")

title.plotly <-
  list(text = paste0('Predicted Ever PrEP use by age and region',
                     '<br>',
                     '<sup>',
                     'n = ',
                     dim(prepDF)[[1]],
                     '; race was not a significant predictor',
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Current PrEP use

This is not used as an input parameter but as a target or validation at the end.
Variable: PREP_CURRENT

Descriptive statistics

Of the 227 respondents who report ever taking PrEP, 71% are currently on PrEP.

Overall

# NA is a mix of not aware + never prep; we recode them to 0
#with(wDF, table(diag.status, PREP_CURRENT, useNA ="al"))

baseDF <- cDF %>%
  filter(diag.status == 0) %>%
  mutate(PREP_CURRENT = ifelse(is.na(PREP_CURRENT) | PREP_CURRENT == 0, 
                            "not on prep",
                            "on prep")) 

tempDF <- baseDF %>%
  group_by(PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of current PrEP use (HIV-)") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Distribution of current PrEP use (HIV-)
PREP_CURRENT	n	n.wtd	prop.wtd
not on prep	670	661.0	0.8
on prep	161	167.5	0.2

Age

Table

prep_curr_age <- baseDF %>%
  group_by(age_group, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

prep_curr_age %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= "Distribution of current PrEP use use by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(prep_curr_age$n)))

Distribution of current PrEP use use by age
	Age group
PREP_CURRENT	15-24	25-34	35-44	45-54	55-65
not on prep	0.94	0.77	0.74	0.72	0.85
on prep	0.06	0.23	0.26	0.28	0.15
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(prep_curr_age, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Current PrEP use by age',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

prep_curr_race <- baseDF %>%
  group_by(race, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

prep_curr_race %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(prep_curr_race$n)))

Distribution of current PrEP use use by race
	Race
PREP_CURRENT	B	H	O
not on prep	0.69	0.87	0.8
on prep	0.31	0.12	0.2
Note:
HIV- proportions; n = 831

# What ZK saved was just a table of current prep use by race.  
# But we will want to use the full attr breakdown for targets

Plot

fig <- ggplot(prep_curr_race, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

prep_curr_region <- baseDF %>%
  group_by(region, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

prep_curr_region %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(prep_curr_region$n)))

Distribution of current PrEP use use by region
	Region
PREP_CURRENT	EasternWA	King	WesternWA
not on prep	0.92	0.75	0.84
on prep	0.08	0.25	0.16
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(prep_curr_region, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- baseDF %>%
  group_by(insurance, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))

Distribution of current PrEP use use by insurance
	Insurance
PREP_CURRENT	emplyr	indvdl_othr	medicare_caid	othr_gov	uninsrd_NA
not on prep	0.76	0.73	0.87	0.79	0.97
on prep	0.24	0.27	0.13	0.21	0.03
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- baseDF %>%
  group_by(income_cat, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL Income category" = 7)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))

Distribution of current PrEP use use by FPL Income Category
	FPL Income category
PREP_CURRENT	fpl138	fpl138-300	fpl300-400	fpl400-500	fpl500-600	fpl600+	Missing
not on prep	0.95	0.87	0.7	0.78	0.59	0.73	0.96
on prep	0.05	0.13	0.3	0.22	0.41	0.27	0.04
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

SNAP

Table

tempDF <- wDF %>% select(PREP_CURRENT, snap, ego.wawt) %>%
  mutate(snapt = pmin(snap, 20)) %>%
  group_by(PREP_CURRENT, snapt) %>%
  summarize(num = round(sum(ego.wawt))) %>%
  group_by(snapt) %>%
  mutate(snap.n = sum(num),
         prop = round(100*num/sum(num), 1)) 

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  pivot_wider(names_from = snapt) %>%
  kable(caption= "Current PrEP use use by SNAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$snap.n), 
                 "; snap topcoded at 5+"))

Current PrEP use use by SNAP
PREP_CURRENT	num	snap.n	prop
1	3	110	2.7
1	13	229	5.7
1	9	67	13.4
1	23	57	40.4
1	14	36	38.9
1	8	40	20.0
1	11	22	50.0
1	1	7	14.3
1	6	17	35.3
1	4	5	80.0
1	8	23	34.8
1	4	15	26.7
1	1	1	100.0
1	15	17	88.2
1	1	3	33.3
1	39	70	55.7
1	8	207	3.9
Note:
HIV- proportions; n = 2066 ; snap topcoded at 5+

Plot

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  ggplot(aes(x=snapt, y=prop, weight=snap.n)) +
  ylim(c(0, 100)) +
  labs(title = "Current PrEP use by SNAP",
       x = "Number of anal sex partners last year",
       y = "Proportion on PrEP") +
  geom_line() +
  geom_point(aes(size=snap.n), color = "blue", alpha = 0.5)+
  geom_smooth(span = 1)

Deg.tot

Table

tempDF <- wDF %>% select(PREP_CURRENT, deg.tot, ego.wawt) %>%
  filter(!is.na(deg.tot)) %>%
  mutate(deg.tot = pmin(deg.tot, 3)) %>%
  group_by(PREP_CURRENT, deg.tot) %>%
  summarize(num = round(sum(ego.wawt))) %>%
  group_by(deg.tot) %>%
  mutate(deg.tot.n = sum(num),
         prop = round(100*num/sum(num), 1)) 

tempDF %>%   filter(PREP_CURRENT == 1) %>%
  kable(caption= "Current PrEP use use by deg.tot") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$deg.tot.n), 
                 "; deg.tot topcoded at 3+"))

Current PrEP use use by deg.tot
PREP_CURRENT	deg.tot	num	deg.tot.n	prop
1	0	25	281	8.9
1	1	50	292	17.1
1	2	22	81	27.2
1	3	63	116	54.3
Note:
HIV- proportions; n = 1540 ; deg.tot topcoded at 3+

Plot

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  ggplot(aes(x=deg.tot, y=prop, weight=deg.tot.n)) +
    ylim(c(0, 100)) +
    labs(title = "Current PrEP use by total degree",
    caption = paste("HIV- proportions; n = ",
      sum(tempDF$deg.tot.n), 
      "; deg.tot topcoded at 3+"),
    x = "Degree (Main + Casl)",
    y = "Proportion on PrEP") +
  geom_line() +
  geom_point(aes(size=deg.tot.n), color = "blue", alpha = 0.5)

Deg tot by SNAP

Plot

tempDF <- wDF %>% 
  select(PREP_CURRENT, snap, deg.tot, ego.wawt) %>%
  filter(!is.na(snap) & !is.na(deg.tot)) %>%
  mutate(snapt = pmin(snap, 5),
         conc = ifelse(deg.tot < 2, 0, 1)) %>%
  group_by(PREP_CURRENT, snapt, conc) %>%
  summarize(num = round(sum(ego.wawt))) %>%
  group_by(snapt, conc) %>%
  mutate(snap.n = sum(num),
         prop = round(100*num/sum(num), 1)) 

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  ggplot(aes(x=snapt, y=prop, 
             group = factor(conc), color = factor(conc),
             weight=snap.n)) +
  ylim(c(0, 100)) +
  labs(title = "Current PrEP use by SNAP & Concurrent Persistent P",
       x = "Number of anal sex partners last year",
       y = "Proportion on PrEP") +
  geom_line() +
  geom_point(aes(size=snap.n), alpha = 0.5)

Prediction

Logistic regression

Model 1

prepDF <- baseDF %>%
  filter(!is.na(snap)) %>%
  mutate(PREP_CURRENT = ifelse(PREP_CURRENT=="on prep", 1, 0),
         age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"),
         insurance = relevel(factor(insurance), ref = "emplyr"),
         snapt = pmin(snap, 20),
         snapb = ifelse(snap > 5, 1, 0),
         deg.tot = pmin(deg.tot, 3),
         deg.tot.b = ifelse(deg.tot==3, 1, 0))

design <- svydesign(id = ~Vrid, 
                    data = prepDF, 
                    weights = ~ego.wawt)

logitm1 <- svyglm(PREP_CURRENT ~ age_group,
                  data = prepDF, 
                  family = quasibinomial(), 
                  design)

kable(tidy(logitm1, exponentiate = T), 
      caption= "Model 1", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 1
term	estimate	std.error	statistic	p.value
(Intercept)	0.10	0.44	-5.37	0.00
age_group25-34	3.60	0.47	2.73	0.01
age_group35-44	4.76	0.51	3.05	0.00
age_group45-54	5.28	0.49	3.43	0.00
age_group55-65	2.71	0.51	1.96	0.05

Model 2

logitm2 <- svyglm(PREP_CURRENT ~ race,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm2, exponentiate = T), 
      caption= "Model 2", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 2
term	estimate	std.error	statistic	p.value
(Intercept)	0.33	0.10	-10.90	0.00
raceB	2.56	0.65	1.44	0.15
raceH	0.53	0.34	-1.85	0.07

Model 3

logitm3 <- svyglm(PREP_CURRENT ~ region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm3, exponentiate = T), 
      caption= "Model 3", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 3
term	estimate	std.error	statistic	p.value
(Intercept)	0.42	0.15	-5.83	0.0000
regionEasternWA	0.27	0.38	-3.50	0.0005
regionWesternWA	0.61	0.21	-2.34	0.0198

Model 4

logitm4 <- svyglm(PREP_CURRENT ~ insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm4, exponentiate = T), 
      caption= "Model 4", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 4
term	estimate	std.error	statistic	p.value
(Intercept)	0.37	0.13	-7.52	0.0000
insuranceindvdl_othr	1.31	0.36	0.74	0.4573
insurancemedicare_caid	0.46	0.34	-2.27	0.0236
insuranceothr_gov	1.22	0.44	0.45	0.6521
insuranceuninsrd_NA	0.16	0.64	-2.90	0.0039

Model 5

logitm5 <- svyglm(PREP_CURRENT ~ income_cat,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 5
term	estimate	std.error	statistic	p.value
(Intercept)	0.45	0.16	-5.03	0.0000
income_catfpl138	0.12	0.49	-4.21	0.0000
income_catfpl138-300	0.47	0.29	-2.62	0.0090
income_catfpl300-400	1.10	0.34	0.28	0.7806
income_catfpl400-500	0.73	0.41	-0.76	0.4463
income_catfpl500-600	2.08	0.46	1.58	0.1147
income_catMissing	0.20	0.80	-1.99	0.0465

Model 6

logitm5 <- svyglm(PREP_CURRENT ~ poly(snapt, 2),
                  data = prepDF %>% filter(!is.na(snapt)), 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 5
term	estimate	std.error	statistic
(Intercept)	2.400000e-01	0.14	-10.01
poly(snapt, 2)1	1.128082e+12	2.56	10.82
poly(snapt, 2)2	0.000000e+00	2.42	-4.96

Model 7

logitm5 <- svyglm(PREP_CURRENT ~ factor(deg.tot),
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 5
term	estimate	std.error	statistic	p.value
(Intercept)	0.14	0.21	-9.42	0.0000
factor(deg.tot)1	1.70	0.27	1.98	0.0486
factor(deg.tot)2	3.61	0.34	3.81	0.0002
factor(deg.tot)3	11.56	0.33	7.36	0.0000

Model 8

logitm6 <- svyglm(PREP_CURRENT ~ age_group + race + region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm6, exponentiate = T), 
      caption= "Model 6", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 6
term	estimate	std.error	statistic	p.value
(Intercept)	0.13	0.46	-4.43	0.0000
age_group25-34	3.20	0.47	2.46	0.0143
age_group35-44	4.26	0.49	2.99	0.0029
age_group45-54	4.55	0.49	3.10	0.0020
age_group55-65	2.55	0.51	1.82	0.0695
raceB	1.96	0.62	1.09	0.2756
raceH	0.61	0.37	-1.33	0.1827
regionEasternWA	0.31	0.37	-3.20	0.0014
regionWesternWA	0.63	0.21	-2.19	0.0286

Model 9

logitm7 <- svyglm(PREP_CURRENT ~ age_group + race + region + 
                    factor(deg.tot),
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm7, exponentiate = T), 
      caption= "Model 7", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 7
term	estimate	std.error	statistic	p.value
(Intercept)	0.07	0.49	-5.31	0.0000
age_group25-34	3.09	0.47	2.41	0.0163
age_group35-44	3.08	0.49	2.31	0.0210
age_group45-54	4.60	0.48	3.15	0.0017
age_group55-65	1.91	0.52	1.24	0.2170
raceB	0.80	0.56	-0.41	0.6837
raceH	0.44	0.42	-1.94	0.0528
regionEasternWA	0.30	0.42	-2.86	0.0044
regionWesternWA	0.63	0.24	-1.96	0.0501
factor(deg.tot)1	1.47	0.28	1.36	0.1739
factor(deg.tot)2	3.00	0.34	3.19	0.0015
factor(deg.tot)3	11.75	0.33	7.45	0.0000

Model 10

logitm8 <- svyglm(PREP_CURRENT ~ age_group + race + region + 
                    deg.tot.b + snapb,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm8, exponentiate = T), 
      caption= "Model 8", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

Model 8
term	estimate	std.error	statistic	p.value
(Intercept)	0.08	0.43	-5.95	0.0000
age_group25-34	2.99	0.46	2.40	0.0167
age_group35-44	3.17	0.47	2.45	0.0147
age_group45-54	3.95	0.48	2.87	0.0043
age_group55-65	1.82	0.52	1.15	0.2512
raceB	1.29	0.68	0.37	0.7080
raceH	0.38	0.46	-2.09	0.0369
regionEasternWA	0.25	0.44	-3.14	0.0018
regionWesternWA	0.58	0.25	-2.25	0.0250
deg.tot.b	2.95	0.36	3.00	0.0028
snapb	5.71	0.28	6.21	0.0000

Comparison of AICs

models <- get_formula("logitm", 8)
aics <- get_aics("logitm", 8, "logit")

kable(data.frame(Model = models, 
                 AIC = aics),
      caption= "AIC comparison", digits = 1) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))

AIC comparison
Model	AIC
PREP_CURRENT ~ age_group	707.7
PREP_CURRENT ~ race	720.7
PREP_CURRENT ~ region	711.4
PREP_CURRENT ~ insurance	712.0
PREP_CURRENT ~ factor(deg.tot)	640.0
PREP_CURRENT ~ age_group + race + region	698.9
PREP_CURRENT ~ age_group + race + region + factor(deg.tot)	622.7
PREP_CURRENT ~ age_group + race + region + deg.tot.b + snapb	586.8

Save out targets

Given the small number on PrEP, we can only construct the marginal breakdowns by attributes.

# just pull the proportions currently on prep
age <- prep_curr_age %>% 
  filter(PREP_CURRENT=="on prep") %>%
  select(-PREP_CURRENT)
race <- prep_curr_race %>% 
  filter(PREP_CURRENT=="on prep") %>%
  select(-PREP_CURRENT)
region <- prep_curr_region %>% 
  filter(PREP_CURRENT=="on prep") %>%
  select(-PREP_CURRENT)

## Save objects
save_out$target_prep_current <- list(age = age,
                                     race = race,
                                     region = region)

Frequency of PREP related check-ups

Most of the respondents who are on PrEP visit clinics every 3 month.

kable(data.frame(table(cDF$PREPCHKFREQ_CURR)), 
      caption= "Frequency of PREP related check-ups ") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("Current PrEP users; n = ",
                 table(prepDF$PREP_CURRENT)[[2]]))

Frequency of PREP related check-ups
Var1	Freq
Every month	1
Every 3 months	137
Every 6 months	11
Every 9 months	0
Every 12 months	5
I have returned only once	1
I return at some other interval of time	1
I do not return regularly	4
Im not sure	0
I prefer not to answer	0
Note:
Current PrEP users; n = 152

PDAP

Whether the respondent was on WA State PrEP-DAP (PDAPA) or on Gilead’s Medication Assistance Program (PDAPB) among those who are currently on PrEP
Sample restricted to those currently on PrEP: 227. This is too few cases to use for prediction purposes.

Descriptive statistics

Overall

# with(cDF, table(PREP_CURRENT, statePDAP = PDAPA, useNA = "al"))
# with(cDF, table(PREP_CURRENT, gileadPDP = PDAPB, useNA = "al"))
# with(cDF, table(statePDAP = PDAPA, gileadPDP = PDAPB, useNA = "al"))

baseDF <- cDF %>%
  filter(PREP_CURRENT == 1) %>%
  mutate(PDAP = case_when(PDAPA == 1 | PDAPB == 1 ~ "PDAP",
                             TRUE ~ "no PDAP"))

pdap_prop <- baseDF %>%
  group_by(PDAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop %>% kable(caption= "Distribution of PDAP enrollment") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("Sample restricted to current PrEP users, n =",
                 sum(pdap_prop$n)))

Distribution of PDAP enrollment
PDAP	n	n.wtd	prop.wtd
no PDAP	49	46.7	0.28
PDAP	112	120.8	0.72
Note:
Sample restricted to current PrEP users, n = 161

Age

Table

pdap_prop_age <- baseDF %>%
  group_by(PDAP, age_group) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop_age %>% 
  pivot_wider(names_from = PDAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Age", rep(c("no PDAP", "PDAP"), 3)),
        caption= paste("PDAP enrollment by age")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current PrEP users; n = ",
                 sum(pdap_prop$n)))

PDAP enrollment by age
	Obs N		Wtd N		Wtd Prop
Age	no PDAP	PDAP	no PDAP	PDAP	no PDAP	PDAP
15-24	2	5	2.5	7.5	0.25	0.75
25-34	19	36	18.4	33.9	0.35	0.65
35-44	9	29	8.6	36.1	0.19	0.81
45-54	13	28	12.0	29.0	0.29	0.71
55-65	6	14	5.2	14.4	0.27	0.73
Note:
Restricted to current PrEP users; n = 161

Plot

fig <- ggplot(pdap_prop_age, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('PDAP enrollment by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(pdap_prop$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

pdap_prop_race <- baseDF %>%
  group_by(PDAP, race) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop_race %>% 
  pivot_wider(names_from = PDAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Race", rep(c("no PDAP", "PDAP"), 3)),
        caption= paste("PDAP enrollment by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current PrEP users; n = ",
                 sum(pdap_prop$n)))

PDAP enrollment by race
	Obs N		Wtd N		Wtd Prop
Race	no PDAP	PDAP	no PDAP	PDAP	no PDAP	PDAP
B	3	3	3.2	13.9	0.19	0.81
H	3	10	2.7	8.7	0.24	0.76
O	43	99	40.8	98.2	0.29	0.71
Note:
Restricted to current PrEP users; n = 161

Plot

fig <- ggplot(pdap_prop_race, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(pdap_prop$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

pdap_prop_region <- baseDF %>%
  group_by(PDAP, region) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop_region %>% 
  pivot_wider(names_from = PDAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Region", rep(c("no PDAP", "PDAP"), 3)),
        caption= paste("PDAP enrollment by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current PrEP users; n = ",
                 sum(pdap_prop$n)))

PDAP enrollment by region
	Obs N		Wtd N		Wtd Prop
Region	no PDAP	PDAP	no PDAP	PDAP	no PDAP	PDAP
EasternWA	4	7	2.8	4.2	0.40	0.60
King	25	69	28.1	88.5	0.24	0.76
WesternWA	20	36	15.8	28.1	0.36	0.64
Note:
Restricted to current PrEP users; n = 161

Plot

fig <- ggplot(pdap_prop_region, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(pdap_prop$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- baseDF %>%
  group_by(insurance, PDAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PDAP enrollment by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("Proportion of current PrEP users; n = ",
                 sum(tempDF$n)))

Distribution of PDAP enrollment by insurance
	Insurance
PDAP	emplyr	indvdl_othr	medicare_caid	othr_gov	uninsrd_NA
no PDAP	0.2	0.28	0.75	0.58	0.69
PDAP	0.8	0.72	0.25	0.42	0.31
Note:
Proportion of current PrEP users; n = 161

Plot

fig <- ggplot(tempDF, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- baseDF %>%
  group_by(income_cat, PDAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PDAP enrollment by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL Income category" = 7)) %>%
  footnote(paste("Proportion of current PrEP users; n = ",
                 sum(tempDF$n)))

Distribution of PDAP enrollment by FPL Income Category
	FPL Income category
PDAP	fpl138	fpl138-300	fpl300-400	fpl400-500	fpl500-600	fpl600+	Missing
no PDAP	0.85	0.4	0.38	0.54	0.17	0.19	0.21
PDAP	0.15	0.6	0.62	0.46	0.83	0.81	0.79
Note:
Proportion of current PrEP users; n = 161

Plot

fig <- ggplot(tempDF, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Save out targets

Given small sample sizes, we may need to use the overall proportion on PDAP. But we save the age/race/region breakdowns

prop <- pdap_prop %>% 
  filter(PDAP=="PDAP") %>%
  select(-PDAP)

# We leave the age/race/region breakdowns intact to make small cell sizes visible.

## Save objects
save_out$target_pdap <- list(prop = prop,
                             age = pdap_prop_age,
                             race = pdap_prop_race,
                             region = pdap_prop_region)

ART use and ADAP

Overall

ART use is nearly universal for HIV+

# There are 3 NA that can't be recovered
# with(cDF[cDF$diag.status==1,], table(ART_CURRENT, useNA="al"))

baseDF <- cDF %>%
  filter(diag.status == 1 & !is.na(ART_CURRENT)) %>%
  mutate(ART_CURRENT = factor(ifelse(ART_CURRENT == 1, 1, 0),
                              levels = c(0, 1), 
                              labels = c("not on ART", "on ART")),
         ADAP = factor(ifelse(ADAP == 1, 1, 0),
                       levels = c(0, 1), 
                       labels = c("no ADAP", "ADAP")))

tempDF <- baseDF %>%
  group_by(ART_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of ART use") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("Sample restricted to HIV+, n =",
                 sum(tempDF$n)))

Distribution of ART use
ART_CURRENT	n	n.wtd	prop.wtd
not on ART	2	2.9	0.03
on ART	87	89.7	0.97
Note:
Sample restricted to HIV+, n = 89

Enrolled in ADAP

This is restricted to HIV-positive on ART, resulting in a very small sample size.
The breakdowns by race are not reliable due to small cell sizes
The breakdown by region is slightly more reliable.

Overall

adap_prop <- baseDF %>%
  filter(ART_CURRENT == "on ART") %>%
  group_by(ADAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

adap_prop %>% kable(caption= "Distribution of ADAP enrollment") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("Sample restricted to HIv+ on ART, n =",
                 sum(adap_prop$n)))

Distribution of ADAP enrollment
ADAP	n	n.wtd	prop.wtd
no ADAP	58	62.9	0.7
ADAP	29	26.8	0.3
Note:
Sample restricted to HIv+ on ART, n = 87

Race

adap_prop_race <- baseDF %>%
  filter(ART_CURRENT == "on ART") %>%
  group_by(ADAP, race) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

adap_prop_race %>% 
  pivot_wider(names_from = ADAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Race", rep(c("no ADAP", "ADAP"), 3)),
        caption= paste("ADAP enrollment by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current ART users; n = ",
                 sum(adap_prop_race$n)))

ADAP enrollment by race
	Obs N		Wtd N		Wtd Prop
Race	no ADAP	ADAP	no ADAP	ADAP	no ADAP	ADAP
B	1	2	2.4	3.0	0.44	0.56
H	5	2	9.4	1.9	0.83	0.17
O	52	25	51.1	22.0	0.70	0.30
Note:
Restricted to current ART users; n = 87

Region

adap_prop_region <- baseDF %>%
  filter(ART_CURRENT == "on ART") %>%
  group_by(ADAP, region) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

adap_prop_region %>% 
  pivot_wider(names_from = ADAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Region", rep(c("no ADAP", "ADAP"), 3)),
        caption= paste("ADAP enrollment by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current ART users; n = ",
                 sum(adap_prop_region$n)))

ADAP enrollment by region
	Obs N		Wtd N		Wtd Prop
Region	no ADAP	ADAP	no ADAP	ADAP	no ADAP	ADAP
EasternWA	4	5	2.4	2.9	0.45	0.55
King	30	12	38.3	14.4	0.73	0.27
WesternWA	24	12	22.2	9.5	0.70	0.30
Note:
Restricted to current ART users; n = 87

Save out targets

Given small sample sizes, we just save the overall proportion on PDAP.

prop <- adap_prop %>% 
  filter(ADAP=="ADAP") %>%
  select(-ADAP)

# We leave the race/region breakdown intact to make cell size issues visible

## Save objects
save_out$target_adap <- list(prop = prop,
                             race = adap_prop_race,
                             region = adap_prop_region)

STI infections in the past 12 months

This is based on three variables BSTIA (GC), BSTIB (CT), and BSTIC (syphilis).

baseDF <- cDF %>%
  mutate(
    PDAP = ifelse(PDAPA == 1 | PDAPB == 1, 1, 0),
    PREP = ifelse(is.na(PREP_CURRENT), 0, PREP_CURRENT),
    hiv.grp = case_when(
      diag.status==0 & PREP == 0 ~ 1,
      diag.status==0 & PREP == 1 & PDAP == 0 ~ 2,
      diag.status==0 & PREP == 1 & PDAP == 1 ~ 3,
      diag.status==1 & ART_CURRENT == 0 ~ 4,
      diag.status==1 & ART_CURRENT == 1 & ADAP == 0 ~ 5,
      diag.status==1 & ART_CURRENT == 1 & ADAP == 1 ~ 6),
    hiv.grp = factor(hiv.grp,
                     levels = c(1:6),
                     labels = c("HIV- no PrEP", "HIV- on PrEP",
                                "HIV- in PDAP",
                                "HIV+ no ART", "HIV+ on ART",
                                "HIV+ in ADAP")),
    GC = factor(ifelse(BSTIA > 3, 4, BSTIA), 
                levels = c(1, 2, 3, 4), 
                labels = c("never", "once", "more than once", "DK/Ref")), 
    CT = factor(ifelse(BSTIB > 3, 4, BSTIB), 
                levels = c(1, 2, 3, 4), 
                labels = c("never", "once", "more than once", "DK/Ref")), 
    Syphilis = factor(ifelse(BSTIC > 3, 4, BSTIC), 
                levels = c(1, 2, 3, 4), 
                labels = c("never", "once", "more than once", "DK/Ref")))

Overall

CT

baseDF %>%
  group_by(CT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "CT in last 12 mos") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

CT in last 12 mos
CT	n	n.wtd	prop.wtd
never	729	716.9	0.78
once	47	55.6	0.06
more than once	16	17.1	0.02
DK/Ref	5	4.4	0.00
NA	126	129.7	0.14

GC

baseDF %>%
  group_by(GC) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "GC in last 12 mos") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

GC in last 12 mos
GC	n	n.wtd	prop.wtd
never	720	717.3	0.78
once	64	63.9	0.07
more than once	11	10.7	0.01
DK/Ref	4	3.6	0.00
NA	124	128.2	0.14

Syphilis

baseDF %>%
  group_by(Syphilis) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "Syphilis in last 12 mos") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))

Syphilis in last 12 mos
Syphilis	n	n.wtd	prop.wtd
never	761	757.9	0.82
once	26	25.2	0.03
more than once	6	6.5	0.01
DK/Ref	6	5.8	0.01
NA	124	128.2	0.14

Dx HIV status

CT

baseDF %>%
  group_by(diag.status, CT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(diag.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = diag.status,
              values_from = prop.wtd) %>%
  kable(caption= "CT in last 12 mos by Dx HIV status") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx HIV status" = 2))

CT in last 12 mos by Dx HIV status
	Dx HIV status
CT	0	1
never	0.78	0.70
once	0.05	0.16
more than once	0.02	NA
DK/Ref	0.00	0.01
NA	0.14	0.13

GC

baseDF %>%
  group_by(diag.status, GC) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(diag.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = diag.status,
              values_from = prop.wtd) %>%
  kable(caption= "GC in last 12 mos by Dx HIV status") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx HIV status" = 2))

GC in last 12 mos by Dx HIV status
	Dx HIV status
GC	0	1
never	0.78	0.76
once	0.07	0.08
more than once	0.01	0.03
DK/Ref	0.00	NA
NA	0.14	0.13

Syphilis

baseDF %>%
  group_by(diag.status, Syphilis) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(diag.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = diag.status,
              values_from = prop.wtd) %>%
  kable(caption= "Syphilis in last 12 mos by Dx HIV status") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx HIV status" = 2))

Syphilis in last 12 mos by Dx HIV status
	Dx HIV status
Syphilis	0	1
never	0.83	0.78
once	0.02	0.08
more than once	0.01	NA
DK/Ref	0.01	0.01
NA	0.14	0.13

HIV/Care/DAP status

CT

nmiss <- with(baseDF, table(is.na(CT) | is.na(hiv.grp)))[[2]]
              
baseDF %>%
  filter(!is.na(CT) & !is.na(hiv.grp)) %>%
  group_by(hiv.grp, CT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(hiv.grp) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = hiv.grp,
              values_from = prop.wtd) %>%
  kable(caption= "CT in last 12 mos by HIV/Care/DAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx/Care/DAP status" = 5)) %>%
  footnote(paste(nmiss, "Missing cases"))

CT in last 12 mos by HIV/Care/DAP
	Dx/Care/DAP status
CT	HIV- no PrEP	HIV- on PrEP	HIV- in PDAP	HIV+ on ART	HIV+ in ADAP
never	0.95	0.80	0.80	0.81	0.83
once	0.03	0.09	0.17	0.18	0.17
more than once	0.02	0.11	0.03	NA	NA
DK/Ref	0.01	NA	NA	0.01	NA
Note:
130 Missing cases

GC

nmiss <- with(baseDF, table(is.na(GC) | is.na(hiv.grp)))[[2]]

baseDF %>%
  filter(!is.na(GC) & !is.na(hiv.grp)) %>%
  group_by(hiv.grp, GC) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(hiv.grp) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = hiv.grp,
              values_from = prop.wtd) %>%
  kable(caption= "GC in last 12 mos by HIV/Care/DAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx/Care/DAP status" = 5)) %>%
  footnote(paste(nmiss, "Missing cases"))

GC in last 12 mos by HIV/Care/DAP
	Dx/Care/DAP status
GC	HIV- no PrEP	HIV- on PrEP	HIV- in PDAP	HIV+ on ART	HIV+ in ADAP
never	0.95	0.70	0.79	0.92	0.81
once	0.04	0.26	0.19	0.06	0.19
more than once	0.01	0.04	0.03	0.01	NA
DK/Ref	0.01	NA	NA	NA	NA
Note:
128 Missing cases

Syphilis

nmiss <- with(baseDF, table(is.na(GC) | is.na(hiv.grp)))[[2]]

baseDF %>%
  filter(!is.na(GC) & !is.na(hiv.grp)) %>%
  group_by(hiv.grp, Syphilis) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(hiv.grp) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = hiv.grp,
              values_from = prop.wtd) %>%
  kable(caption= "Syphilis in last 12 mos by HIV/Care/DAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx/Care/DAP status" = 5)) %>%
  footnote(paste(nmiss, "Missing cases"))

Syphilis in last 12 mos by HIV/Care/DAP
	Dx/Care/DAP status
Syphilis	HIV- no PrEP	HIV- on PrEP	HIV- in PDAP	HIV+ on ART	HIV+ in ADAP
never	0.98	0.91	0.90	0.91	0.92
once	0.01	0.02	0.08	0.07	0.08
more than once	0.01	0.07	NA	NA	NA
DK/Ref	0.00	NA	0.02	0.02	NA
Note:
128 Missing cases

Descriptives table

des_table <- 
  tibble(Outputs = names(save_out), 
         Description = c("predicted income", 
                         "income FPL levels", 
                         "predicted insurance", 
                         "predicted PrEP awareness", 
                         "predicted PrEP ever (interest)", 
                         "target HIV- on PrEP", 
                         "target PrEP users in PDAP", 
                         "target ART patients in ADAP"), 
         Source = c("multinomial regression on age, race, region", 
                         "FPL guidelines",
                         "multinomial regression on age, race, region, hiv status", 
                         "logistic regression on race, region", 
                         "logistic regression on age, region", 
                         "obs props overall and by age, race and region", 
                         "obs props overall and by age, race and region", 
                         "obs props overall and by race and region"), 
         group = c(rep("Parameters: Demog", 3),
                   rep("Parameters: PrEP", 2),
                   rep("Targets", 3))) 
des_table %>%
  gt::gt(rowname_col = "Outputs",
     groupname_col = "group")

	Description	Source
Parameters: Demog
pred_inc	predicted income	multinomial regression on age, race, region
income_levels	income FPL levels	FPL guidelines
pred_ins	predicted insurance	multinomial regression on age, race, region, hiv status
Parameters: PrEP
pred_prep_aware	predicted PrEP awareness	logistic regression on race, region
pred_prep_ever	predicted PrEP ever (interest)	logistic regression on age, region
Targets
target_prep_current	target HIV- on PrEP	obs props overall and by age, race and region
target_pdap	target PrEP users in PDAP	obs props overall and by age, race and region
target_adap	target ART patients in ADAP	obs props overall and by race and region

print("Structure of output object:")

## [1] "Structure of output object:"

for(i in 1:length(names(save_out))){
  str(save_out[i], vec.len=0, give.attr=F)}

## List of 1
##  $ pred_inc:'data.frame':    270 obs. of  5 variables:
##   ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   ..$ race     : Factor w/ 3 levels "B","H","O": NULL ...
##   ..$ region   : Factor w/ 3 levels "EasternWA","King",..: NULL ...
##   ..$ income   : chr [1:270]  ...
##   ..$ prob     : num [1:270] NULL ...
## List of 1
##  $ income_levels:'data.frame':   6 obs. of  3 variables:
##   ..$ lvl: int [1:6] NULL ...
##   ..$ lb : num [1:6] NULL ...
##   ..$ ub : num [1:6] NULL ...
## List of 1
##  $ pred_ins: tibble [450 x 6] (S3: tbl_df/tbl/data.frame)
##   ..$ age_group  : Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   ..$ race       : Factor w/ 3 levels "B","H","O": NULL ...
##   ..$ region     : Factor w/ 3 levels "EasternWA","King",..: NULL ...
##   ..$ diag.status: chr [1:450]  ...
##   ..$ insurance  : chr [1:450]  ...
##   ..$ prob       : num [1:450] NULL ...
## List of 1
##  $ pred_prep_aware:'data.frame': 9 obs. of  3 variables:
##   ..$ race  : Factor w/ 3 levels "O","B","H": NULL ...
##   ..$ region: Factor w/ 3 levels "King","EasternWA",..: NULL ...
##   ..$ prob  : num [1:9] NULL ...
## List of 1
##  $ pred_prep_ever:'data.frame':  15 obs. of  3 variables:
##   ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   ..$ region   : Factor w/ 3 levels "King","EasternWA",..: NULL ...
##   ..$ prob     : num [1:15] NULL ...
## List of 1
##  $ target_prep_current:List of 3
##   ..$ age   : grouped_df [5 x 4] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   .. ..$ n        : int [1:5] NULL ...
##   .. ..$ n.wtd    : num [1:5] NULL ...
##   .. ..$ prop.wtd : num [1:5] NULL ...
##   ..$ race  : grouped_df [3 x 4] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ race    : chr [1:3]  ...
##   .. ..$ n       : int [1:3] NULL ...
##   .. ..$ n.wtd   : num [1:3] NULL ...
##   .. ..$ prop.wtd: num [1:3] NULL ...
##   ..$ region: grouped_df [3 x 4] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ region  : chr [1:3]  ...
##   .. ..$ n       : int [1:3] NULL ...
##   .. ..$ n.wtd   : num [1:3] NULL ...
##   .. ..$ prop.wtd: num [1:3] NULL ...
## List of 1
##  $ target_pdap:List of 4
##   ..$ prop  : tibble [1 x 3] (S3: tbl_df/tbl/data.frame)
##   .. ..$ n       : int NULL ...
##   .. ..$ n.wtd   : num NULL ...
##   .. ..$ prop.wtd: num NULL ...
##   ..$ age   : grouped_df [10 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ PDAP     : chr [1:10]  ...
##   .. ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   .. ..$ n        : int [1:10] NULL ...
##   .. ..$ n.wtd    : num [1:10] NULL ...
##   .. ..$ prop.wtd : num [1:10] NULL ...
##   ..$ race  : grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ PDAP    : chr [1:6]  ...
##   .. ..$ race    : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...
##   ..$ region: grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ PDAP    : chr [1:6]  ...
##   .. ..$ region  : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...
## List of 1
##  $ target_adap:List of 3
##   ..$ prop  : tibble [1 x 3] (S3: tbl_df/tbl/data.frame)
##   .. ..$ n       : int NULL ...
##   .. ..$ n.wtd   : num NULL ...
##   .. ..$ prop.wtd: num NULL ...
##   ..$ race  : grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ ADAP    : Factor w/ 2 levels "no ADAP","ADAP": NULL ...
##   .. ..$ race    : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...
##   ..$ region: grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ ADAP    : Factor w/ 2 levels "no ADAP","ADAP": NULL ...
##   .. ..$ region  : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...

Save output datafiles

save_out$des_table <- des_table # description table
saveRDS(save_out, here::here("Data", "WhampCostStats.rds"))

WHAMP Cost Statistics

Originally from Zoe Kao, updates MM

2021-02-24

Data management

Missing data

HH income and dependents

Income

Table

Plot

Dependents

Overall

By Income

FPL guidelines

FPL adjusted income

Descriptives

Overall

Age

Table

Plot

Prediction: Adjusted Income

Multinomial regressions

Model 1

Model 2

Model 3

Model 4

Comparison of AICs

Predicted distribution

Insurance

Descriptive statistics

Overall

HIV status

Table

Plot

Age

Table

Plot

Race

Table

Plot

Region

Table

Plot

Income

Table

Plot

Prediction

Multinomial regressions

Model 1

Model 2

Model 3

Model 4

Model 5

Comparison of AICs

Predicted distribution

By race, region and Dx

By age, region and Dx

PrEP aware HIV-

Descriptive statistics

Overall

Age

Table

Plot

Race

Table

Plot

Region

Table

Plot

Insurance

Table

Plot

Income

Table

Plot

Prediction

Logistic regression

Model 1

Model 2

Model 3

Model 4