rm(list = ls())

library(tidyr)
library(stringr)
library(skimr)
library(dplyr)
library(pander)
library(broom)
library(reshape2)
library(ggplot2)
library(ggpubr)
library(plotly)
library(gridExtra)
library(nnet)
library(kableExtra)

# Define a couple of functions for model comparison tables

# for formulas
get_formula <- function(prefix, n) {
  forms <- eval(parse(text = 
                       paste0("list(", 
                              paste0(prefix, 
                                     1:n, 
                                     "$terms", collapse = ","), 
                              ")")))
  out <- sapply(forms, FUN = format)
  return(out)
}

# for aics, need to also specify model type because
# object structures returned from AIC() are different

get_aics <- function(prefix, n, type) {
  mods <- eval(parse(text = 
                      paste0("list(", 
                             paste0(prefix, 
                                    1:n,
                                    collapse = ","), 
                             ")")))
  aics <- sapply(mods, FUN = AIC)
  
  if(type == "mlm"){
    out = aics
  } else {
    out = aics[2,]
    }
  
  return(out)
}

Data management

All of the data in this report come from the WHAMP Survey. The survey was conducted over a three month period using social media (primarily FaceBook, with a small number of Grindr and Growlr respondents) from Sep 11 - Dec 11 2019.

wDF <- WHAMPData::whamp_wide
origwDF <- wDF

# egodata only used for numpart and PrEP indication
# note that only respondents who
# answered the summary SNAP question and had anal sex partners
# after 2010 are in egoDF: 682 WHAMP respondents, 150 ARTnet
# see: Github/WHAMP2/Reports/WHAMPsurvey/Reports/00_data_flow.html#sample-exclusions-summary
egoDF <- WHAMPData::whampArtnetWA_egodata 


income_cat <- attr(wDF$HHINCOME, "labels")
income_lab <- gsub(",", "", names(income_cat))
income_lab <- do.call(rbind, lapply(str_extract_all(income_lab, "[0-9]+"), 
                                    function(x) as.numeric(x[1:2])))
income_lab <- data.frame(income_lab)
names(income_lab) <- c("income_lb", "income_ub")
income_lab$income_cat <- income_cat

income_lab <- data.frame(income_lab)
income_lab$income_ub[income_lab$income_lb == 80000] <- Inf
income_lab <- income_lab %>% filter(income_cat < 10)

prepfreq_lab <- names(attr(wDF$PREPCHKFREQ_CURR, "labels"))


# Get PrEP indication and number of ongoing partners from the ego data:
# PrEP is recommended for those with ongoing HIV positive partner;
# so we get
# (1) the number of ongoing sex partners, and 
# (2) the number of + ongoing

partDF <- bind_rows(egoDF$main$alters, 
                    egoDF$casl$alters) %>%
  arrange(ego.id, type) %>%
  filter(ongoing == 1) %>%
  group_by(ego.id) %>%
  summarise(n_part = n(),
            n_pos_part = sum(hiv),
            prepRecIndicated = max(hiv))


# fpl is from: https://aspe.hhs.gov/2019-poverty-guidelines
# we use the guideline, not the threshold here

adap_fpl_cut <- data.frame(hhsize = c(1:8), 
                           fpl = c(12490, 16910, 21330, 25750,
                                   30170, 34590, 39010, 43430))


# Pull variables into DF

wDF <- wDF %>% 
  select(Vrid, ego.wawt, AGE, age.grp, diag.status, 
         HHINCOME, DEPEND, race.cat, race, 
         region, region,
         
         # Insurance type
         TYP_INSA, TYP_INSA2, TYP_INSG1, TYP_INSG2,
         TYP_INSH, TYP_INSD, TYP_INSE,
         TYP_INSJ, TYP_INSF, TYP_INSI, TYP_INSC,
         TYP_INSB, TYP_INSF_OTHER,
         
         # PrEP related variables
         PREP_AWARE, PREP_EVER, PREP_CURRENT,
         PDAPA, PDAPB, PDAPC, PDAPD, PDAPE, 
         PDAPF, PDAPG, PDAPH, PDAPI, PDAPD_OTHER,
         PREPCHKFREQ_CURR,
         
         # ART related variables
         ART_CURRENT, ADAP, 
         
         # STI: GC, CT, Syphilis
         BSTIA, BSTIB, BSTIC,

         # Risk behavior
         snap, deg.main, deg.casl, deg.tot, 
         count.oo.part, risk.grp) %>%
  
  rename(race.cat4 = race.cat) %>%
  
  mutate(ego.id = Vrid, 
         PREPCHKFREQ_CURR = factor(
           PREPCHKFREQ_CURR, 
           levels = c(1, 2, 3, 4, 5, 6, 7, 8, 88, 99), 
           labels = prepfreq_lab), 
         PREP_CURRENT = ifelse(!is.na(PREP_AWARE) &
                                 is.na(PREP_CURRENT), 
                               0, PREP_CURRENT)) %>%
  left_join(partDF, by = "ego.id") 


wDF <- wDF %>% 
  mutate(hh_income = HHINCOME,
         HHINCOME = ifelse(HHINCOME %in% c(88, 99), NA, HHINCOME), 
         DEPEND = ifelse(is.na(HHINCOME), NA, DEPEND)) %>%
  left_join(. , adap_fpl_cut %>% select(hhsize, fpl), 
            by = c("DEPEND" = "hhsize")) %>%
  left_join(., income_lab, 
            by = c("HHINCOME" = "income_cat")) %>%
  mutate(fpl_lb = round(income_lb / fpl, 2), 
         fpl_ub = round(income_ub / fpl, 2), 
         compare = ifelse(floor(fpl_ub - 0.001) > floor(fpl_lb), 
                          floor(fpl_ub - 0.001), floor(fpl_lb))) %>%
  mutate(compare = 
           ifelse(fpl_ub > 1.38 & fpl_lb < 1.38 | fpl_ub < 1.38, 1.38,
                  ifelse(is.infinite(compare), 
                         floor(fpl_lb), compare)), 
         prop = ifelse(floor(fpl_ub - 0.001) > floor(fpl_lb), 
                       (compare - fpl_lb) / (fpl_ub - fpl_lb), 1)) %>%
  mutate(compare = ifelse(compare == 1, 2, compare), 
         prop = ifelse(compare > fpl_ub | is.infinite(fpl_ub), 1, prop)) %>%
  mutate(prop = ifelse(compare == 1.38 & fpl_ub > 1.38, 
                       (1.38 - fpl_lb) / (fpl_ub - fpl_lb), prop), 
         tmp_class = ifelse(compare > floor(fpl_lb) & prop > 0.5 & compare > 2, 
                            compare - 1, 
                            ifelse(compare == 1.38 & prop < 0.5, 2, compare))) %>%
  mutate(income_cat = as.numeric(factor(tmp_class))) %>%
  mutate(income_cat = ifelse(income_cat >= 6, 6, 
                             ifelse(is.infinite(fpl_ub), 6, income_cat))) %>%
  mutate(income_cat = factor(income_cat, 
                             levels = c(1:6), 
                             labels = c("fpl138", "fpl138-300", "fpl300-400", 
                                        "fpl400-500", "fpl500-600", "fpl600+"))) %>%
  mutate(fpl_lb = round(income_lb / 12490, 2) * 100, 
         fpl_ub = round(income_ub / 12490, 2) * 100) %>%
  mutate(tmp_class = paste0("fpl", fpl_lb, "-", fpl_ub)) %>% 
  mutate(income_cat_o = ifelse(tmp_class == "fpl641-Inf", "fpl641+", 
                               ifelse(tmp_class == "fplNA-NA", NA, tmp_class))) %>%
  mutate(hhincome2 = paste0(income_lb, "-", income_ub)) %>%
  mutate(hhincome2 = ifelse(hhincome2 == "NA-NA", NA, 
                            ifelse(hhincome2 == "80000-Inf", "80000+", hhincome2)))


ins_names <- c("ins_employ", "ins_individual", "ins_medicaid", 
               "ins_medicare", "ins_assistance", 
               "ins_tricare", "ins_VA", "ins_indian", "ins_other", 
               "ins_uninsure", 
               "ins_na1", "ins_na2", "ins_other2")

colnames(wDF)[grep("TYP_INS", colnames(wDF))] <- ins_names

sumInsType <- wDF %>% select(starts_with("ins_"), -c("ins_na1", "ins_na2")) %>%
  mutate(ins_other2 = ifelse(!is.na(ins_other2), 1, 0)) %>%
  mutate(sumInsType = rowSums(.)) %>% count(sumInsType)


wDF <- wDF %>% 
  select(-c("ins_na1", "ins_na2", "ins_other2")) %>%
  mutate(ins_individual = replace(ins_individual, ins_individual == 1, 2),
         ins_other = replace(ins_other, ins_other == 1, 2),
         ins_employ = replace(ins_employ, ins_employ == 1, 3),
         ins_assistance = replace(ins_assistance, ins_assistance == 1, 4),
         ins_tricare = replace(ins_tricare, ins_tricare == 1, 4),
         ins_VA = replace(ins_VA, ins_VA == 1, 4),
         ins_indian = replace(ins_indian, ins_VA == 1, 4),
         ins_medicaid = replace(ins_medicaid, ins_medicaid == 1, 5), 
         ins_medicare = replace(ins_medicare, ins_medicare == 1, 5)) %>%
  mutate(max = pmax(ins_uninsure, ins_individual, ins_other, ins_employ,
                    ins_assistance, ins_tricare, ins_VA, ins_indian, 
                    ins_medicaid, ins_medicare)) %>%
  mutate(insurance = case_when(max == 5 ~ "medicare_caid",
                               max == 4 ~ "othr_gov",
                               max == 3 ~ "emplyr",
                               max == 2 ~ "indvdl_othr",
                               max == 1 ~ "uninsrd", 
                               TRUE ~ NA_character_))

# age group: maximum age 
wDF <- wDF %>% 
  mutate(age_group = factor(age.grp, 
                            levels = c(1:5), 
                            labels = c("15-24", "25-34", "35-44", 
                                       "45-54", "55-65")))

Missing data

Missing values in the demographics come mostly from insurance and income_cat. We group the missing insurance with the uninsured, based on exploration of their patterns. We use the missing income cases as a factor level in the analyses, so we don’t lose them. For all other demographic attributes (including HIV Dx), the missing values are very small, so we use the complete case subset.

Missing values later in the survey, for ART and PrEP use and STI incidence come from attrition.

table(wDF$age_group, is.na(wDF$insurance)) %>% 
  kable(caption= "Missing insurance by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Missing insurance by age
FALSE TRUE
15-24 105 19
25-34 248 12
35-44 181 6
45-54 161 12
55-65 176 7
table(wDF$age_group, is.na(wDF$income_cat)) %>%
  kable(caption= "Missing income by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Missing income by age
FALSE TRUE
15-24 100 24
25-34 240 20
35-44 176 11
45-54 160 13
55-65 168 15
# assign missing insurance to the uninsured/NA category
wDF <- wDF %>%
  mutate(insurance = ifelse(is.na(insurance) | insurance == "uninsrd",
                            "uninsrd_NA", insurance),
         income_cat = forcats::fct_explicit_na(income_cat, "Missing"))


table(wDF$age_group, wDF$insurance) %>% 
  kable(caption= "Final insurance by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Final insurance by age
emplyr indvdl_othr medicare_caid othr_gov uninsrd_NA
15-24 66 5 19 6 28
25-34 172 12 32 17 27
35-44 128 13 24 8 14
45-54 105 17 20 12 19
55-65 106 14 41 11 11
table(wDF$age_group, wDF$income_cat) %>% 
  kable(caption= "Final income categories by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Final income categories by age
fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
15-24 31 33 10 5 1 20 24
25-34 23 70 25 24 17 81 20
35-44 19 25 21 13 3 95 11
45-54 15 22 11 8 5 99 13
55-65 16 40 15 7 1 89 15
# complete case DF for attrs & diag.status
wDF$complete <- with(wDF, complete.cases(age_group, race, region, diag.status))
table(wDF$complete)
## 
## FALSE  TRUE 
##     4   923
cDF <- wDF %>% filter(complete == 1)

# Create the output object
save_out <- list()

HH income and dependents

Income

Table

These are the weighted frequencies

# hh_income is the raw variable
inc_cats <- data.frame(orig = names(attr(wDF$hh_income, "labels")))  %>%
  mutate(labs = gsub(" a.*", "", orig),
         labs = gsub("Im not sure", "DK", labs),
         labs = gsub("I prefer not to", "RF", labs))

names(attr(wDF$hh_income, "labels")) <- inc_cats$labs

sjmisc::frq(wDF$hh_income, weights = ego.wawt)
## 
## What was your household income last year from all sources before taxes? (x) <numeric>
## # total N=927  valid N=898  mean=10.59  sd=20.93
## 
## Value |              Label |   N | Raw % | Valid % | Cum. %
## -----------------------------------------------------------
##     1 |      $0 to $19,999 |  94 | 10.14 |   10.47 |  10.47
##     2 | $20,000 to $29,999 |  89 |  9.60 |    9.91 |  20.38
##     3 | $30,000 to $39,999 |  72 |  7.77 |    8.02 |  28.40
##     4 | $40,000 to $49,999 |  73 |  7.87 |    8.13 |  36.53
##     5 | $50,000 to $59,999 |  53 |  5.72 |    5.90 |  42.43
##     6 | $60,000 to $69,999 |  47 |  5.07 |    5.23 |  47.66
##     7 | $70,000 to $79,999 |  58 |  6.26 |    6.46 |  54.12
##     8 |    $80,000 or more | 360 | 38.83 |   40.09 |  94.21
##    88 |                 DK |  23 |  2.48 |    2.56 |  96.77
##    99 |                 RF |  29 |  3.13 |    3.23 | 100.00
##  <NA> |               <NA> |  29 |  3.13 |    <NA> |   <NA>

Plot

wDF %>% group_by(hh_income) %>%
  summarize(n.wtd = sum(ego.wawt)) %>%
  mutate(prop.wtd = n.wtd / sum(n.wtd)) %>%
  ggplot(aes(x = sjlabelled::as_label(hh_income), y = prop.wtd)) +
  geom_bar(stat="identity", color = "blue", alpha = 0.7) +
  labs(title = "HH income",
       x = "HH income category",
       y = "Wtd proportion") +
  theme(axis.text.x = element_text(size = 8, angle = 50, 
                                   vjust = 0.6))

Dependents

Overall

tempDF <- wDF %>% 
  group_by(DEPEND) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% 
  kable(caption= paste("HH Dependents; n = ",
                       sum(tempDF$n)),
        digits = 2) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) 
HH Dependents; n = 927
DEPEND n n.wtd prop.wtd
1 416 431.6 0.47
2 311 304.3 0.33
3 49 45.2 0.05
4 41 37.0 0.04
5 17 15.1 0.02
6 8 9.4 0.01
7 2 1.7 0.00
NA 83 82.7 0.09

By Income

tempDF <- wDF %>% 
  filter(hh_income < 88 & !is.na(DEPEND)) %>% 
  group_by(DEPEND, hh_income) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(DEPEND) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = DEPEND,
              values_from = prop.wtd) %>%
  tibble::add_column(income = names(attr(wDF$hh_income, "labels"))[1:8],
                    .before=1) %>%
  select(-hh_income) %>%
  kable(caption= paste("Distribution of income by #Dependents; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dependents" = 7))
Distribution of income by #Dependents; n = 844
Dependents
income 1 2 3 4 5 6 7
$0 to $19,999 0.16 0.03 0.05 0.06 0.04 NA 0.6
$20,000 to $29,999 0.18 0.04 0.06 0.02 NA 0.07 NA
$30,000 to $39,999 0.10 0.05 0.07 0.05 0.05 0.17 NA
$40,000 to $49,999 0.10 0.06 0.07 0.04 0.06 0.28 NA
$50,000 to $59,999 0.08 0.05 0.04 0.04 NA NA NA
$60,000 to $69,999 0.06 0.05 0.03 0.02 NA NA 0.4
$70,000 to $79,999 0.07 0.07 0.10 0.05 0.11 NA NA
$80,000 or more 0.25 0.65 0.59 0.72 0.73 0.49 NA
  • Single person HH show a distinct pattern, with more low income and fewer high income earners. Roughly a third of these HH are in the bottom two categories, and just over 25% are in the top category

  • With 2+ in the HH, the majority (over 60% of HH) are in the top income category, with the rest relatively evenly distributed in the lower categories.

  • For the 5+ HH, there are slightly fewer in the top category, and slightly more in the middle.

FPL guidelines

  • These are the federal poverty level (FPL) guidelines by the number of dependents in the household. Source here.
kable(adap_fpl_cut, 
      caption= "FPL by dependents") %>% 
  kable_styling(full_width=F, position="center") 
FPL by dependents
hhsize fpl
1 12490
2 16910
3 21330
4 25750
5 30170
6 34590
7 39010
8 43430

FPL adjusted income

  1. Match the FPL by the number of dependents for each respondent.
  2. Divide the original income bounds by the FPL. This resulted in an income interval with a lower bound and an upper bound of FPL of each individual.
  3. Roughly classify individuals’ income into 5 income categories <138% FPL, 138%-300% FPL, 300%-400% FPL, 500%-600% FPL, and >600% FPL. Then we calculated the overlapping proportion of the interval in step 2 and the intervals defined in this step. If the overlapping proportion is over 50% in an income category, we assign the individual to this category.
  4. If a respondent originally said that their income was above $80,000 per year, the respondent was assigned in the income category >600% FPL no matter how many dependents this respondent had.

Descriptives

Overall

  • In general, individuals with lower income shifted their income categories after adjusting for the number of dependents.
tempDF <- wDF %>%
  filter(!is.na(income_cat)) %>%
  group_by(income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  ungroup() %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) 

p_income <- ggplot(tempDF, 
                   aes(x = income_cat, y = prop.wtd, 
                       text = prop.wtd)) +
  geom_bar(fill = "blue", alpha = 0.5,
           stat="identity",
           position = position_dodge2(width = 0.9)) +
  labs(title = "Income distribution in FPL categories",
       sub = "Not adjusted for dependents",
       caption = "Weighted WHAMP survey data",
       x= "income FPL category",
       y = "weighted proportion") +
  ylim(0, 0.5)

title.plotly <-
  list(text = paste0('Income distribution in FPL categories',
                     '<br>',
                     '<sup>',
                     'Not adjusted for dependents','</sup>'))

fig <- ggplotly(p_income , tooltip = "text",
                width = 600, height = 400) %>%
  layout(hoverlabel = list(font=list(color="white")),
         title = title.plotly)

fig

Age

Table

tempDF <- wDF %>% 
  filter(!is.na(income_cat)) %>% 
  mutate(income_cat = factor(income_cat,
                             levels = sort(unique(income_cat)))) %>%
  group_by(age_group, income_cat) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of income by age; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5))
Distribution of income by age; n = 927
Age group
income_cat 15-24 25-34 35-44 45-54 55-65
fpl138 0.21 0.08 0.09 0.08 0.08
fpl138-300 0.30 0.26 0.11 0.11 0.22
fpl300-400 0.08 0.09 0.11 0.06 0.08
fpl400-500 0.04 0.10 0.07 0.04 0.03
fpl500-600 0.01 0.07 0.01 0.02 0.01
fpl600+ 0.19 0.33 0.55 0.63 0.50
Missing 0.18 0.08 0.06 0.06 0.09

Plot

mycol = RColorBrewer::brewer.pal(length(unique(wDF$age_group))+1,
                                 'Oranges')[-1]

ggplot(tempDF, 
       aes(x = income_cat, y = prop.wtd,
           group = age_group, color = age_group)) +
  geom_line(size = 1) +
  geom_point() +
  scale_color_manual(values=mycol)

Prediction: Adjusted Income

Multinomial regressions

  • The coefficients are relative risk ratios.

Model 1

# multinom only uses complete cases, and inconsistently handles missing
# cases which breaks other functions like summary()

cDFi <- cDF %>% filter(income_cat != "Missing")

inc_mlm1 <- multinom(income_cat ~ diag.status, 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  18 (10 variable)
## initial  value 1510.111738 
## iter  10 value 1207.124161
## final  value 1205.269549 
## converged
#summary(inc_mlm1)
kable(tidy(inc_mlm1, exponentiate = T), digits = 2, 
      caption = "Model 1") %>% 
  kable_styling(full_width = F, 
                position="center", 
                bootstrap_options = c("striped"))
Model 1
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 1.92 0.13 4.91 0.00
fpl138-300 diag.status 1.01 0.42 0.03 0.97
fpl300-400 (Intercept) 0.81 0.16 -1.34 0.18
fpl300-400 diag.status 1.10 0.50 0.20 0.84
fpl400-500 (Intercept) 0.55 0.18 -3.30 0.00
fpl400-500 diag.status 1.19 0.54 0.31 0.75
fpl500-600 (Intercept) 0.27 0.23 -5.57 0.00
fpl500-600 diag.status 0.40 1.05 -0.88 0.38
fpl600+ (Intercept) 4.15 0.12 11.87 0.00
fpl600+ diag.status 1.20 0.37 0.48 0.63

Model 2

inc_mlm2 <- multinom(income_cat ~ age_group, 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  36 (25 variable)
## initial  value 1510.111738 
## iter  10 value 1170.486371
## iter  20 value 1142.577044
## iter  30 value 1139.946598
## final  value 1139.943809 
## converged
kable(tidy(inc_mlm2, exponentiate = T), digits = 2, 
      caption= "Model 2") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 2
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 1.42 0.23 1.55 0.12
fpl138-300 age_group25-34 2.12 0.35 2.17 0.03
fpl138-300 age_group35-44 0.91 0.40 -0.23 0.82
fpl138-300 age_group45-54 1.00 0.43 -0.01 0.99
fpl138-300 age_group55-65 1.98 0.39 1.77 0.08
fpl300-400 (Intercept) 0.36 0.33 -3.01 0.00
fpl300-400 age_group25-34 3.08 0.46 2.46 0.01
fpl300-400 age_group35-44 3.56 0.47 2.71 0.01
fpl300-400 age_group45-54 2.01 0.55 1.27 0.20
fpl300-400 age_group55-65 2.61 0.51 1.88 0.06
fpl400-500 (Intercept) 0.19 0.43 -3.84 0.00
fpl400-500 age_group25-34 6.16 0.53 3.41 0.00
fpl400-500 age_group35-44 4.18 0.57 2.50 0.01
fpl400-500 age_group45-54 2.78 0.65 1.58 0.11
fpl400-500 age_group55-65 1.93 0.68 0.97 0.33
fpl500-600 (Intercept) 0.04 0.90 -3.62 0.00
fpl500-600 age_group25-34 20.32 0.96 3.13 0.00
fpl500-600 age_group35-44 4.58 1.10 1.38 0.17
fpl500-600 age_group45-54 8.41 1.07 2.00 0.05
fpl500-600 age_group55-65 2.34 1.30 0.66 0.51
fpl600+ (Intercept) 0.88 0.25 -0.52 0.60
fpl600+ age_group25-34 4.40 0.36 4.13 0.00
fpl600+ age_group35-44 7.35 0.37 5.44 0.00
fpl600+ age_group45-54 9.60 0.39 5.79 0.00
fpl600+ age_group55-65 7.27 0.38 5.16 0.00

Model 3

inc_mlm3 <- multinom(income_cat ~ age_group + factor(race), 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  48 (35 variable)
## initial  value 1510.111738 
## iter  10 value 1168.693774
## iter  20 value 1128.622592
## iter  30 value 1126.846067
## iter  40 value 1126.569792
## iter  50 value 1126.565275
## final  value 1126.565256 
## converged
kable(tidy(inc_mlm3, exponentiate = T), digits = 2, 
      caption= "Model 3") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 3
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 2.89 0.55 1.93 0.05
fpl138-300 age_group25-34 2.08 0.35 2.11 0.04
fpl138-300 age_group35-44 0.90 0.40 -0.26 0.79
fpl138-300 age_group45-54 0.99 0.43 -0.03 0.98
fpl138-300 age_group55-65 2.03 0.39 1.83 0.07
fpl138-300 factor(race)H 0.50 0.61 -1.13 0.26
fpl138-300 factor(race)O 0.46 0.53 -1.46 0.15
fpl300-400 (Intercept) 0.58 0.68 -0.79 0.43
fpl300-400 age_group25-34 3.02 0.46 2.42 0.02
fpl300-400 age_group35-44 3.53 0.47 2.69 0.01
fpl300-400 age_group45-54 1.97 0.55 1.24 0.22
fpl300-400 age_group55-65 2.58 0.51 1.85 0.06
fpl300-400 factor(race)H 0.49 0.75 -0.96 0.34
fpl300-400 factor(race)O 0.63 0.63 -0.72 0.47
fpl400-500 (Intercept) 0.22 0.82 -1.85 0.06
fpl400-500 age_group25-34 6.24 0.53 3.43 0.00
fpl400-500 age_group35-44 4.16 0.57 2.50 0.01
fpl400-500 age_group45-54 2.84 0.65 1.61 0.11
fpl400-500 age_group55-65 2.03 0.68 1.04 0.30
fpl400-500 factor(race)H 1.17 0.82 0.19 0.85
fpl400-500 factor(race)O 0.78 0.73 -0.34 0.74
fpl500-600 (Intercept) 0.00 0.61 -25.46 0.00
fpl500-600 age_group25-34 21.47 0.96 3.18 0.00
fpl500-600 age_group35-44 4.64 1.10 1.39 0.16
fpl500-600 age_group45-54 8.93 1.07 2.05 0.04
fpl500-600 age_group55-65 2.52 1.30 0.71 0.48
fpl500-600 factor(race)H 347946.96 0.44 29.24 0.00
fpl500-600 factor(race)O 168177.03 0.38 31.32 0.00
fpl600+ (Intercept) 1.04 0.56 0.08 0.94
fpl600+ age_group25-34 4.29 0.36 4.04 0.00
fpl600+ age_group35-44 7.34 0.37 5.42 0.00
fpl600+ age_group45-54 9.30 0.39 5.69 0.00
fpl600+ age_group55-65 6.86 0.39 4.99 0.00
fpl600+ factor(race)H 0.40 0.61 -1.50 0.13
fpl600+ factor(race)O 0.93 0.53 -0.13 0.89

Model 4

inc_mlm4 <- multinom(income_cat ~ age_group + 
                       factor(race) + factor(region), 
                     data = cDFi, 
                     weights = ego.wawt)
## # weights:  60 (45 variable)
## initial  value 1510.111738 
## iter  10 value 1141.915601
## iter  20 value 1097.041447
## iter  30 value 1094.935028
## iter  40 value 1094.415678
## iter  50 value 1094.348000
## final  value 1094.347252 
## converged
kable(tidy(inc_mlm4, exponentiate = T), digits = 2, 
      caption= "Model 4") %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 4
y.level term estimate std.error statistic p.value
fpl138-300 (Intercept) 1.75 0.64 0.87 0.38
fpl138-300 age_group25-34 2.07 0.35 2.09 0.04
fpl138-300 age_group35-44 0.92 0.40 -0.20 0.84
fpl138-300 age_group45-54 1.02 0.44 0.04 0.97
fpl138-300 age_group55-65 2.14 0.39 1.95 0.05
fpl138-300 factor(race)H 0.60 0.62 -0.83 0.41
fpl138-300 factor(race)O 0.52 0.54 -1.22 0.22
fpl138-300 factor(region)King 1.92 0.37 1.74 0.08
fpl138-300 factor(region)WesternWA 1.25 0.37 0.60 0.55
fpl300-400 (Intercept) 0.28 0.81 -1.56 0.12
fpl300-400 age_group25-34 3.02 0.46 2.40 0.02
fpl300-400 age_group35-44 3.63 0.47 2.73 0.01
fpl300-400 age_group45-54 2.01 0.55 1.26 0.21
fpl300-400 age_group55-65 2.72 0.51 1.95 0.05
fpl300-400 factor(race)H 0.61 0.75 -0.65 0.51
fpl300-400 factor(race)O 0.73 0.63 -0.50 0.62
fpl300-400 factor(region)King 2.38 0.49 1.78 0.08
fpl300-400 factor(region)WesternWA 1.61 0.48 0.99 0.32
fpl400-500 (Intercept) 0.08 0.98 -2.60 0.01
fpl400-500 age_group25-34 6.23 0.54 3.41 0.00
fpl400-500 age_group35-44 4.34 0.57 2.55 0.01
fpl400-500 age_group45-54 2.94 0.65 1.65 0.10
fpl400-500 age_group55-65 2.19 0.68 1.15 0.25
fpl400-500 factor(race)H 1.54 0.82 0.52 0.60
fpl400-500 factor(race)O 0.93 0.74 -0.10 0.92
fpl400-500 factor(region)King 3.40 0.58 2.10 0.04
fpl400-500 factor(region)WesternWA 1.95 0.59 1.13 0.26
fpl500-600 (Intercept) 0.00 0.73 -21.69 0.00
fpl500-600 age_group25-34 21.14 0.97 3.16 0.00
fpl500-600 age_group35-44 4.83 1.11 1.42 0.15
fpl500-600 age_group45-54 9.60 1.07 2.11 0.03
fpl500-600 age_group55-65 2.75 1.30 0.78 0.44
fpl500-600 factor(race)H 285573.47 0.47 26.66 0.00
fpl500-600 factor(race)O 127991.23 0.45 26.20 0.00
fpl500-600 factor(region)King 3.06 0.74 1.51 0.13
fpl500-600 factor(region)WesternWA 1.22 0.78 0.25 0.80
fpl600+ (Intercept) 0.21 0.67 -2.33 0.02
fpl600+ age_group25-34 4.30 0.37 3.92 0.00
fpl600+ age_group35-44 8.09 0.38 5.48 0.00
fpl600+ age_group45-54 10.74 0.41 5.83 0.00
fpl600+ age_group55-65 8.08 0.40 5.22 0.00
fpl600+ factor(race)H 0.58 0.63 -0.86 0.39
fpl600+ factor(race)O 1.24 0.54 0.40 0.69
fpl600+ factor(region)King 7.27 0.38 5.17 0.00
fpl600+ factor(region)WesternWA 1.70 0.38 1.38 0.17

Comparison of AICs

models <- get_formula("inc_mlm", 4)
aics <- get_aics("inc_mlm", 4, "mlm")

kable(data.frame(model = models, 
                 AIC = aics),
      digits = 0,
      caption= paste("Comparison of AICs; n =", dim(cDFi)[[1]])) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Comparison of AICs; n = 842
model AIC
income_cat ~ diag.status 2431
income_cat ~ age_group 2330
income_cat ~ age_group + factor(race) 2323
income_cat ~ age_group + factor(race) + factor(region) 2279
  • The final model is model 4, including age + race + region.

Predicted distribution

# Construct and save output
expand_df <- expand.grid(
  age_group = factor(c(1:5), 
                     levels = c(1:5), 
                     labels = levels(wDF$age_group)), 
  race = factor(c("B", "H", "O")), 
  region = c("EasternWA", "King", "WesternWA"))

pred_inc <- cbind(expand_df, 
                  predict(inc_mlm4, 
                          newdata = expand_df, 
                          "probs"))

pred_inc <- pred_inc %>% 
  gather(income, prob, `fpl138`:`fpl600+`)

income_levels <- data.frame(lvl = c(1:6), 
                            lb = c(0, 138, 300, 400, 500, 600), 
                            ub = c(137, 299, 399, 499, 599, 1000))

# Save output
save_out$pred_inc <- pred_inc
save_out$income_levels <- income_levels
ggplot(data = pred_inc, aes(x = income, y = prob, group = race)) +
  geom_line(aes(color = race)) +
  facet_wrap(~ region + age_group, ncol = 5) +
  ylim(0, 0.8) +
  labs(title = "FPL income category by race, region and age",
       y = "predicted distribution") + 
  theme(axis.text.x = element_text(size = 8, angle = 30, hjust = 0.5),
        strip.text.x = element_text(size = 8),
        legend.position = "bottom")


Insurance

Descriptive statistics

Overall

#with(cDF, table(insurance, useNA = "al"))

cDF %>%
  group_by(insurance) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "Distribution of insurance") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Distribution of insurance
insurance n n.wtd prop.wtd
emplyr 576 586.9 0.64
indvdl_othr 61 54.9 0.06
medicare_caid 134 120.9 0.13
othr_gov 54 56.2 0.06
uninsrd_NA 98 104.8 0.11

HIV status

Table

tempDF <- cDF %>% 
  mutate(dx.status = ifelse(diag.status == 1, "HIV+", "HIV-")) %>%
  group_by(dx.status, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(dx.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = dx.status,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by Dx status; n =",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx status" = 2))
Distribution of insurance by Dx status; n = 923
Dx status
insurance HIV- HIV+
emplyr 0.64 0.60
indvdl_othr 0.06 0.04
medicare_caid 0.12 0.21
othr_gov 0.05 0.14
uninsrd_NA 0.13 0.01

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = dx.status, fill = dx.status)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))

title.plotly <-
  list(text = paste0('Insurance by HIV Dx',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Age

Table

tempDF <- cDF %>% 
  group_by(age_group, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by age group; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age Group" = 5))
Distribution of insurance by age group; n = 923
Age Group
insurance 15-24 25-34 35-44 45-54 55-65
emplyr 0.54 0.67 0.71 0.64 0.59
indvdl_othr 0.03 0.04 0.06 0.09 0.07
medicare_caid 0.13 0.12 0.10 0.10 0.22
othr_gov 0.05 0.07 0.06 0.06 0.06
uninsrd_NA 0.26 0.10 0.07 0.11 0.05

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1))

title.plotly <-
  list(text = paste0('Insurance distribution by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

Almost all of the uninsured are Black or Hispanic.

tempDF <- cDF %>% 
  group_by(race, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by race; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3))
Distribution of insurance by race; n = 923
Race
insurance B H O
emplyr 0.47 0.52 0.67
indvdl_othr 0.02 0.02 0.07
medicare_caid 0.13 0.15 0.13
othr_gov 0.18 0.12 0.04
uninsrd_NA 0.20 0.19 0.10

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Insurance distribution by race',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

tempDF <- cDF %>% 
  group_by(region, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by region; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3))
Distribution of insurance by region; n = 923
Region
insurance EasternWA King WesternWA
emplyr 0.46 0.71 0.56
indvdl_othr 0.10 0.06 0.05
medicare_caid 0.22 0.08 0.19
othr_gov 0.04 0.04 0.10
uninsrd_NA 0.18 0.11 0.10

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  ylab("wtd proportion") +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Insurance distribution by region',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

# We show the missing income cases in the table
tempDF <- cDF %>% 
  group_by(income_cat, insurance) %>%
  summarise(n = n(),
            n.wtd = sum(ego.wawt)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of insurance by FPL income category; n = ",
                       sum(tempDF$n))) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL income category" = 7))
Distribution of insurance by FPL income category; n = 923
FPL income category
insurance fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
emplyr 0.18 0.49 0.76 0.72 0.92 0.83 0.27
indvdl_othr 0.07 0.08 0.07 0.02 0.02 0.06 0.03
medicare_caid 0.56 0.19 0.08 0.10 NA 0.02 0.15
othr_gov 0.06 0.08 0.06 0.13 0.06 0.05 0.06
uninsrd_NA 0.14 0.16 0.04 0.04 NA 0.04 0.48

Plot

fig <- ggplot(tempDF, 
              aes(x = insurance, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(axis.text.x = element_text(size = 10, angle = 10, hjust = 1),
        plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Insurance distribution by FPL income category',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Prediction

Multinomial regressions

insDF <- cDF %>%
  mutate(age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"))

Model 1

ins_mlm1 <- multinom(insurance ~ age_group, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  30 (20 variable)
## initial  value 1486.639404 
## iter  10 value 1047.234884
## iter  20 value 1023.345868
## final  value 1023.317376 
## converged
kable(tidy(ins_mlm1, exponentiate = T), 
      caption= "Model 1", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 1
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.057 0.467 -6.117 0.000
indvdl_othr age_group25-34 1.122 0.570 0.203 0.839
indvdl_othr age_group35-44 1.539 0.555 0.776 0.438
indvdl_othr age_group45-54 2.418 0.543 1.627 0.104
indvdl_othr age_group55-65 2.189 0.552 1.420 0.156
medicare_caid (Intercept) 0.236 0.249 -5.797 0.000
medicare_caid age_group25-34 0.752 0.324 -0.880 0.379
medicare_caid age_group35-44 0.568 0.352 -1.605 0.109
medicare_caid age_group45-54 0.666 0.362 -1.123 0.261
medicare_caid age_group55-65 1.566 0.313 1.435 0.151
othr_gov (Intercept) 0.091 0.377 -6.359 0.000
othr_gov age_group25-34 1.099 0.462 0.205 0.838
othr_gov age_group35-44 0.966 0.483 -0.072 0.943
othr_gov age_group45-54 1.037 0.500 0.072 0.942
othr_gov age_group55-65 1.165 0.493 0.309 0.757
uninsrd_NA (Intercept) 0.484 0.191 -3.811 0.000
uninsrd_NA age_group25-34 0.309 0.293 -4.007 0.000
uninsrd_NA age_group35-44 0.197 0.347 -4.674 0.000
uninsrd_NA age_group45-54 0.362 0.315 -3.234 0.001
uninsrd_NA age_group55-65 0.180 0.396 -4.332 0.000

Model 2

ins_mlm2 <- multinom(insurance ~ age_group + race, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  40 (28 variable)
## initial  value 1486.639404 
## iter  10 value 1018.895977
## iter  20 value 1002.876330
## iter  30 value 1002.839331
## final  value 1002.839301 
## converged
kable(tidy(ins_mlm2, exponentiate = T), 
      caption= "Model 2", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 2
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.062 0.470 -5.900 0.000
indvdl_othr age_group25-34 1.128 0.570 0.212 0.832
indvdl_othr age_group35-44 1.547 0.556 0.785 0.433
indvdl_othr age_group45-54 2.363 0.544 1.582 0.114
indvdl_othr age_group55-65 2.111 0.553 1.352 0.176
indvdl_othr raceB 0.422 0.942 -0.917 0.359
indvdl_othr raceH 0.524 0.657 -0.982 0.326
medicare_caid (Intercept) 0.216 0.257 -5.974 0.000
medicare_caid age_group25-34 0.753 0.325 -0.875 0.381
medicare_caid age_group35-44 0.568 0.353 -1.602 0.109
medicare_caid age_group45-54 0.685 0.363 -1.043 0.297
medicare_caid age_group55-65 1.631 0.315 1.554 0.120
medicare_caid raceB 1.492 0.421 0.951 0.342
medicare_caid raceH 1.594 0.316 1.475 0.140
othr_gov (Intercept) 0.060 0.403 -7.004 0.000
othr_gov age_group25-34 1.044 0.470 0.091 0.927
othr_gov age_group35-44 0.926 0.491 -0.156 0.876
othr_gov age_group45-54 1.107 0.510 0.199 0.842
othr_gov age_group55-65 1.323 0.504 0.555 0.579
othr_gov raceB 6.275 0.394 4.659 0.000
othr_gov raceH 3.730 0.368 3.575 0.000
uninsrd_NA (Intercept) 0.388 0.205 -4.618 0.000
uninsrd_NA age_group25-34 0.301 0.298 -4.031 0.000
uninsrd_NA age_group35-44 0.193 0.351 -4.681 0.000
uninsrd_NA age_group45-54 0.377 0.319 -3.058 0.002
uninsrd_NA age_group55-65 0.194 0.400 -4.102 0.000
uninsrd_NA raceB 3.324 0.370 3.249 0.001
uninsrd_NA raceH 2.322 0.302 2.785 0.005

Model 3

ins_mlm3 <- multinom(insurance ~ age_group + race + 
                       region, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  50 (36 variable)
## initial  value 1486.639404 
## iter  10 value 992.679394
## iter  20 value 978.742810
## iter  30 value 978.700528
## final  value 978.700493 
## converged
kable(tidy(ins_mlm3, exponentiate = T), 
      caption= "Model 3", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 3
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.053 0.492 -5.967 0.000
indvdl_othr age_group25-34 1.160 0.572 0.260 0.795
indvdl_othr age_group35-44 1.583 0.558 0.824 0.410
indvdl_othr age_group45-54 2.449 0.546 1.642 0.101
indvdl_othr age_group55-65 2.140 0.554 1.372 0.170
indvdl_othr raceB 0.458 0.943 -0.829 0.407
indvdl_othr raceH 0.479 0.660 -1.115 0.265
indvdl_othr regionEasternWA 2.798 0.418 2.459 0.014
indvdl_othr regionWesternWA 1.096 0.325 0.281 0.779
medicare_caid (Intercept) 0.120 0.292 -7.272 0.000
medicare_caid age_group25-34 0.807 0.331 -0.650 0.516
medicare_caid age_group35-44 0.594 0.359 -1.454 0.146
medicare_caid age_group45-54 0.721 0.369 -0.889 0.374
medicare_caid age_group55-65 1.647 0.321 1.553 0.120
medicare_caid raceB 1.750 0.428 1.308 0.191
medicare_caid raceH 1.467 0.325 1.180 0.238
medicare_caid regionEasternWA 3.958 0.321 4.283 0.000
medicare_caid regionWesternWA 2.761 0.224 4.541 0.000
othr_gov (Intercept) 0.035 0.443 -7.597 0.000
othr_gov age_group25-34 1.084 0.475 0.171 0.864
othr_gov age_group35-44 0.926 0.496 -0.156 0.876
othr_gov age_group45-54 1.080 0.515 0.149 0.882
othr_gov age_group55-65 1.289 0.508 0.499 0.618
othr_gov raceB 7.024 0.405 4.809 0.000
othr_gov raceH 3.805 0.376 3.557 0.000
othr_gov regionEasternWA 1.601 0.585 0.806 0.421
othr_gov regionWesternWA 3.247 0.304 3.878 0.000
uninsrd_NA (Intercept) 0.331 0.230 -4.805 0.000
uninsrd_NA age_group25-34 0.308 0.300 -3.927 0.000
uninsrd_NA age_group35-44 0.198 0.353 -4.591 0.000
uninsrd_NA age_group45-54 0.394 0.321 -2.902 0.004
uninsrd_NA age_group55-65 0.197 0.401 -4.056 0.000
uninsrd_NA raceB 3.492 0.373 3.357 0.001
uninsrd_NA raceH 2.161 0.306 2.515 0.012
uninsrd_NA regionEasternWA 2.424 0.340 2.605 0.009
uninsrd_NA regionWesternWA 1.174 0.250 0.641 0.522

Model 4

ins_mlm4 <- multinom(insurance ~ age_group + race + 
                       region + diag.status, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  55 (40 variable)
## initial  value 1486.639404 
## iter  10 value 983.360636
## iter  20 value 968.193265
## iter  30 value 968.135699
## final  value 968.135623 
## converged
kable(tidy(ins_mlm4, exponentiate = T), 
      caption= "Model 4", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 4
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.053 0.492 -5.957 0.000
indvdl_othr age_group25-34 1.171 0.572 0.276 0.783
indvdl_othr age_group35-44 1.636 0.559 0.881 0.378
indvdl_othr age_group45-54 2.557 0.547 1.715 0.086
indvdl_othr age_group55-65 2.351 0.563 1.519 0.129
indvdl_othr raceB 0.457 0.943 -0.830 0.407
indvdl_othr raceH 0.485 0.661 -1.094 0.274
indvdl_othr regionEasternWA 2.757 0.419 2.420 0.016
indvdl_othr regionWesternWA 1.079 0.326 0.234 0.815
indvdl_othr diag.status 0.639 0.540 -0.828 0.407
medicare_caid (Intercept) 0.118 0.292 -7.297 0.000
medicare_caid age_group25-34 0.796 0.331 -0.690 0.490
medicare_caid age_group35-44 0.558 0.362 -1.613 0.107
medicare_caid age_group45-54 0.671 0.372 -1.073 0.283
medicare_caid age_group55-65 1.423 0.336 1.052 0.293
medicare_caid raceB 1.724 0.429 1.270 0.204
medicare_caid raceH 1.445 0.325 1.134 0.257
medicare_caid regionEasternWA 4.094 0.323 4.370 0.000
medicare_caid regionWesternWA 2.815 0.225 4.609 0.000
medicare_caid diag.status 1.680 0.312 1.662 0.096
othr_gov (Intercept) 0.035 0.443 -7.574 0.000
othr_gov age_group25-34 1.017 0.476 0.036 0.971
othr_gov age_group35-44 0.725 0.512 -0.627 0.530
othr_gov age_group45-54 0.884 0.523 -0.236 0.813
othr_gov age_group55-65 0.860 0.539 -0.279 0.781
othr_gov raceB 6.845 0.411 4.676 0.000
othr_gov raceH 3.493 0.380 3.290 0.001
othr_gov regionEasternWA 1.787 0.586 0.991 0.322
othr_gov regionWesternWA 3.314 0.306 3.911 0.000
othr_gov diag.status 3.037 0.394 2.822 0.005
uninsrd_NA (Intercept) 0.332 0.230 -4.783 0.000
uninsrd_NA age_group25-34 0.316 0.300 -3.844 0.000
uninsrd_NA age_group35-44 0.220 0.354 -4.281 0.000
uninsrd_NA age_group45-54 0.442 0.323 -2.523 0.012
uninsrd_NA age_group55-65 0.256 0.406 -3.361 0.001
uninsrd_NA raceB 3.451 0.374 3.314 0.001
uninsrd_NA raceH 2.232 0.309 2.600 0.009
uninsrd_NA regionEasternWA 2.327 0.341 2.479 0.013
uninsrd_NA regionWesternWA 1.154 0.251 0.571 0.568
uninsrd_NA diag.status 0.097 1.186 -1.967 0.049

Model 5

We keep the missing income level as a predictor in the model

ins_mlm5 <- multinom(insurance ~ age_group + race + 
                       region + diag.status + income_cat, 
                     data = insDF, 
                     weights = ego.wawt)
## # weights:  85 (64 variable)
## initial  value 1486.639404 
## iter  10 value 846.422845
## iter  20 value 810.564439
## iter  30 value 810.111301
## iter  40 value 809.984911
## iter  50 value 809.966462
## iter  60 value 809.964342
## final  value 809.964118 
## converged
kable(tidy(ins_mlm5, exponentiate = T), 
      caption= "Model 5", digits = 3) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 5
y.level term estimate std.error statistic p.value
indvdl_othr (Intercept) 0.022 0.559 -6.790 0.000
indvdl_othr age_group25-34 1.842 0.591 1.034 0.301
indvdl_othr age_group35-44 3.048 0.587 1.900 0.057
indvdl_othr age_group45-54 5.316 0.583 2.865 0.004
indvdl_othr age_group55-65 4.293 0.596 2.446 0.014
indvdl_othr raceB 0.422 0.947 -0.912 0.362
indvdl_othr raceH 0.452 0.671 -1.183 0.237
indvdl_othr regionEasternWA 2.089 0.438 1.680 0.093
indvdl_othr regionWesternWA 0.859 0.340 -0.446 0.656
indvdl_othr diag.status 0.580 0.552 -0.988 0.323
indvdl_othr income_catfpl138 8.817 0.553 3.933 0.000
indvdl_othr income_catfpl138-300 3.382 0.378 3.221 0.001
indvdl_othr income_catfpl300-400 1.716 0.510 1.059 0.290
indvdl_othr income_catfpl400-500 0.397 1.149 -0.803 0.422
indvdl_othr income_catfpl500-600 0.466 1.378 -0.554 0.580
indvdl_othr income_catMissing 2.254 0.707 1.149 0.251
medicare_caid (Intercept) 0.007 0.489 -10.252 0.000
medicare_caid age_group25-34 2.052 0.392 1.834 0.067
medicare_caid age_group35-44 1.900 0.438 1.465 0.143
medicare_caid age_group45-54 3.068 0.457 2.453 0.014
medicare_caid age_group55-65 5.930 0.418 4.255 0.000
medicare_caid raceB 1.497 0.511 0.791 0.429
medicare_caid raceH 1.286 0.379 0.664 0.507
medicare_caid regionEasternWA 2.080 0.386 1.896 0.058
medicare_caid regionWesternWA 1.603 0.269 1.752 0.080
medicare_caid diag.status 1.556 0.382 1.157 0.247
medicare_caid income_catfpl138 167.038 0.474 10.796 0.000
medicare_caid income_catfpl138-300 15.340 0.409 6.668 0.000
medicare_caid income_catfpl300-400 3.927 0.558 2.451 0.014
medicare_caid income_catfpl400-500 5.874 0.593 2.988 0.003
medicare_caid income_catfpl500-600 0.000 358.988 -0.031 0.975
medicare_caid income_catMissing 23.694 0.515 6.150 0.000
othr_gov (Intercept) 0.016 0.528 -7.819 0.000
othr_gov age_group25-34 1.360 0.496 0.620 0.535
othr_gov age_group35-44 1.223 0.543 0.371 0.711
othr_gov age_group45-54 1.620 0.561 0.860 0.390
othr_gov age_group55-65 1.500 0.561 0.723 0.470
othr_gov raceB 6.852 0.421 4.572 0.000
othr_gov raceH 3.181 0.391 2.960 0.003
othr_gov regionEasternWA 1.439 0.595 0.611 0.541
othr_gov regionWesternWA 2.663 0.316 3.103 0.002
othr_gov diag.status 2.857 0.404 2.601 0.009
othr_gov income_catfpl138 6.317 0.588 3.134 0.002
othr_gov income_catfpl138-300 2.549 0.417 2.245 0.025
othr_gov income_catfpl300-400 1.130 0.577 0.212 0.832
othr_gov income_catfpl400-500 2.615 0.529 1.818 0.069
othr_gov income_catfpl500-600 1.247 0.917 0.241 0.810
othr_gov income_catMissing 3.902 0.598 2.275 0.023
uninsrd_NA (Intercept) 0.058 0.383 -7.430 0.000
uninsrd_NA age_group25-34 0.616 0.341 -1.419 0.156
uninsrd_NA age_group35-44 0.627 0.404 -1.155 0.248
uninsrd_NA age_group45-54 1.379 0.395 0.812 0.417
uninsrd_NA age_group55-65 0.508 0.465 -1.457 0.145
uninsrd_NA raceB 4.036 0.404 3.454 0.001
uninsrd_NA raceH 2.461 0.354 2.544 0.011
uninsrd_NA regionEasternWA 1.668 0.392 1.306 0.192
uninsrd_NA regionWesternWA 0.777 0.285 -0.887 0.375
uninsrd_NA diag.status 0.105 1.199 -1.882 0.060
uninsrd_NA income_catfpl138 13.954 0.486 5.426 0.000
uninsrd_NA income_catfpl138-300 5.617 0.366 4.712 0.000
uninsrd_NA income_catfpl300-400 0.932 0.664 -0.106 0.915
uninsrd_NA income_catfpl400-500 0.925 0.787 -0.099 0.921
uninsrd_NA income_catfpl500-600 0.000 525.987 -0.024 0.980
uninsrd_NA income_catMissing 35.040 0.400 8.899 0.000

Comparison of AICs

models <- get_formula("ins_mlm", 5)
aics <- get_aics("ins_mlm", 5, "mlm")

kable(data.frame(Model = models, 
                 AIC = aics), 
      caption= paste("Comparison of AICs; n =", dim(cDF)[[1]]),
      digits = 0) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Comparison of AICs; n = 923
Model AIC
insurance ~ age_group 2087
insurance ~ age_group + race 2062
insurance ~ age_group + race + region 2029
insurance ~ age_group + race + region + diag.status 2016
insurance ~ age_group + race + region + diag.status + income_cat 1748
  • Income is a powerful predictor, but we can’t use it in simulation, so we will use model 4.

Predicted distribution

  • The final model includes age, race, region and diagnostic status.
#### Construct and save output
expand_df <- expand.grid(
  age_group = names(table(wDF$age_group)),
  race = c("B", "H", "O"), 
  region = c("EasternWA", "King", "WesternWA"), 
  diag.status = c(0, 1)
  )

pred_ins <- cbind(expand_df, 
                  predict(ins_mlm4, 
                          newdata = expand_df, 
                          "probs")) 

# For some reason ZK included age_group in the model/prediction, but she
# averaged over it in the saved DF.  We don't do that here.  We also use
# pivot_longer in place of gathers

pred_insl <- pred_ins %>% 
  mutate(diag.status = ifelse(diag.status == 0, "HIV-", "HIV+")) %>%
  pivot_longer(c(emplyr:uninsrd_NA), 
               names_to = "insurance", 
               values_to = "prob")

# Save to output object
save_out$pred_ins <- pred_insl

By race, region and Dx

ggplot(data = pred_insl,
       aes(x = race, y = prob/6, # sums over omitted var age_group
           group = insurance, fill = insurance)) +
  geom_bar(stat="identity") +
  facet_wrap(diag.status ~ region) +
  labs(title = "Insurance distribution by race, Dx and region",
       caption = paste("Multinomial regression results based on n =",
                       dim(cDF)[[1]],
                       "; Dx HIV+ =",
                       scales::percent(proportions(table(cDF$diag.status))[[2]], 
                                       accuracy = 0.1),
                       "; assumes homogenous age distributions"),
       y = "predicted distribution") + 
  theme(strip.text = element_text(size = 10),
        axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12), 
        axis.text.x = element_text(size = 10, hjust = 0.5), 
        axis.text.y = element_text(size = 10), 
        legend.position = "bottom")

By age, region and Dx

ggplot(data = pred_insl,
       aes(x = age_group, y = prob/3, # sums over omitted var race 
           group = insurance, fill = insurance)) +
  geom_bar(stat="identity") +
  facet_wrap(diag.status ~ region) +
  labs(title = "Insurance distribution by Age, Dx and region",
       caption = paste("Multinomial regression results based on n =",
                       dim(cDF)[[1]],
                       "; Dx HIV+ =",
                       scales::percent(proportions(table(cDF$diag.status))[[2]], 
                                       accuracy = 0.1),
                       "; assumes homogenous race distributions"),
       y = "predicted distribution") + 
  theme(strip.text = element_text(size = 10),
        axis.title.x = element_text(size = 12), 
        axis.title.y = element_text(size = 12), 
        axis.text.x = element_text(size = 10, hjust = 0.5), 
        axis.text.y = element_text(size = 10), 
        legend.position = "bottom")


PrEP aware HIV-

Descriptive statistics

Overall

  • Very few HIV- are not aware of PrEP.
#with(wDF, table(PREP_AWARE, useNA = "al")

baseDF <- cDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) 

tempDF <- baseDF %>%
  group_by(PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of PrEP awareness") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))  %>%
  footnote(paste("HIV- ; n = ",
                 sum(tempDF$n)))
Distribution of PrEP awareness
PREP_AWARE n n.wtd prop.wtd
aware 697 691.1 0.93
unaware 52 49.8 0.07
Note:
HIV- ; n = 749

Age

Table

tempDF <- baseDF %>%
  group_by(age_group, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= "Distribution of PrEP awareness by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))
Distribution of PrEP awareness by age
Age group
PREP_AWARE 15-24 25-34 35-44 45-54 55-65
aware 0.9 0.96 0.95 0.95 0.88
unaware 0.1 0.04 0.05 0.05 0.12
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) +
  scale_colour_brewer(palette = "YlOrRd")

title.plotly <-
  list(text = paste0('PrEP awareness by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

tempDF <- baseDF %>%
  group_by(race, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))
Distribution of PrEP awareness by race
Race
PREP_AWARE B H O
aware 0.93 0.79 0.95
unaware 0.07 0.21 0.05
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

tempDF <- wDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) %>%
  group_by(region, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))
Distribution of PrEP awareness by region
Region
PREP_AWARE EasternWA King WesternWA
aware 0.8 0.97 0.91
unaware 0.2 0.03 0.09
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- wDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) %>%
  group_by(insurance, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))
Distribution of PrEP awareness by insurance
Insurance
PREP_AWARE emplyr indvdl_othr medicare_caid othr_gov uninsrd_NA
aware 0.96 0.99 0.89 0.91 0.75
unaware 0.04 0.01 0.11 0.09 0.25
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- wDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2)  %>%
  mutate(PREP_AWARE = ifelse(PREP_AWARE == 1, "aware", "unaware")) %>%
  group_by(income_cat, PREP_AWARE) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PrEP awareness by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Income" = 7))  %>%
  footnote(paste("HIV- only, wtd proportions; n = ",
                 sum(tempDF$n)))
Distribution of PrEP awareness by FPL Income Category
Income
PREP_AWARE fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
aware 0.8 0.93 0.95 0.94 0.96 0.96 0.89
unaware 0.2 0.07 0.05 0.06 0.04 0.04 0.11
Note:
HIV- only, wtd proportions; n = 749

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_AWARE, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PrEP awareness",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PrEP awareness by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Prediction

Logistic regression

Model 1

library(survey)

prepDF <- cDF %>%
  filter(diag.status == 0 & PREP_AWARE < 2) %>%
  mutate(age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"),
         insurance = relevel(factor(insurance), ref = "emplyr"))

design <- svydesign(id = ~Vrid, 
                    data = prepDF, 
                    weights = ~ego.wawt)

logitm1 <- svyglm(PREP_AWARE ~ age_group,
                  data = prepDF, 
                  family = quasibinomial(), 
                  design)

kable(tidy(logitm1, exponentiate = T), 
      caption= "Model 1", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 1
term estimate std.error statistic p.value
(Intercept) 8.83 0.32 6.89 0.00
age_group25-34 2.45 0.45 1.97 0.05
age_group35-44 2.34 0.54 1.58 0.11
age_group45-54 2.01 0.47 1.47 0.14
age_group55-65 0.86 0.46 -0.34 0.74

Model 2

logitm2 <- svyglm(PREP_AWARE ~ race,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm2, exponentiate = T), 
      caption= "Model 2", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 2
term estimate std.error statistic p.value
(Intercept) 20.17 0.18 16.27 0.00
raceB 0.63 0.71 -0.64 0.52
raceH 0.19 0.37 -4.59 0.00

Model 3

logitm3 <- svyglm(PREP_AWARE ~ region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm3, exponentiate = T), 
      caption= "Model 3", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 3
term estimate std.error statistic p.value
(Intercept) 35.08 0.35 10.29 0.0000
regionEasternWA 0.11 0.46 -4.74 0.0000
regionWesternWA 0.28 0.41 -3.11 0.0019

Model 4

logitm4 <- svyglm(PREP_AWARE ~ insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm4, exponentiate = T), 
      caption= "Model 4", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 4
term estimate std.error statistic p.value
(Intercept) 23.25 0.24 12.85 0.0000
insuranceindvdl_othr 3.03 1.04 1.06 0.2874
insurancemedicare_caid 0.34 0.42 -2.55 0.0109
insuranceothr_gov 0.46 0.59 -1.31 0.1902
insuranceuninsrd_NA 0.13 0.41 -4.94 0.0000

Model 5

logitm5 <- svyglm(PREP_AWARE ~ race + region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 5
term estimate std.error statistic p.value
(Intercept) 49.61 0.38 10.19 0.0000
raceB 0.54 0.72 -0.84 0.4023
raceH 0.21 0.36 -4.42 0.0000
regionEasternWA 0.13 0.45 -4.59 0.0000
regionWesternWA 0.27 0.42 -3.12 0.0019

Model 6

logitm6 <- svyglm(PREP_AWARE ~ race + region + insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm6, exponentiate = T), 
      caption= "Model 6", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 6
term estimate std.error statistic p.value
(Intercept) 62.86 0.41 10.03 0.0000
raceB 0.62 0.71 -0.69 0.4930
raceH 0.26 0.42 -3.25 0.0012
regionEasternWA 0.14 0.49 -3.99 0.0001
regionWesternWA 0.30 0.42 -2.88 0.0041
insuranceindvdl_othr 3.17 1.03 1.12 0.2629
insurancemedicare_caid 0.50 0.42 -1.62 0.1051
insuranceothr_gov 0.98 0.74 -0.03 0.9744
insuranceuninsrd_NA 0.21 0.44 -3.63 0.0003

Comparison of AICs

models <- get_formula("logitm", 6)
aics <- get_aics("logitm", 6, "logit")

kable(data.frame(Model = models, 
                 AIC = aics), 
      caption= "AIC comparison", digits = 1) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
AIC comparison
Model AIC
PREP_AWARE ~ age_group 368.5
PREP_AWARE ~ race 351.6
PREP_AWARE ~ region 343.2
PREP_AWARE ~ insurance 345.4
PREP_AWARE ~ race + region 328.8
PREP_AWARE ~ race + region + insurance 319.3
  • Insurance is a strong predictor, but we can’t simulate with it, so we will use model 5 with race + region.

Predicted distribution

#### Construct and save output
expand_df <- expand.grid(
  race = levels(prepDF$race),
  region = levels(prepDF$region))

pred_df <- cbind(expand_df, 
                 prob = predict(logitm5, 
                                newdata = expand_df, 
                                type = "response", 
                                npop = 1000))
pred_df <- pred_df %>% 
  select(-prob.SE) %>%
  rename(prob = prob.response)

## Save objects
save_out$pred_prep_aware <- pred_df
fig <- ggplot(data = pred_df, 
              aes(x = region, y = prob, text = round(prob,2),
                  group = race, fill = race)) +
  geom_bar(stat = "identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  theme(plot.margin = margin(t=30,r=20,b=20,l=20),
        legend.position = "bottom")

title.plotly <-
  list(text = paste0('Predicted PrEP Awareness by race and region',
                     '<br>',
                     '<sup>',
                     'n = ',
                     dim(prepDF)[[1]],
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Ever used PrEP HIV-

  • For PrEP aware respondents.

Descriptive statistics

Overall

#with(wDF, table(diag.status, PREP_EVER, useNA ="al"))
# there are a handful of HIV+ who have ever used prep, we exclude them

baseDF <- cDF %>%
  filter(diag.status == 0 & PREP_EVER < 2) %>%
  mutate(PREP_EVER = ifelse(PREP_EVER== 1, 
                            "ever used prep",
                            "never used prep")) 

tempDF <- baseDF %>%
  group_by(PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of ever PrEP (HIV-)") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Distribution of ever PrEP (HIV-)
PREP_EVER n n.wtd prop.wtd
ever used prep 222 225.2 0.37
never used prep 395 387.8 0.63

Age

Table

tempDF <- baseDF %>%
  group_by(age_group, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= "Distribution of Ever PrEP use by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))
Distribution of Ever PrEP use by age
Age group
PREP_EVER 15-24 25-34 35-44 45-54 55-65
ever used prep 0.21 0.42 0.4 0.41 0.28
never used prep 0.79 0.58 0.6 0.59 0.72
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Ever PrEP use by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

tempDF <- baseDF %>%
  group_by(race, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))
Distribution of Ever PrEP use by race
Race
PREP_EVER B H O
ever used prep 0.44 0.29 0.37
never used prep 0.56 0.71 0.63
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

tempDF <- baseDF %>%
  group_by(region, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))
Distribution of Ever PrEP use by region
Region
PREP_EVER EasternWA King WesternWA
ever used prep 0.21 0.42 0.31
never used prep 0.79 0.58 0.69
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- baseDF %>%
  group_by(insurance, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))
Distribution of Ever PrEP use by insurance
Insurance
PREP_EVER emplyr indvdl_othr medicare_caid othr_gov uninsrd_NA
ever used prep 0.38 0.4 0.35 0.41 0.21
never used prep 0.62 0.6 0.65 0.59 0.79
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- baseDF %>%
  group_by(income_cat, PREP_EVER) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of Ever PrEP use by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL Income category" = 7)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))
Distribution of Ever PrEP use by FPL Income Category
FPL Income category
PREP_EVER fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
ever used prep 0.16 0.3 0.45 0.42 0.61 0.41 0.23
never used prep 0.84 0.7 0.55 0.58 0.39 0.59 0.77
Note:
HIV- proportions; n = 617

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_EVER, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Ever PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Ever PrEP use by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Prediction

Logistic regression

Model 1

prepDF <- cDF %>%
  filter(diag.status == 0 & PREP_EVER < 2) %>%
  mutate(age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"),
         insurance = relevel(factor(insurance), ref = "emplyr"))

design <- svydesign(id = ~Vrid, 
                    data = prepDF, 
                    weights = ~ego.wawt)

logitm1 <- svyglm(PREP_EVER ~ age_group,
                  data = prepDF, 
                  family = quasibinomial(), 
                  design)

kable(tidy(logitm1, exponentiate = T), 
      caption= "Model 1", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 1
term estimate std.error statistic p.value
(Intercept) 0.27 0.32 -4.15 0.00
age_group25-34 2.74 0.35 2.88 0.00
age_group35-44 2.51 0.39 2.37 0.02
age_group45-54 2.61 0.38 2.56 0.01
age_group55-65 1.42 0.40 0.88 0.38

Model 2

logitm2 <- svyglm(PREP_EVER ~ race,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm2, exponentiate = T), 
      caption= "Model 2", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 2
term estimate std.error statistic p.value
(Intercept) 0.59 0.09 -5.77 0.00
raceB 1.35 0.66 0.45 0.65
raceH 0.70 0.32 -1.12 0.26

Model 3

logitm3 <- svyglm(PREP_EVER ~ region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm3, exponentiate = T), 
      caption= "Model 3", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 3
term estimate std.error statistic p.value
(Intercept) 0.72 0.13 -2.53 0.0117
regionEasternWA 0.37 0.33 -3.05 0.0024
regionWesternWA 0.63 0.19 -2.38 0.0176

Model 4

logitm4 <- svyglm(PREP_EVER ~ insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm4, exponentiate = T), 
      caption= "Model 4", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 4
term estimate std.error statistic p.value
(Intercept) 0.60 0.11 -4.45 0.0000
insuranceindvdl_othr 1.12 0.34 0.34 0.7377
insurancemedicare_caid 0.89 0.28 -0.41 0.6802
insuranceothr_gov 1.16 0.38 0.39 0.6998
insuranceuninsrd_NA 0.45 0.47 -1.69 0.0907

Model 5

logitm5 <- svyglm(PREP_EVER ~ income_cat,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 5
term estimate std.error statistic p.value
(Intercept) 0.68 0.14 -2.68 0.0076
income_catfpl138 0.27 0.38 -3.37 0.0008
income_catfpl138-300 0.61 0.25 -1.96 0.0500
income_catfpl300-400 1.18 0.31 0.54 0.5891
income_catfpl400-500 1.08 0.36 0.21 0.8366
income_catfpl500-600 2.26 0.50 1.64 0.1015
income_catMissing 0.45 0.54 -1.49 0.1363

Model 6

logitm6 <- svyglm(PREP_EVER ~ age_group + region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm6, exponentiate = T), 
      caption= "Model 6", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 6
term estimate std.error statistic p.value
(Intercept) 0.34 0.33 -3.23 0.0013
age_group25-34 2.63 0.35 2.73 0.0064
age_group35-44 2.37 0.39 2.23 0.0263
age_group45-54 2.48 0.38 2.38 0.0177
age_group55-65 1.39 0.40 0.82 0.4115
regionEasternWA 0.38 0.33 -3.01 0.0028
regionWesternWA 0.68 0.19 -2.01 0.0450

Model 7

logitm7 <- svyglm(PREP_EVER ~ age_group + region + income_cat,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm7, exponentiate = T), 
      caption= "Model 7", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 7
term estimate std.error statistic p.value
(Intercept) 0.41 0.36 -2.47 0.0138
age_group25-34 2.21 0.36 2.21 0.0278
age_group35-44 1.96 0.39 1.73 0.0848
age_group45-54 2.05 0.39 1.84 0.0665
age_group55-65 1.27 0.41 0.58 0.5620
regionEasternWA 0.39 0.33 -2.81 0.0051
regionWesternWA 0.72 0.20 -1.65 0.0990
income_catfpl138 0.38 0.39 -2.51 0.0124
income_catfpl138-300 0.74 0.25 -1.19 0.2345
income_catfpl300-400 1.41 0.31 1.09 0.2741
income_catfpl400-500 1.13 0.36 0.35 0.7296
income_catfpl500-600 2.34 0.52 1.63 0.1030
income_catMissing 0.63 0.56 -0.84 0.4031

Comparison of AICs

models <- get_formula("logitm", 7)
aics <- get_aics("logitm", 7, "logit")

kable(data.frame(Model = models, 
                 AIC = aics),
      caption= "AIC comparison", digits = 1) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
AIC comparison
Model AIC
PREP_EVER ~ age_group 804.7
PREP_EVER ~ race 819.1
PREP_EVER ~ region 803.0
PREP_EVER ~ insurance 815.9
PREP_EVER ~ income_cat 800.1
PREP_EVER ~ age_group + region 798.0
PREP_EVER ~ age_group + region + income_cat 795.9
  • Income has moderate predictive value, but we can’t use it in simulation, so we use model 6 with age and region.

Predicted distribution

Construct and save output

expand_df <- expand.grid(
  age_group = levels(prepDF$age_group),
  region = levels(prepDF$region))

pred_df <- cbind(expand_df, 
                 prob = predict(logitm6, 
                                newdata = expand_df, 
                                type = "response", 
                                npop = 1000))
pred_df <- pred_df %>% 
  select(-prob.SE) %>%
  rename(prob = prob.response)

## Save objects
save_out$pred_prep_ever <- pred_df

Plot

fig <- ggplot(data = pred_df, 
              aes(x = age_group, y = prob, text = round(prob,2),
                  group = region, color = region)) +
  geom_line(alpha = 0.7) +
  geom_point() +
  labs(x = "age group", y = "predicted probability") +
  theme(plot.margin = margin(t=30,r=20,b=20,l=20),
        legend.position = "bottom")

title.plotly <-
  list(text = paste0('Predicted Ever PrEP use by age and region',
                     '<br>',
                     '<sup>',
                     'n = ',
                     dim(prepDF)[[1]],
                     '; race was not a significant predictor',
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Current PrEP use

  • This is not used as an input parameter but as a target or validation at the end.

  • Variable: PREP_CURRENT

Descriptive statistics

  • Of the 227 respondents who report ever taking PrEP, 71% are currently on PrEP.

Overall

# NA is a mix of not aware + never prep; we recode them to 0
#with(wDF, table(diag.status, PREP_CURRENT, useNA ="al"))

baseDF <- cDF %>%
  filter(diag.status == 0) %>%
  mutate(PREP_CURRENT = ifelse(is.na(PREP_CURRENT) | PREP_CURRENT == 0, 
                            "not on prep",
                            "on prep")) 

tempDF <- baseDF %>%
  group_by(PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of current PrEP use (HIV-)") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Distribution of current PrEP use (HIV-)
PREP_CURRENT n n.wtd prop.wtd
not on prep 670 661.0 0.8
on prep 161 167.5 0.2

Age

Table

prep_curr_age <- baseDF %>%
  group_by(age_group, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

prep_curr_age %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = age_group,
              values_from = prop.wtd) %>%
  kable(caption= "Distribution of current PrEP use use by age") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Age group" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(prep_curr_age$n)))
Distribution of current PrEP use use by age
Age group
PREP_CURRENT 15-24 25-34 35-44 45-54 55-65
not on prep 0.94 0.77 0.74 0.72 0.85
on prep 0.06 0.23 0.26 0.28 0.15
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(prep_curr_age, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('Current PrEP use by age',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

prep_curr_race <- baseDF %>%
  group_by(race, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

prep_curr_race %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = race,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Race" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(prep_curr_race$n)))
Distribution of current PrEP use use by race
Race
PREP_CURRENT B H O
not on prep 0.69 0.87 0.8
on prep 0.31 0.12 0.2
Note:
HIV- proportions; n = 831
# What ZK saved was just a table of current prep use by race.  
# But we will want to use the full attr breakdown for targets

Plot

fig <- ggplot(prep_curr_race, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

prep_curr_region <- baseDF %>%
  group_by(region, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

prep_curr_region %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = region,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Region" = 3)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(prep_curr_region$n)))
Distribution of current PrEP use use by region
Region
PREP_CURRENT EasternWA King WesternWA
not on prep 0.92 0.75 0.84
on prep 0.08 0.25 0.16
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(prep_curr_region, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- baseDF %>%
  group_by(insurance, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))
Distribution of current PrEP use use by insurance
Insurance
PREP_CURRENT emplyr indvdl_othr medicare_caid othr_gov uninsrd_NA
not on prep 0.76 0.73 0.87 0.79 0.97
on prep 0.24 0.27 0.13 0.21 0.03
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- baseDF %>%
  group_by(income_cat, PREP_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of current PrEP use use by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL Income category" = 7)) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$n)))
Distribution of current PrEP use use by FPL Income Category
FPL Income category
PREP_CURRENT fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
not on prep 0.95 0.87 0.7 0.78 0.59 0.73 0.96
on prep 0.05 0.13 0.3 0.22 0.41 0.27 0.04
Note:
HIV- proportions; n = 831

Plot

fig <- ggplot(tempDF, 
              aes(x = PREP_CURRENT, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "Current PrEP use",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('Current PrEP use by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

SNAP

Table

tempDF <- wDF %>% select(PREP_CURRENT, snap, ego.wawt) %>%
  mutate(snapt = pmin(snap, 20)) %>%
  group_by(PREP_CURRENT, snapt) %>%
  summarize(num = round(sum(ego.wawt))) %>%
  group_by(snapt) %>%
  mutate(snap.n = sum(num),
         prop = round(100*num/sum(num), 1)) 

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  pivot_wider(names_from = snapt) %>%
  kable(caption= "Current PrEP use use by SNAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$snap.n), 
                 "; snap topcoded at 5+"))
Current PrEP use use by SNAP
PREP_CURRENT num snap.n prop
1 3 110 2.7
1 13 229 5.7
1 9 67 13.4
1 23 57 40.4
1 14 36 38.9
1 8 40 20.0
1 11 22 50.0
1 1 7 14.3
1 6 17 35.3
1 4 5 80.0
1 8 23 34.8
1 4 15 26.7
1 1 1 100.0
1 15 17 88.2
1 1 3 33.3
1 39 70 55.7
1 8 207 3.9
Note:
HIV- proportions; n = 2066 ; snap topcoded at 5+

Plot

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  ggplot(aes(x=snapt, y=prop, weight=snap.n)) +
  ylim(c(0, 100)) +
  labs(title = "Current PrEP use by SNAP",
       x = "Number of anal sex partners last year",
       y = "Proportion on PrEP") +
  geom_line() +
  geom_point(aes(size=snap.n), color = "blue", alpha = 0.5)+
  geom_smooth(span = 1)

Deg.tot

Table

tempDF <- wDF %>% select(PREP_CURRENT, deg.tot, ego.wawt) %>%
  filter(!is.na(deg.tot)) %>%
  mutate(deg.tot = pmin(deg.tot, 3)) %>%
  group_by(PREP_CURRENT, deg.tot) %>%
  summarize(num = round(sum(ego.wawt))) %>%
  group_by(deg.tot) %>%
  mutate(deg.tot.n = sum(num),
         prop = round(100*num/sum(num), 1)) 

tempDF %>%   filter(PREP_CURRENT == 1) %>%
  kable(caption= "Current PrEP use use by deg.tot") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("HIV- proportions; n = ",
                 sum(tempDF$deg.tot.n), 
                 "; deg.tot topcoded at 3+"))
Current PrEP use use by deg.tot
PREP_CURRENT deg.tot num deg.tot.n prop
1 0 25 281 8.9
1 1 50 292 17.1
1 2 22 81 27.2
1 3 63 116 54.3
Note:
HIV- proportions; n = 1540 ; deg.tot topcoded at 3+

Plot

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  ggplot(aes(x=deg.tot, y=prop, weight=deg.tot.n)) +
    ylim(c(0, 100)) +
    labs(title = "Current PrEP use by total degree",
    caption = paste("HIV- proportions; n = ",
      sum(tempDF$deg.tot.n), 
      "; deg.tot topcoded at 3+"),
    x = "Degree (Main + Casl)",
    y = "Proportion on PrEP") +
  geom_line() +
  geom_point(aes(size=deg.tot.n), color = "blue", alpha = 0.5)

Deg tot by SNAP

Plot

tempDF <- wDF %>% 
  select(PREP_CURRENT, snap, deg.tot, ego.wawt) %>%
  filter(!is.na(snap) & !is.na(deg.tot)) %>%
  mutate(snapt = pmin(snap, 5),
         conc = ifelse(deg.tot < 2, 0, 1)) %>%
  group_by(PREP_CURRENT, snapt, conc) %>%
  summarize(num = round(sum(ego.wawt))) %>%
  group_by(snapt, conc) %>%
  mutate(snap.n = sum(num),
         prop = round(100*num/sum(num), 1)) 

tempDF %>%
  filter(PREP_CURRENT == 1) %>%
  ggplot(aes(x=snapt, y=prop, 
             group = factor(conc), color = factor(conc),
             weight=snap.n)) +
  ylim(c(0, 100)) +
  labs(title = "Current PrEP use by SNAP & Concurrent Persistent P",
       x = "Number of anal sex partners last year",
       y = "Proportion on PrEP") +
  geom_line() +
  geom_point(aes(size=snap.n), alpha = 0.5)

Prediction

Logistic regression

Model 1

prepDF <- baseDF %>%
  filter(!is.na(snap)) %>%
  mutate(PREP_CURRENT = ifelse(PREP_CURRENT=="on prep", 1, 0),
         age_group = relevel(factor(age_group), ref = "15-24"),
         race = relevel(factor(race), ref = "O"),
         region = relevel(factor(region), ref = "King"),
         income_cat = relevel(factor(income_cat), ref = "fpl600+"),
         insurance = relevel(factor(insurance), ref = "emplyr"),
         snapt = pmin(snap, 20),
         snapb = ifelse(snap > 5, 1, 0),
         deg.tot = pmin(deg.tot, 3),
         deg.tot.b = ifelse(deg.tot==3, 1, 0))

design <- svydesign(id = ~Vrid, 
                    data = prepDF, 
                    weights = ~ego.wawt)

logitm1 <- svyglm(PREP_CURRENT ~ age_group,
                  data = prepDF, 
                  family = quasibinomial(), 
                  design)

kable(tidy(logitm1, exponentiate = T), 
      caption= "Model 1", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 1
term estimate std.error statistic p.value
(Intercept) 0.10 0.44 -5.37 0.00
age_group25-34 3.60 0.47 2.73 0.01
age_group35-44 4.76 0.51 3.05 0.00
age_group45-54 5.28 0.49 3.43 0.00
age_group55-65 2.71 0.51 1.96 0.05

Model 2

logitm2 <- svyglm(PREP_CURRENT ~ race,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm2, exponentiate = T), 
      caption= "Model 2", digits = 2) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 2
term estimate std.error statistic p.value
(Intercept) 0.33 0.10 -10.90 0.00
raceB 2.56 0.65 1.44 0.15
raceH 0.53 0.34 -1.85 0.07

Model 3

logitm3 <- svyglm(PREP_CURRENT ~ region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm3, exponentiate = T), 
      caption= "Model 3", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 3
term estimate std.error statistic p.value
(Intercept) 0.42 0.15 -5.83 0.0000
regionEasternWA 0.27 0.38 -3.50 0.0005
regionWesternWA 0.61 0.21 -2.34 0.0198

Model 4

logitm4 <- svyglm(PREP_CURRENT ~ insurance,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm4, exponentiate = T), 
      caption= "Model 4", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 4
term estimate std.error statistic p.value
(Intercept) 0.37 0.13 -7.52 0.0000
insuranceindvdl_othr 1.31 0.36 0.74 0.4573
insurancemedicare_caid 0.46 0.34 -2.27 0.0236
insuranceothr_gov 1.22 0.44 0.45 0.6521
insuranceuninsrd_NA 0.16 0.64 -2.90 0.0039

Model 5

logitm5 <- svyglm(PREP_CURRENT ~ income_cat,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 5
term estimate std.error statistic p.value
(Intercept) 0.45 0.16 -5.03 0.0000
income_catfpl138 0.12 0.49 -4.21 0.0000
income_catfpl138-300 0.47 0.29 -2.62 0.0090
income_catfpl300-400 1.10 0.34 0.28 0.7806
income_catfpl400-500 0.73 0.41 -0.76 0.4463
income_catfpl500-600 2.08 0.46 1.58 0.1147
income_catMissing 0.20 0.80 -1.99 0.0465

Model 6

logitm5 <- svyglm(PREP_CURRENT ~ poly(snapt, 2),
                  data = prepDF %>% filter(!is.na(snapt)), 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 5
term estimate std.error statistic p.value
(Intercept) 2.400000e-01 0.14 -10.01 0
poly(snapt, 2)1 1.128082e+12 2.56 10.82 0
poly(snapt, 2)2 0.000000e+00 2.42 -4.96 0

Model 7

logitm5 <- svyglm(PREP_CURRENT ~ factor(deg.tot),
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm5, exponentiate = T), 
      caption= "Model 5", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 5
term estimate std.error statistic p.value
(Intercept) 0.14 0.21 -9.42 0.0000
factor(deg.tot)1 1.70 0.27 1.98 0.0486
factor(deg.tot)2 3.61 0.34 3.81 0.0002
factor(deg.tot)3 11.56 0.33 7.36 0.0000

Model 8

logitm6 <- svyglm(PREP_CURRENT ~ age_group + race + region,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm6, exponentiate = T), 
      caption= "Model 6", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 6
term estimate std.error statistic p.value
(Intercept) 0.13 0.46 -4.43 0.0000
age_group25-34 3.20 0.47 2.46 0.0143
age_group35-44 4.26 0.49 2.99 0.0029
age_group45-54 4.55 0.49 3.10 0.0020
age_group55-65 2.55 0.51 1.82 0.0695
raceB 1.96 0.62 1.09 0.2756
raceH 0.61 0.37 -1.33 0.1827
regionEasternWA 0.31 0.37 -3.20 0.0014
regionWesternWA 0.63 0.21 -2.19 0.0286

Model 9

logitm7 <- svyglm(PREP_CURRENT ~ age_group + race + region + 
                    factor(deg.tot),
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm7, exponentiate = T), 
      caption= "Model 7", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 7
term estimate std.error statistic p.value
(Intercept) 0.07 0.49 -5.31 0.0000
age_group25-34 3.09 0.47 2.41 0.0163
age_group35-44 3.08 0.49 2.31 0.0210
age_group45-54 4.60 0.48 3.15 0.0017
age_group55-65 1.91 0.52 1.24 0.2170
raceB 0.80 0.56 -0.41 0.6837
raceH 0.44 0.42 -1.94 0.0528
regionEasternWA 0.30 0.42 -2.86 0.0044
regionWesternWA 0.63 0.24 -1.96 0.0501
factor(deg.tot)1 1.47 0.28 1.36 0.1739
factor(deg.tot)2 3.00 0.34 3.19 0.0015
factor(deg.tot)3 11.75 0.33 7.45 0.0000

Model 10

logitm8 <- svyglm(PREP_CURRENT ~ age_group + race + region + 
                    deg.tot.b + snapb,
                  data = prepDF, 
                  family=quasibinomial(), 
                  design)
kable(tidy(logitm8, exponentiate = T), 
      caption= "Model 8", digits = c(rep(2,4), 4)) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
Model 8
term estimate std.error statistic p.value
(Intercept) 0.08 0.43 -5.95 0.0000
age_group25-34 2.99 0.46 2.40 0.0167
age_group35-44 3.17 0.47 2.45 0.0147
age_group45-54 3.95 0.48 2.87 0.0043
age_group55-65 1.82 0.52 1.15 0.2512
raceB 1.29 0.68 0.37 0.7080
raceH 0.38 0.46 -2.09 0.0369
regionEasternWA 0.25 0.44 -3.14 0.0018
regionWesternWA 0.58 0.25 -2.25 0.0250
deg.tot.b 2.95 0.36 3.00 0.0028
snapb 5.71 0.28 6.21 0.0000

Comparison of AICs

models <- get_formula("logitm", 8)
aics <- get_aics("logitm", 8, "logit")

kable(data.frame(Model = models, 
                 AIC = aics),
      caption= "AIC comparison", digits = 1) %>% 
  kable_styling(full_width=F, position="center",
                bootstrap_options = c("striped"))
AIC comparison
Model AIC
PREP_CURRENT ~ age_group 707.7
PREP_CURRENT ~ race 720.7
PREP_CURRENT ~ region 711.4
PREP_CURRENT ~ insurance 712.0
PREP_CURRENT ~ factor(deg.tot) 640.0
PREP_CURRENT ~ age_group + race + region 698.9
PREP_CURRENT ~ age_group + race + region + factor(deg.tot) 622.7
PREP_CURRENT ~ age_group + race + region + deg.tot.b + snapb 586.8

Save out targets

Given the small number on PrEP, we can only construct the marginal breakdowns by attributes.

# just pull the proportions currently on prep
age <- prep_curr_age %>% 
  filter(PREP_CURRENT=="on prep") %>%
  select(-PREP_CURRENT)
race <- prep_curr_race %>% 
  filter(PREP_CURRENT=="on prep") %>%
  select(-PREP_CURRENT)
region <- prep_curr_region %>% 
  filter(PREP_CURRENT=="on prep") %>%
  select(-PREP_CURRENT)

## Save objects
save_out$target_prep_current <- list(age = age,
                                     race = race,
                                     region = region)

PDAP

  • Whether the respondent was on WA State PrEP-DAP (PDAPA) or on Gilead’s Medication Assistance Program (PDAPB) among those who are currently on PrEP

  • Sample restricted to those currently on PrEP: 227. This is too few cases to use for prediction purposes.

Descriptive statistics

Overall

# with(cDF, table(PREP_CURRENT, statePDAP = PDAPA, useNA = "al"))
# with(cDF, table(PREP_CURRENT, gileadPDP = PDAPB, useNA = "al"))
# with(cDF, table(statePDAP = PDAPA, gileadPDP = PDAPB, useNA = "al"))

baseDF <- cDF %>%
  filter(PREP_CURRENT == 1) %>%
  mutate(PDAP = case_when(PDAPA == 1 | PDAPB == 1 ~ "PDAP",
                             TRUE ~ "no PDAP"))

pdap_prop <- baseDF %>%
  group_by(PDAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop %>% kable(caption= "Distribution of PDAP enrollment") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("Sample restricted to current PrEP users, n =",
                 sum(pdap_prop$n)))
Distribution of PDAP enrollment
PDAP n n.wtd prop.wtd
no PDAP 49 46.7 0.28
PDAP 112 120.8 0.72
Note:
Sample restricted to current PrEP users, n = 161

Age

Table

pdap_prop_age <- baseDF %>%
  group_by(PDAP, age_group) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(age_group) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop_age %>% 
  pivot_wider(names_from = PDAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Age", rep(c("no PDAP", "PDAP"), 3)),
        caption= paste("PDAP enrollment by age")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current PrEP users; n = ",
                 sum(pdap_prop$n)))
PDAP enrollment by age
Obs N
Wtd N
Wtd Prop
Age no PDAP PDAP no PDAP PDAP no PDAP PDAP
15-24 2 5 2.5 7.5 0.25 0.75
25-34 19 36 18.4 33.9 0.35 0.65
35-44 9 29 8.6 36.1 0.19 0.81
45-54 13 28 12.0 29.0 0.29 0.71
55-65 6 14 5.2 14.4 0.27 0.73
Note:
Restricted to current PrEP users; n = 161

Plot

fig <- ggplot(pdap_prop_age, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = age_group, fill = age_group)) +
  geom_bar(stat="identity", position = "dodge", 
           color = "darkgrey", alpha = 0.7) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0))

title.plotly <-
  list(text = paste0('PDAP enrollment by age group',
                     '<br>',
                     '<sup>',
                     'n = ',
                     sum(pdap_prop$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Race

Table

pdap_prop_race <- baseDF %>%
  group_by(PDAP, race) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop_race %>% 
  pivot_wider(names_from = PDAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Race", rep(c("no PDAP", "PDAP"), 3)),
        caption= paste("PDAP enrollment by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current PrEP users; n = ",
                 sum(pdap_prop$n)))
PDAP enrollment by race
Obs N
Wtd N
Wtd Prop
Race no PDAP PDAP no PDAP PDAP no PDAP PDAP
B 3 3 3.2 13.9 0.19 0.81
H 3 10 2.7 8.7 0.24 0.76
O 43 99 40.8 98.2 0.29 0.71
Note:
Restricted to current PrEP users; n = 161

Plot

fig <- ggplot(pdap_prop_race, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = race, fill = race)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by race',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(pdap_prop$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Region

Table

pdap_prop_region <- baseDF %>%
  group_by(PDAP, region) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

pdap_prop_region %>% 
  pivot_wider(names_from = PDAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Region", rep(c("no PDAP", "PDAP"), 3)),
        caption= paste("PDAP enrollment by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current PrEP users; n = ",
                 sum(pdap_prop$n)))
PDAP enrollment by region
Obs N
Wtd N
Wtd Prop
Region no PDAP PDAP no PDAP PDAP no PDAP PDAP
EasternWA 4 7 2.8 4.2 0.40 0.60
King 25 69 28.1 88.5 0.24 0.76
WesternWA 20 36 15.8 28.1 0.36 0.64
Note:
Restricted to current PrEP users; n = 161

Plot

fig <- ggplot(pdap_prop_region, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = region, fill = region)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by region',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(pdap_prop$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Insurance

Table

tempDF <- baseDF %>%
  group_by(insurance, PDAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(insurance) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = insurance,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PDAP enrollment by insurance")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Insurance" = 5)) %>%
  footnote(paste("Proportion of current PrEP users; n = ",
                 sum(tempDF$n)))
Distribution of PDAP enrollment by insurance
Insurance
PDAP emplyr indvdl_othr medicare_caid othr_gov uninsrd_NA
no PDAP 0.2 0.28 0.75 0.58 0.69
PDAP 0.8 0.72 0.25 0.42 0.31
Note:
Proportion of current PrEP users; n = 161

Plot

fig <- ggplot(tempDF, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = insurance, fill = insurance)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by insurance',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Income

Table

tempDF <- baseDF %>%
  group_by(income_cat, PDAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(income_cat) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = income_cat,
              values_from = prop.wtd) %>%
  kable(caption= paste("Distribution of PDAP enrollment by FPL Income Category")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "FPL Income category" = 7)) %>%
  footnote(paste("Proportion of current PrEP users; n = ",
                 sum(tempDF$n)))
Distribution of PDAP enrollment by FPL Income Category
FPL Income category
PDAP fpl138 fpl138-300 fpl300-400 fpl400-500 fpl500-600 fpl600+ Missing
no PDAP 0.85 0.4 0.38 0.54 0.17 0.19 0.21
PDAP 0.15 0.6 0.62 0.46 0.83 0.81 0.79
Note:
Proportion of current PrEP users; n = 161

Plot

fig <- ggplot(tempDF, 
              aes(x = PDAP, y = prop.wtd, text = prop.wtd,
                  group = income_cat, fill = income_cat)) +
  geom_bar(stat="identity", position = "dodge", color = "darkgrey",
           alpha = 0.7, ) +
  labs(x = "PDAP enrollment",
       y = "wtd. proportion") +
  theme(plot.margin = margin(t=20,r=5,b=5,l=0)) 

title.plotly <-
  list(text = paste0('PDAP enrollment by FPL income category',
                     '<br>',
                     '<sup>',
                     'HIV-; n = ',
                     sum(tempDF$n),
                     '</sup>'))

ggplotly(fig , tooltip = "text") %>%
  layout(#hoverlabel = list(font=list(color="black")),
    title = title.plotly)

Save out targets

Given small sample sizes, we may need to use the overall proportion on PDAP. But we save the age/race/region breakdowns

prop <- pdap_prop %>% 
  filter(PDAP=="PDAP") %>%
  select(-PDAP)

# We leave the age/race/region breakdowns intact to make small cell sizes visible.

## Save objects
save_out$target_pdap <- list(prop = prop,
                             age = pdap_prop_age,
                             race = pdap_prop_race,
                             region = pdap_prop_region)

ART use and ADAP

Overall

  • ART use is nearly universal for HIV+
# There are 3 NA that can't be recovered
# with(cDF[cDF$diag.status==1,], table(ART_CURRENT, useNA="al"))

baseDF <- cDF %>%
  filter(diag.status == 1 & !is.na(ART_CURRENT)) %>%
  mutate(ART_CURRENT = factor(ifelse(ART_CURRENT == 1, 1, 0),
                              levels = c(0, 1), 
                              labels = c("not on ART", "on ART")),
         ADAP = factor(ifelse(ADAP == 1, 1, 0),
                       levels = c(0, 1), 
                       labels = c("no ADAP", "ADAP")))

tempDF <- baseDF %>%
  group_by(ART_CURRENT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

tempDF %>% kable(caption= "Distribution of ART use") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("Sample restricted to HIV+, n =",
                 sum(tempDF$n)))
Distribution of ART use
ART_CURRENT n n.wtd prop.wtd
not on ART 2 2.9 0.03
on ART 87 89.7 0.97
Note:
Sample restricted to HIV+, n = 89

Enrolled in ADAP

  • This is restricted to HIV-positive on ART, resulting in a very small sample size.

  • The breakdowns by race are not reliable due to small cell sizes

  • The breakdown by region is slightly more reliable.

Overall

adap_prop <- baseDF %>%
  filter(ART_CURRENT == "on ART") %>%
  group_by(ADAP) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

adap_prop %>% kable(caption= "Distribution of ADAP enrollment") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  footnote(paste("Sample restricted to HIv+ on ART, n =",
                 sum(adap_prop$n)))
Distribution of ADAP enrollment
ADAP n n.wtd prop.wtd
no ADAP 58 62.9 0.7
ADAP 29 26.8 0.3
Note:
Sample restricted to HIv+ on ART, n = 87

Race

adap_prop_race <- baseDF %>%
  filter(ART_CURRENT == "on ART") %>%
  group_by(ADAP, race) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(race) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

adap_prop_race %>% 
  pivot_wider(names_from = ADAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Race", rep(c("no ADAP", "ADAP"), 3)),
        caption= paste("ADAP enrollment by race")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current ART users; n = ",
                 sum(adap_prop_race$n)))
ADAP enrollment by race
Obs N
Wtd N
Wtd Prop
Race no ADAP ADAP no ADAP ADAP no ADAP ADAP
B 1 2 2.4 3.0 0.44 0.56
H 5 2 9.4 1.9 0.83 0.17
O 52 25 51.1 22.0 0.70 0.30
Note:
Restricted to current ART users; n = 87

Region

adap_prop_region <- baseDF %>%
  filter(ART_CURRENT == "on ART") %>%
  group_by(ADAP, region) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  group_by(region) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2))

adap_prop_region %>% 
  pivot_wider(names_from = ADAP,
              values_from = c(n, n.wtd, prop.wtd)) %>%
  kable(col.names = c("Region", rep(c("no ADAP", "ADAP"), 3)),
        caption= paste("ADAP enrollment by region")) %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Obs N" = 2, "Wtd N" = 2, "Wtd Prop"= 2)) %>%
  footnote(paste("Restricted to current ART users; n = ",
                 sum(adap_prop_region$n)))
ADAP enrollment by region
Obs N
Wtd N
Wtd Prop
Region no ADAP ADAP no ADAP ADAP no ADAP ADAP
EasternWA 4 5 2.4 2.9 0.45 0.55
King 30 12 38.3 14.4 0.73 0.27
WesternWA 24 12 22.2 9.5 0.70 0.30
Note:
Restricted to current ART users; n = 87

Save out targets

Given small sample sizes, we just save the overall proportion on PDAP.

prop <- adap_prop %>% 
  filter(ADAP=="ADAP") %>%
  select(-ADAP)

# We leave the race/region breakdown intact to make cell size issues visible

## Save objects
save_out$target_adap <- list(prop = prop,
                             race = adap_prop_race,
                             region = adap_prop_region)

STI infections in the past 12 months

  • This is based on three variables BSTIA (GC), BSTIB (CT), and BSTIC (syphilis).
baseDF <- cDF %>%
  mutate(
    PDAP = ifelse(PDAPA == 1 | PDAPB == 1, 1, 0),
    PREP = ifelse(is.na(PREP_CURRENT), 0, PREP_CURRENT),
    hiv.grp = case_when(
      diag.status==0 & PREP == 0 ~ 1,
      diag.status==0 & PREP == 1 & PDAP == 0 ~ 2,
      diag.status==0 & PREP == 1 & PDAP == 1 ~ 3,
      diag.status==1 & ART_CURRENT == 0 ~ 4,
      diag.status==1 & ART_CURRENT == 1 & ADAP == 0 ~ 5,
      diag.status==1 & ART_CURRENT == 1 & ADAP == 1 ~ 6),
    hiv.grp = factor(hiv.grp,
                     levels = c(1:6),
                     labels = c("HIV- no PrEP", "HIV- on PrEP",
                                "HIV- in PDAP",
                                "HIV+ no ART", "HIV+ on ART",
                                "HIV+ in ADAP")),
    GC = factor(ifelse(BSTIA > 3, 4, BSTIA), 
                levels = c(1, 2, 3, 4), 
                labels = c("never", "once", "more than once", "DK/Ref")), 
    CT = factor(ifelse(BSTIB > 3, 4, BSTIB), 
                levels = c(1, 2, 3, 4), 
                labels = c("never", "once", "more than once", "DK/Ref")), 
    Syphilis = factor(ifelse(BSTIC > 3, 4, BSTIC), 
                levels = c(1, 2, 3, 4), 
                labels = c("never", "once", "more than once", "DK/Ref")))

Overall

CT

baseDF %>%
  group_by(CT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "CT in last 12 mos") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
CT in last 12 mos
CT n n.wtd prop.wtd
never 729 716.9 0.78
once 47 55.6 0.06
more than once 16 17.1 0.02
DK/Ref 5 4.4 0.00
NA 126 129.7 0.14

GC

baseDF %>%
  group_by(GC) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "GC in last 12 mos") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
GC in last 12 mos
GC n n.wtd prop.wtd
never 720 717.3 0.78
once 64 63.9 0.07
more than once 11 10.7 0.01
DK/Ref 4 3.6 0.00
NA 124 128.2 0.14

Syphilis

baseDF %>%
  group_by(Syphilis) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  kable(caption= "Syphilis in last 12 mos") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped"))
Syphilis in last 12 mos
Syphilis n n.wtd prop.wtd
never 761 757.9 0.82
once 26 25.2 0.03
more than once 6 6.5 0.01
DK/Ref 6 5.8 0.01
NA 124 128.2 0.14

Dx HIV status

CT

baseDF %>%
  group_by(diag.status, CT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(diag.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = diag.status,
              values_from = prop.wtd) %>%
  kable(caption= "CT in last 12 mos by Dx HIV status") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx HIV status" = 2))
CT in last 12 mos by Dx HIV status
Dx HIV status
CT 0 1
never 0.78 0.70
once 0.05 0.16
more than once 0.02 NA
DK/Ref 0.00 0.01
NA 0.14 0.13

GC

baseDF %>%
  group_by(diag.status, GC) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(diag.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = diag.status,
              values_from = prop.wtd) %>%
  kable(caption= "GC in last 12 mos by Dx HIV status") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx HIV status" = 2))
GC in last 12 mos by Dx HIV status
Dx HIV status
GC 0 1
never 0.78 0.76
once 0.07 0.08
more than once 0.01 0.03
DK/Ref 0.00 NA
NA 0.14 0.13

Syphilis

baseDF %>%
  group_by(diag.status, Syphilis) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(diag.status) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = diag.status,
              values_from = prop.wtd) %>%
  kable(caption= "Syphilis in last 12 mos by Dx HIV status") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx HIV status" = 2))
Syphilis in last 12 mos by Dx HIV status
Dx HIV status
Syphilis 0 1
never 0.83 0.78
once 0.02 0.08
more than once 0.01 NA
DK/Ref 0.01 0.01
NA 0.14 0.13

HIV/Care/DAP status

CT

nmiss <- with(baseDF, table(is.na(CT) | is.na(hiv.grp)))[[2]]
              
baseDF %>%
  filter(!is.na(CT) & !is.na(hiv.grp)) %>%
  group_by(hiv.grp, CT) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(hiv.grp) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = hiv.grp,
              values_from = prop.wtd) %>%
  kable(caption= "CT in last 12 mos by HIV/Care/DAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx/Care/DAP status" = 5)) %>%
  footnote(paste(nmiss, "Missing cases"))
CT in last 12 mos by HIV/Care/DAP
Dx/Care/DAP status
CT HIV- no PrEP HIV- on PrEP HIV- in PDAP HIV+ on ART HIV+ in ADAP
never 0.95 0.80 0.80 0.81 0.83
once 0.03 0.09 0.17 0.18 0.17
more than once 0.02 0.11 0.03 NA NA
DK/Ref 0.01 NA NA 0.01 NA
Note:
130 Missing cases

GC

nmiss <- with(baseDF, table(is.na(GC) | is.na(hiv.grp)))[[2]]

baseDF %>%
  filter(!is.na(GC) & !is.na(hiv.grp)) %>%
  group_by(hiv.grp, GC) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(hiv.grp) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = hiv.grp,
              values_from = prop.wtd) %>%
  kable(caption= "GC in last 12 mos by HIV/Care/DAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx/Care/DAP status" = 5)) %>%
  footnote(paste(nmiss, "Missing cases"))
GC in last 12 mos by HIV/Care/DAP
Dx/Care/DAP status
GC HIV- no PrEP HIV- on PrEP HIV- in PDAP HIV+ on ART HIV+ in ADAP
never 0.95 0.70 0.79 0.92 0.81
once 0.04 0.26 0.19 0.06 0.19
more than once 0.01 0.04 0.03 0.01 NA
DK/Ref 0.01 NA NA NA NA
Note:
128 Missing cases

Syphilis

nmiss <- with(baseDF, table(is.na(GC) | is.na(hiv.grp)))[[2]]

baseDF %>%
  filter(!is.na(GC) & !is.na(hiv.grp)) %>%
  group_by(hiv.grp, Syphilis) %>%
  summarise(n = n(),
            n.wtd = round(sum(ego.wawt), 1)) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>% 
  group_by(hiv.grp) %>%
  mutate(prop.wtd = round(n.wtd / sum(n.wtd), 2)) %>%
  select(-c(n, n.wtd)) %>%
  pivot_wider(names_from = hiv.grp,
              values_from = prop.wtd) %>%
  kable(caption= "Syphilis in last 12 mos by HIV/Care/DAP") %>% 
  kable_styling(full_width=F, position="center", 
                bootstrap_options = c("striped")) %>%
  add_header_above(c("", "Dx/Care/DAP status" = 5)) %>%
  footnote(paste(nmiss, "Missing cases"))
Syphilis in last 12 mos by HIV/Care/DAP
Dx/Care/DAP status
Syphilis HIV- no PrEP HIV- on PrEP HIV- in PDAP HIV+ on ART HIV+ in ADAP
never 0.98 0.91 0.90 0.91 0.92
once 0.01 0.02 0.08 0.07 0.08
more than once 0.01 0.07 NA NA NA
DK/Ref 0.00 NA 0.02 0.02 NA
Note:
128 Missing cases

Descriptives table

des_table <- 
  tibble(Outputs = names(save_out), 
         Description = c("predicted income", 
                         "income FPL levels", 
                         "predicted insurance", 
                         "predicted PrEP awareness", 
                         "predicted PrEP ever (interest)", 
                         "target HIV- on PrEP", 
                         "target PrEP users in PDAP", 
                         "target ART patients in ADAP"), 
         Source = c("multinomial regression on age, race, region", 
                         "FPL guidelines",
                         "multinomial regression on age, race, region, hiv status", 
                         "logistic regression on race, region", 
                         "logistic regression on age, region", 
                         "obs props overall and by age, race and region", 
                         "obs props overall and by age, race and region", 
                         "obs props overall and by race and region"), 
         group = c(rep("Parameters: Demog", 3),
                   rep("Parameters: PrEP", 2),
                   rep("Targets", 3))) 
des_table %>%
  gt::gt(rowname_col = "Outputs",
     groupname_col = "group")
Description Source
Parameters: Demog
pred_inc predicted income multinomial regression on age, race, region
income_levels income FPL levels FPL guidelines
pred_ins predicted insurance multinomial regression on age, race, region, hiv status
Parameters: PrEP
pred_prep_aware predicted PrEP awareness logistic regression on race, region
pred_prep_ever predicted PrEP ever (interest) logistic regression on age, region
Targets
target_prep_current target HIV- on PrEP obs props overall and by age, race and region
target_pdap target PrEP users in PDAP obs props overall and by age, race and region
target_adap target ART patients in ADAP obs props overall and by race and region
print("Structure of output object:")
## [1] "Structure of output object:"
for(i in 1:length(names(save_out))){
  str(save_out[i], vec.len=0, give.attr=F)}
## List of 1
##  $ pred_inc:'data.frame':    270 obs. of  5 variables:
##   ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   ..$ race     : Factor w/ 3 levels "B","H","O": NULL ...
##   ..$ region   : Factor w/ 3 levels "EasternWA","King",..: NULL ...
##   ..$ income   : chr [1:270]  ...
##   ..$ prob     : num [1:270] NULL ...
## List of 1
##  $ income_levels:'data.frame':   6 obs. of  3 variables:
##   ..$ lvl: int [1:6] NULL ...
##   ..$ lb : num [1:6] NULL ...
##   ..$ ub : num [1:6] NULL ...
## List of 1
##  $ pred_ins: tibble [450 x 6] (S3: tbl_df/tbl/data.frame)
##   ..$ age_group  : Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   ..$ race       : Factor w/ 3 levels "B","H","O": NULL ...
##   ..$ region     : Factor w/ 3 levels "EasternWA","King",..: NULL ...
##   ..$ diag.status: chr [1:450]  ...
##   ..$ insurance  : chr [1:450]  ...
##   ..$ prob       : num [1:450] NULL ...
## List of 1
##  $ pred_prep_aware:'data.frame': 9 obs. of  3 variables:
##   ..$ race  : Factor w/ 3 levels "O","B","H": NULL ...
##   ..$ region: Factor w/ 3 levels "King","EasternWA",..: NULL ...
##   ..$ prob  : num [1:9] NULL ...
## List of 1
##  $ pred_prep_ever:'data.frame':  15 obs. of  3 variables:
##   ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   ..$ region   : Factor w/ 3 levels "King","EasternWA",..: NULL ...
##   ..$ prob     : num [1:15] NULL ...
## List of 1
##  $ target_prep_current:List of 3
##   ..$ age   : grouped_df [5 x 4] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   .. ..$ n        : int [1:5] NULL ...
##   .. ..$ n.wtd    : num [1:5] NULL ...
##   .. ..$ prop.wtd : num [1:5] NULL ...
##   ..$ race  : grouped_df [3 x 4] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ race    : chr [1:3]  ...
##   .. ..$ n       : int [1:3] NULL ...
##   .. ..$ n.wtd   : num [1:3] NULL ...
##   .. ..$ prop.wtd: num [1:3] NULL ...
##   ..$ region: grouped_df [3 x 4] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ region  : chr [1:3]  ...
##   .. ..$ n       : int [1:3] NULL ...
##   .. ..$ n.wtd   : num [1:3] NULL ...
##   .. ..$ prop.wtd: num [1:3] NULL ...
## List of 1
##  $ target_pdap:List of 4
##   ..$ prop  : tibble [1 x 3] (S3: tbl_df/tbl/data.frame)
##   .. ..$ n       : int NULL ...
##   .. ..$ n.wtd   : num NULL ...
##   .. ..$ prop.wtd: num NULL ...
##   ..$ age   : grouped_df [10 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ PDAP     : chr [1:10]  ...
##   .. ..$ age_group: Factor w/ 5 levels "15-24","25-34",..: NULL ...
##   .. ..$ n        : int [1:10] NULL ...
##   .. ..$ n.wtd    : num [1:10] NULL ...
##   .. ..$ prop.wtd : num [1:10] NULL ...
##   ..$ race  : grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ PDAP    : chr [1:6]  ...
##   .. ..$ race    : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...
##   ..$ region: grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ PDAP    : chr [1:6]  ...
##   .. ..$ region  : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...
## List of 1
##  $ target_adap:List of 3
##   ..$ prop  : tibble [1 x 3] (S3: tbl_df/tbl/data.frame)
##   .. ..$ n       : int NULL ...
##   .. ..$ n.wtd   : num NULL ...
##   .. ..$ prop.wtd: num NULL ...
##   ..$ race  : grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ ADAP    : Factor w/ 2 levels "no ADAP","ADAP": NULL ...
##   .. ..$ race    : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...
##   ..$ region: grouped_df [6 x 5] (S3: grouped_df/tbl_df/tbl/data.frame)
##   .. ..$ ADAP    : Factor w/ 2 levels "no ADAP","ADAP": NULL ...
##   .. ..$ region  : chr [1:6]  ...
##   .. ..$ n       : int [1:6] NULL ...
##   .. ..$ n.wtd   : num [1:6] NULL ...
##   .. ..$ prop.wtd: num [1:6] NULL ...

Save output datafiles

save_out$des_table <- des_table # description table
saveRDS(save_out, here::here("Data", "WhampCostStats.rds"))