knitr::opts_chunk$set(error = TRUE, message = FALSE, warning = FALSE)

library(dplyr)
library(tidyr)
library(lubridate)
library(spatstat)
library(ggplot2)
library(kableExtra)
library(survival)
library(survminer)

Preliminaries

egoDF <- WHAMPData::whampArtnetWA_egodata$main$egos

wDF <- WHAMPData::whamp_wide %>%
  haven::zap_label() %>%
  mutate(RCNTESTMONTH = as.numeric(RCNTESTMONTH),
         RCNTESTYEAR = as.numeric(RCNTESTYEAR)) %>%
  select(ego.id = Vrid, Vdatesub,
         EVERTEST, TEST2YRS,
         PREPCHKFREQ_CURR, PREP_HIVTESTFREQ, 
         RCNTESTMONTH, RCNTESTYEAR, RCNTRSLT) 

aDF <- ARTnetData::ARTnet.wide %>% 
  filter(State == "WA") %>%
  select(ego.id = AMIS_ID, Vdatesub = SUB_DATE,
         EVERTEST, TEST2YRS, #aprep,
         PREPCHKFREQ_CURR, PREP_HIVTESTFREQ, 
         RCNTESTMONTH = RCNTSTMONTH, RCNTESTYEAR = RCNTSTYEAR,
         RCNTRSLT)

newvars <- bind_rows(wDF,aDF)

allDF <- egoDF %>% left_join(newvars, by = "ego.id") 

# note we lose 181 cases if we filter on both
# EVERTEST != 1 (91), and diag.status=1 (90)
allDF %>% 
  with(table(notest.or.pos=(EVERTEST != 1 | diag.status==1), 
             if_artnet, useNA = "al"))

##              if_artnet
## notest.or.pos   0   1 <NA>
##         FALSE 541 110    0
##         TRUE  140  40    0
##         <NA>    1   0    0

# About 40% of HIV+ were dx this year 
allDF %>% 
  with(table(last.testyr = RCNTESTYEAR, 
             HIV.status = diag.status, useNA = "al"))

##            HIV.status
## last.testyr   0   1 <NA>
##        0      3   0    0
##        1962   0   1    0
##        1963   0   1    0
##        1985   0   1    0
##        1986   0   2    0
##        1987   0   3    0
##        1989   0   1    0
##        1992   0   3    0
##        1994   0   3    0
##        1995   1   1    0
##        1996   0   1    0
##        1997   0   4    0
##        1998   0   1    0
##        2000   0   2    0
##        2001   2   1    0
##        2002   1   0    0
##        2003   0   2    0
##        2004   0   1    0
##        2005   4   0    0
##        2006   3   1    0
##        2007   4   1    0
##        2008   2   5    0
##        2009   1   1    0
##        2010   3   1    0
##        2011   5   1    0
##        2012   3   0    0
##        2013  13   1    0
##        2014   8   0    0
##        2015  20   0    0
##        2016  25   1    0
##        2017  91   7    0
##        2018 121   5    1
##        2019 332  33    0
##        <NA>  66   0    0
##        NaN   34   4    0

allDF %>% 
  with(table(last.test.2019 = RCNTESTYEAR==2019, 
             HIV.status = diag.status, useNA = "al"))

##               HIV.status
## last.test.2019   0   1 <NA>
##          FALSE 310  52    1
##          TRUE  332  33    0
##          <NA>  100   4    0

# ## and for ARTnet
# ARTnetData::ARTnet.wide %>% 
#   with(table(last.testyr = RCNTSTYEAR, 
#              HIV.status = RCNTRSLT==2, useNA = "al"))
# 
# ARTnetData::ARTnet.wide %>% 
#   with(table(last.test.2017 = RCNTSTYEAR > 2016, 
#              HIV.status = RCNTRSLT==2, useNA = "al"))

# Dx after 2017 have been testing in last 2 yrs
allDF %>%  
  filter(diag.status==1) %>% 
  with(table(TEST2YRS, diag.after.2017 = RCNTESTYEAR > 2017, 
             useNA = "al"))

##         diag.after.2017
## TEST2YRS FALSE TRUE <NA>
##     0       35    1    2
##     1        7    3    0
##     2        2    6    0
##     3        0    2    0
##     4        1   12    0
##     5        0    4    0
##     6        0    3    0
##     7        0    1    0
##     8        0    4    0
##     12       0    1    0
##     <NA>     2    1    0
##     NaN      0    0    2

# So we filter on diag.status==1 (Negative), 
# and set tests in last 2 yrs to 0 for EVERTEST != 1.  
# Last test date is still NA for those folks.

allTestDF <- egoDF %>%
  left_join(newvars, by = "ego.id") %>%
  filter(diag.status == 0) %>%
  mutate(age.grp = ifelse(age.grp == 6, 5, age.grp),
         snap.grp = cut(snap, 
                        breaks = c(0, .5, 1, 5, 10, 20,
                                   max(snap)),
                        include.lowest = T,
                        include.hightest = T),
         snap.grp = recode(snap.grp, "[0,0.5]" = "[0]",
                           "(0.5,1]" = "[1]"),
         snap.grp3 = cut(snap, 
                        breaks = c(0, 1, 5,
                                   max(snap)),
                        include.lowest = T,
                        include.hightest = T),
         snap.grp3 = recode(snap.grp3, "[0,1]" = "0-1",
                            "(1,5]" = "2-5",
                            "(5,125]" = "6-125"),
         never_tested = ifelse(is.na(EVERTEST) | EVERTEST != 1, 
                               "T", "F"),
         tests_2yr = case_when(never_tested == "T" ~ 0,
                               is.na(TEST2YRS) ~ 0,
                               TEST2YRS == 100 ~ NA_real_,
                               TRUE ~ TEST2YRS),
         no_tests_2yrs = ifelse(tests_2yr==0, "T", "F"),
         testratemo_2yr = round(tests_2yr/24,3),
         RCNTESTYEAR = ifelse(RCNTESTYEAR==0, 2019, RCNTESTYEAR),
         last_test = case_when(
           is.na(RCNTESTYEAR) | is.na(RCNTESTMONTH) ~ NA_character_,
           as.numeric(RCNTESTYEAR) - (2019-age) < 13 ~ NA_character_,
           as.numeric(RCNTESTYEAR) > 2019 ~ NA_character_,
           TRUE ~ paste(RCNTESTYEAR, RCNTESTMONTH, 1, sep="-")),
         mos_last_test = interval(ymd(last_test), 
                                  ymd(Vdatesub)) %/% months(1),
         mos_last_test = ifelse(mos_last_test<0, 0, mos_last_test),
         test_group = case_when(never_tested == "T" ~ 0,
                                never_tested == "F" & no_tests_2yrs == "T" ~ 1,
                                !is.na(mos_last_test) & mos_last_test < 13 ~ 3,
                                TRUE ~ 2),
         prep_checkup = ifelse(prep==1, 
                               case_when(PREPCHKFREQ_CURR==1 ~ 1,
                                         PREPCHKFREQ_CURR==2 ~ 3,
                                         PREPCHKFREQ_CURR==3 ~ 6,
                                         PREPCHKFREQ_CURR==5 ~ 12,
                                         TRUE ~ 12), 
                               NA_real_),
         prep_hivtest = ifelse(prep==1, 
                              case_when(PREP_HIVTESTFREQ==1 ~ 1,
                                        TRUE ~ 2), 
                              NA_real_),
         prep_rate = prep_checkup * prep_hivtest) 

labelled::val_labels(allTestDF$test_group) <- 
  c("Never"=0, ">2yrs ago"=1, "1-2yrs"=2, "this yr"=3)

saveRDS(allTestDF, 
        file = here::here("Reports","WHAMPsurvey","MM","Testing",
                          "allTestDF.RDS"))


# # look at key variables
# table(allTestDF$never_tested, useNA = "al")
# table(allTestDF$no_tests_2yr, useNA = "al")
# table(allTestDF$tests_2yr, useNA = "al")
# table(allTestDF$testratemo_2yr, useNA = "al")
# table(allTestDF$mos_last_test, useNA = "al")
# 
# table(allTestDF$prep, useNA = "al")
# table(allTestDF$prep_checkup, useNA = "al")
# table(allTestDF$prep_hivtest, useNA = "al")
# table(allTestDF$prep_rate, useNA = "al")

# Evertesters DF, for analyses of months since last test
mltDF <- allTestDF %>% 
  filter(!is.na(mos_last_test) & mos_last_test >= 0)

Descriptives

Test group distribution

Age group

tab.alltest <- allTestDF %>% 
  group_by(age.grp, test_group) %>%
  summarize(n.test.grp = sum(ego.wawt)) 

# table of counts
tab.alltest %>%
  pivot_wider(names_from = test_group, 
              values_from = n.test.grp) %>%
  kable(caption = "Test group by age group", 
        col.names = c("Age grp", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 0) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by age group
	Test group
Age grp	Never	>2yrs ago	1-2yrs ago	In last yr
1	56	5	24	58
2	21	30	34	118
3	11	24	20	102
4	4	17	18	88
5	4	12	18	72

tab.alltest %>%
  group_by(age.grp) %>%
  mutate(prop.test.grp = scales::percent(n.test.grp / sum(n.test.grp))) %>%
  pivot_wider(-n.test.grp, names_from = test_group, 
              values_from = prop.test.grp) %>%
  kable(caption = "Test group by age group (%)", 
        col.names = c("Age grp", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 1) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by age group (%)
	Test group
Age grp	Never	>2yrs ago	1-2yrs ago	In last yr
1	39.3%	3.6%	16.6%	40.5%
2	10.2%	14.9%	16.7%	58.3%
3	7.0%	15.0%	13.0%	65.0%
4	3.46%	13.56%	14.03%	68.95%
5	4.2%	10.9%	16.8%	68.0%

tab.alltest %>% 
  group_by(age.grp) %>%
  mutate(prop.test.grp = n.test.grp / sum(n.test.grp)) %>%
  ungroup() %>%
  ggplot(aes(x = age.grp, y=prop.test.grp, color = factor(test_group))) +
  geom_line() +
  geom_point() +
  scale_color_discrete(name="Test group",
                      labels=c("Never", ">2yrs ago", "1-2 yrs ago", "In last yr")) +
  labs(title="Testing patterns by age",
       x = "Age group",
       y = "Proportion in test group")

tab.alltest %>% 
  group_by(age.grp) %>%
  mutate(prop.test.grp = n.test.grp / sum(n.test.grp)) %>%
  ungroup() %>%
  ggplot(aes(x = test_group, y=prop.test.grp, color = factor(age.grp))) +
  geom_line() +
  geom_point() +
  # scale_color_discrete(name="Test group",
  #                     labels=c("Never", ">2yrs ago", "1-2 yrs ago", "In last yr")) +
  labs(title="Testing patterns by age",
       x = "Test group",
       y = "Proportion in test group")

Race

## Rest is for no Prep
# table of counts
tab.alltest <- allTestDF %>% filter(prep == 0) %>%
  group_by(race, test_group) %>%
  summarize(n.test.grp = sum(ego.wawt)) 

tab.alltest %>%
  pivot_wider(names_from = test_group, 
              values_from = n.test.grp) %>%
  kable(caption = "Test group by race (not on PrEP)", 
        col.names = c("", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 0) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by race (not on PrEP)
	Test group
	Never	>2yrs ago	1-2yrs ago	In last yr
B	12	5	3	11
H	19	9	9	31
O	66	72	94	216

tab.alltest %>%
  group_by(race) %>%
  mutate(prop.test.grp = scales::percent(n.test.grp / sum(n.test.grp))) %>%
  pivot_wider(-n.test.grp, names_from = test_group, 
              values_from = prop.test.grp) %>%
  kable(caption = "Test group by race (not on PrEP)", 
        col.names = c("Race", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 1) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by race (not on PrEP)
	Test group
Race	Never	>2yrs ago	1-2yrs ago	In last yr
B	36.8%	16.7%	10.7%	35.7%
H	28.16%	13.70%	12.99%	45.15%
O	14.7%	16.2%	21.0%	48.2%

tab.alltest %>% 
  group_by(race) %>%
  mutate(prop.test.grp = n.test.grp / sum(n.test.grp)) %>%
  ungroup() %>%
  ggplot(aes(x = test_group, y=prop.test.grp, color = race)) +
  geom_line() +
  geom_point() +
  # scale_color_discrete(name="Test group",
  #                     labels=c("Never", ">2yrs ago", "1-2 yrs ago", "In last yr")) +
  labs(title="Testing patterns by race (not on PrEP)",
       x = "Test group",
       y = "Proportion in test group")

Region

# Rest is no Prep
# table of counts
tab.alltest <- allTestDF %>% 
  group_by(region, test_group) %>%
  summarize(n.test.grp = sum(ego.wawt)) 

tab.alltest %>%
  pivot_wider(names_from = test_group, 
              values_from = n.test.grp) %>%
  kable(caption = "Test group by region (not on PrEP)", 
        col.names = c("", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 0) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by region (not on PrEP)
	Test group
	Never	>2yrs ago	1-2yrs ago	In last yr
EasternWA	20	5	16	36
King	50	51	57	262
WesternWA	28	32	41	140

tab.alltest %>%
  group_by(region) %>%
  mutate(prop.test.grp = scales::percent(n.test.grp / sum(n.test.grp))) %>%
  pivot_wider(-n.test.grp, names_from = test_group, 
              values_from = prop.test.grp) %>%
  kable(caption = "Test group by region (not on PrEP)", 
        col.names = c("Region", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 1) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by region (not on PrEP)
	Test group
Region	Never	>2yrs ago	1-2yrs ago	In last yr
EasternWA	25.7%	6.4%	20.5%	47.4%
King	11.81%	12.09%	13.66%	62.45%
WesternWA	11.5%	13.4%	16.9%	58.2%

tab.alltest %>% 
  group_by(region) %>%
  mutate(prop.test.grp = n.test.grp / sum(n.test.grp)) %>%
  ungroup() %>%
  ggplot(aes(x = test_group, y=prop.test.grp, color = region)) +
  geom_line() +
  geom_point() +
  # scale_color_discrete(name="Test group",
  #                     labels=c("Never", ">2yrs ago", "1-2 yrs ago", "In last yr")) +
  labs(title="Testing patterns by region (not on PrEP)",
       x = "Test group",
       y = "Proportion in test group")

### SNAP Group

# table of counts
tab.alltest <- allTestDF %>% 
  group_by(snap.grp, test_group) %>%
  summarize(n.test.grp = sum(ego.wawt)) 

tab.alltest %>%
  pivot_wider(names_from = test_group, 
              values_from = n.test.grp) %>%
  kable(caption = "Test group by snap.grp (not on PrEP)", 
        col.names = c("", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 0) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by snap.grp (not on PrEP)
	Test group
	Never	>2yrs ago	1-2yrs ago	In last yr
[0]	14	22	15	19
[1]	50	57	53	98
(1,5]	27	6	31	148
(5,10]	2	1	5	80
(10,20]	3	1	4	48
(20,125]	NA	1	5	45

tab.alltest %>%
  group_by(snap.grp) %>%
  mutate(prop.test.grp = scales::percent(n.test.grp / sum(n.test.grp))) %>%
  pivot_wider(-n.test.grp, names_from = test_group, 
              values_from = prop.test.grp) %>%
  kable(caption = "Test group by snap.grp (not on PrEP)", 
        col.names = c("snap.grp", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 1) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by snap.grp (not on PrEP)
	Test group
snap.grp	Never	>2yrs ago	1-2yrs ago	In last yr
[0]	19.8%	31.4%	21.7%	27.1%
[1]	19.5%	22.0%	20.7%	37.9%
(1,5]	12.8%	2.9%	14.7%	69.7%
(5,10]	2.6%	0.9%	6.1%	90.4%
(10,20]	5.77%	2.33%	6.29%	85.61%
(20,125]	NA	1.7%	9.7%	88.5%

tab.alltest %>% 
  group_by(snap.grp) %>%
  mutate(prop.test.grp = n.test.grp / sum(n.test.grp)) %>%
  ungroup() %>%
  ggplot(aes(x = test_group, y=prop.test.grp, color = snap.grp)) +
  geom_line() +
  geom_point() +
  # scale_color_discrete(name="Test group",
  #                     labels=c("Never", ">2yrs ago", "1-2 yrs ago", "In last yr")) +
  labs(title="Testing patterns by snap.grp (not on PrEP)",
       x = "Test group",
       y = "Proportion in test group")

SNAP Group (3 category)

# table of counts
tab.alltest <- allTestDF %>% 
  group_by(snap.grp3, test_group) %>%
  summarize(n.test.grp = sum(ego.wawt)) 

tab.alltest %>%
  pivot_wider(names_from = test_group, 
              values_from = n.test.grp) %>%
  kable(caption = "Test group by snap.grp3 (not on PrEP)", 
        col.names = c("", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 0) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by snap.grp3 (not on PrEP)
	Test group
	Never	>2yrs ago	1-2yrs ago	In last yr
0-1	64	79	69	117
2-5	27	6	31	148
6-125	6	3	14	174

tab.alltest %>%
  group_by(snap.grp3) %>%
  mutate(prop.test.grp = scales::percent(n.test.grp / sum(n.test.grp))) %>%
  pivot_wider(-n.test.grp, names_from = test_group, 
              values_from = prop.test.grp) %>%
  kable(caption = "Test group by snap.grp3 (not on PrEP)", 
        col.names = c("snap.grp3", "Never", ">2yrs ago", "1-2yrs ago", "In last yr"),
        digits = 1) %>%
  kable_styling(bootstrap_options = c("striped")) %>%
  add_header_above(c(" " = 1, "Test group" = 4))

Test group by snap.grp3 (not on PrEP)
	Test group
snap.grp3	Never	>2yrs ago	1-2yrs ago	In last yr
0-1	19.5%	24.0%	20.9%	35.6%
2-5	12.8%	2.9%	14.7%	69.7%
6-125	2.8%	1.5%	7.1%	88.5%

tab.alltest %>% 
  group_by(snap.grp3) %>%
  mutate(prop.test.grp = n.test.grp / sum(n.test.grp)) %>%
  ungroup() %>%
  ggplot(aes(x = test_group, y=prop.test.grp, color = snap.grp3)) +
  geom_line() +
  geom_point() +
  # scale_color_discrete(name="Test group",
  #                     labels=c("Never", ">2yrs ago", "1-2 yrs ago", "In last yr")) +
  labs(title="Testing patterns by snap.grp3 (not on PrEP)",
       x = "Test group",
       y = "Proportion in test group")

Never testers

Age

allTestDF %>% group_by(age.grp) %>%
  mutate(nt = ifelse(never_tested == "F", 0, ego.wawt)) %>%
  summarize(neverTestProp = scales::percent(sum(nt)/sum(ego.wawt), 0.1)) %>%
  kable(caption = "Never tested by age group", 
        col.names = c("Age", "Percent")) %>%
  kable_styling(bootstrap_options = c("striped"))

Never tested by age group
Age	Percent
1	39.3%
2	10.2%
3	7.0%
4	3.5%
5	4.2%

p <- allTestDF %>% group_by(age.grp) %>%
  mutate(nt = ifelse(never_tested == "F", 0, ego.wawt)) %>%
  summarize(neverTestProp = sum(nt)/sum(ego.wawt)) %>%
  ggplot(aes(x = age.grp, y = neverTestProp)) +
  ylim(0,1) +
  geom_point() + geom_line() +
  labs(title = "Never test proportion by age")

plotly::ggplotly(p)

Race

allTestDF %>% group_by(race) %>%
  mutate(nt = ifelse(never_tested == "F", 0, ego.wawt)) %>%
  summarize(neverTestProp = scales::percent(sum(nt)/sum(ego.wawt), 0.1)) %>%
  kable(caption = "Never tested by race", 
        col.names = c("Race", "Percent")) %>%
  kable_styling(bootstrap_options = c("striped"))

Never tested by race
Race	Percent
B	23.8%
H	23.8%
O	10.9%

p <- allTestDF %>% group_by(age.grp, race) %>%
  mutate(nt = ifelse(never_tested == "F", 0, ego.wawt)) %>%
  summarize(neverTestProp = sum(nt)/sum(ego.wawt)) %>%
  ggplot(aes(x = age.grp, y = neverTestProp, color = race)) +
  ylim(0,1) +
  geom_point() + geom_line() +
  labs(title = "Never test proportion by race and age")

plotly::ggplotly(p)

Region

allTestDF %>% group_by(region) %>%
  mutate(nt = ifelse(never_tested == "F", 0, ego.wawt)) %>%
  summarize(neverTestProp = scales::percent(sum(nt)/sum(ego.wawt), 0.1)) %>%
  kable(caption = "Never tested by region", 
        col.names = c("Region", "Percent")) %>%
  kable_styling(bootstrap_options = c("striped"))

Never tested by region
Region	Percent
EasternWA	25.7%
King	11.8%
WesternWA	11.5%

p <- allTestDF %>% group_by(age.grp, region) %>%
  mutate(nt = ifelse(never_tested == "F", 0, ego.wawt)) %>%
  summarize(neverTestProp = sum(nt)/sum(ego.wawt)) %>%
  ggplot(aes(x = age.grp, y = neverTestProp, color = region)) +
  ylim(0,1) +
  geom_point() + geom_line() +
  labs(title = "Never test proportion by region and age")
  
plotly::ggplotly(p)

Months since last test

Overall

# compare mos since last test by prep status
with(mltDF, boxplot(mos_last_test ~ prep, 
        ylim=c(0,100), varwidth=T))

# is mos since last test exponentially distributed?
# here just look for linear trend on log scale, will
# do more below.
# NB: ever testers only, not on PrEP

hist.data.wtd = with(mltDF[mltDF$prep==0,],
                 plotrix::weighted.hist(mos_last_test, 
                                        w = ego.wawt,
                                        breaks=20, plot=F))

hist.data.wtd$counts = log(hist.data.wtd$counts+1)
class(hist.data.wtd) <- "histogram"
plot(hist.data.wtd,
     ylab='log(Frequency)', 
     main = "Log plot of months since last test",
     sub = "(filter: Ever tested & not on PrEP)",
     xlab = "Months")

Exp fits by race

# Compare exponential fits for mlt for those not on PrEP

obs <- mltDF %>%
  filter(prep == 0) %>%
  mutate(mlt = mos_last_test,
         rate = 1/mlt) %>%
  select(race, age.grp, mlt, rate, ego.wawt)

## by Race  ------------------------------------------

byrace <- obs %>%
  group_by(race) %>%
  summarize(wtd.mean.mlt = weighted.mean(mlt, ego.wawt, na.rm=T),
            wtd.med.mean.mlt = weighted.median(mlt, ego.wawt)/log(2),
            wtd.median.mlt = weighted.median(mlt, ego.wawt),
            max = max(mlt),
            wtd.num = sum(ego.wawt),
            num = n())

all <- obs %>%
  summarize(race = "Total",
            wtd.mean.mlt = weighted.mean(mlt, ego.wawt, na.rm=T),
            wtd.med.mean.mlt = weighted.median(mlt, ego.wawt)/log(2),
            wtd.median.mlt = weighted.median(mlt, ego.wawt),
            max = max(mlt),
            wtd.num = sum(ego.wawt),
            num = n())

testtab <- bind_rows(byrace, all)


pred <- data.frame(t = rep(1:max(obs$mlt), 4),
                   race = c(rep("B", max(obs$mlt)),
                            rep("H", max(obs$mlt)),
                            rep("O", max(obs$mlt)),
                            rep("Total", max(obs$mlt)))) %>%
  left_join(testtab, by="race") %>%
  mutate(y.mean = wtd.num * 1/wtd.mean.mlt * 1/exp(t/wtd.mean.mlt),
         y.med.mean = wtd.num * 1/wtd.med.mean.mlt * 1/exp(t/wtd.med.mean.mlt),
         y.median = wtd.num * 1/wtd.median.mlt * 1/exp(t/wtd.median.mlt))

chkpred <- pred %>%
  group_by(race) %>%
  summarize(tot.mean = sum(y.mean),
         tot.medmean = sum(y.med.mean),
         tot.median = sum(y.median))

# Plot regular scale

# Observed data
obs2 <- obs %>%
  mutate(race = "Total") %>%
  bind_rows(., obs)

colors <- c("Adj.median" = "blue", 
            "Mean" = "red", 
            "Median" = "green")

ggplot(obs2, aes(x=mlt)) +
  geom_bar() +
  geom_line(data=pred, 
            aes(x=t, y=y.mean, color="Mean")) +
  geom_line(data=pred, 
            aes(x=t, y=y.med.mean, color="Adj.median")) +
  geom_line(data=pred, 
            aes(x=t, y=y.median, color="Median")) +
  xlim(0,100) +
  labs(title = "Months since last HIV test",
       caption = "Not on PrEP",
       x = "Months",
       color = "Legend") +
  scale_color_manual(values = colors) +
  facet_wrap(~race, scales = "free_y")

By age

# Histogram overlay

ggplot(obs, aes(x = pmin(36, mlt), 
                after_stat(density), fill=race, color=race)) +
  geom_histogram(alpha=0.5, binwidth = 3) +
  labs(title = "Histogram of MLT density by race",
       caption = "Not on Prep")

## Age analysis ------------------------------------------

byage.grp5 <- obs %>%
  mutate(age.grp = case_when(
           age.grp == 1 ~ "15-24",
           age.grp == 2 ~ "25-34",
           age.grp == 3 ~ "35-44",
           age.grp == 4 ~ "45-54",
           age.grp >4  ~ "55-65")) %>%
  group_by(age.grp) %>%
  summarize(wtd.mean.mlt = weighted.mean(mlt, ego.wawt, na.rm=T),
            wtd.med.mean.mlt = weighted.median(mlt, ego.wawt)/log(2),
            wtd.median.mlt = weighted.median(mlt, ego.wawt),
            max = max(mlt),
            wtd.num = sum(ego.wawt),
            num = n())

all <- obs %>%
  summarize(age.grp = "Total",
            wtd.mean.mlt = weighted.mean(mlt, ego.wawt, na.rm=T),
            wtd.med.mean.mlt = weighted.median(mlt, ego.wawt)/log(2),
            wtd.median.mlt = weighted.median(mlt, ego.wawt),
            max = max(mlt),
            wtd.num = sum(ego.wawt),
            num = n())

testtab5 <- bind_rows(byage.grp5, all)

# Split in 2 groups: 55+ vs. everyone else
byage.grp2 <- obs %>%
  mutate(age.grp = case_when(
    age.grp < 5 ~ "15-54",
    TRUE  ~ "55-65")) %>%
  group_by(age.grp) %>%
  summarize(wtd.mean.mlt = weighted.mean(mlt, ego.wawt, na.rm=T),
            wtd.med.mean.mlt = weighted.median(mlt, ego.wawt)/log(2),
            wtd.median.mlt = weighted.median(mlt, ego.wawt),
            max = max(mlt),
            wtd.num = sum(ego.wawt),
            num = n())

testtab2 <- bind_rows(byage.grp2, all)

# Plot 5 group version
ggplot(obs, aes(x = pmin(36, mlt), 
                after_stat(density),
                color=factor(age.grp),
                fill=factor(age.grp)) ) +
  geom_density(alpha = 0.3) +
  labs(title = "Histogram of MLT by age group",
       x = "Months (capped at 36+)",
       caption = "Not on Prep") +
  scale_fill_brewer(name="Age Group",
                    palette = "YlGnBu", direction = -1) +
  scale_color_brewer(palette = "YlGnBu", direction = -1) +
  guides(color=F)

# Plot 2 group version
ggplot(obs, aes(x = pmin(36, mlt), 
                after_stat(density),
                fill=factor(age.grp < 5)) ) +
  geom_density(alpha = 0.5) +
  labs(title = "Histogram of MLT by <45 vs 45+",
       x = "Months (capped at 36+)",
       caption = "Not on Prep") +
  scale_fill_discrete(name="Age Group",
                      breaks=c("FALSE", "TRUE"),
                      labels=c("45+", "15-44"))

Tests in last 2 years

Histograms

# is number of tests in last 2 yrs Poisson distributed?
# NB: never testers included
with(allTestDF, hist(tests_2yr[prep==0 & tests_2yr < 13], 
                  freq=F, breaks=13, right=F,
                  main = "Number of tests in last 2 yrs",
                  xlab = "Number (truncated at 10)",
                  sub = "Poisson overlay with lambda=mean(tests)"))
p = dpois(x = allTestDF$tests_2yr, #size = 1000, 
          lambda = mean(allTestDF$tests_2yr, na.rm=T))
lines(x = allTestDF$tests_2yr+0.5, y = p,
     type="h", lwd=2, col="blue", ylab="p")
points(x= allTestDF$tests_2yr+0.5,y = p,
       pch=16,cex=1.2,col="dark red")

# a bit better, but still way off with lambda = median
with(allTestDF, hist(tests_2yr[prep==0 & tests_2yr < 13], 
                  freq=F, breaks=13, right=F,
                  main = "Number of tests in last 2 yrs",
                  xlab = "Number (truncated at 10)",
                  sub = "Poisson overlay with lambda=median(tests)"))
p = dpois(x = allTestDF$tests_2yr, #size = 1000, 
          lambda = median(allTestDF$tests_2yr, na.rm=T))
lines(x = allTestDF$tests_2yr+0.5, y = p,
      type="h", lwd=2, col="blue", ylab="p")
points(x= allTestDF$tests_2yr+0.5,y = p,
       pch=16,cex=1.2,col="dark red")

QQplot

qqplot(rpois(1000, weighted.mean(allTestDF$tests_2yr, na.rm=T)),
       allTestDF$tests_2yr[allTestDF$prep==0],
       xlim = c(0,20), ylim = c(0,20),
       main = "QQplot for Poisson Distn of Tests",
       sub = "filter: Ever tested & not on PrEP",
       xlab = "Poisson quantiles",
       ylab = "Obs number of tests")
abline(0,1, col="red")

dev.off()

## null device 
##           1

Models

Method 1: Mixed exponential

The model assumes the distribution of months since last test is a mixed exponential, with two latent groups:

\[ MLT \sim exp(\phi\frac{1}{\lambda_1} + (1 - \phi)\frac{1}{\lambda_2}) \]

Estimated using the package VGAM, you get estimates of all 3 parameters:

\(\lambda_1\), the testing rate in latent grp 1
\(\lambda_2\), the testing rate in latent grp 2
\(\phi\), for the proportion in grp 1

Sample filtering:

We need to have value for months since last test, and we will recode 0 months to 0.1 months.

Size	Sample criteria
832	Starting Sample
742	HIV-
557	Not on PrEP
461	Valid MLT

Overall

library(VGAM)

data <- mltDF %>% filter(prep==0) %>% 
  select(Y=mos_last_test, race) %>%
  mutate(Y = ifelse(Y==0, 0.1, Y))
summary(data)

##        Y              race          
##  Min.   :  0.10   Length:461        
##  1st Qu.:  4.00   Class :character  
##  Median : 10.00   Mode  :character  
##  Mean   : 22.48                     
##  3rd Qu.: 24.00                     
##  Max.   :295.00

myfit <- vglm(Y ~ 1, mix2exp, data = data, trace = TRUE)

## VGLM    linear loop  1 :  loglikelihood = -1843.5064
## VGLM    linear loop  2 :  loglikelihood = -1837.4827
## VGLM    linear loop  3 :  loglikelihood = -1835.8508
## VGLM    linear loop  4 :  loglikelihood = -1835.8315
## VGLM    linear loop  5 :  loglikelihood = -1835.8315
## VGLM    linear loop  6 :  loglikelihood = -1835.8315

Coef(myfit)

##        phi    lambda1    lambda2 
## 0.30055077 0.01892653 0.10597097

pred <- data.frame(y1 = rexp(nn <- 1000, Coef(myfit)[2])) %>%
  mutate(y2 = rexp(nn, Coef(myfit)[3]),
         Y  = ifelse(runif(nn) < Coef(myfit)[1], y1, y2))

with(data, plot(density(Y), xlim = c(-5, 100), 
                ylim = c(0,0.07), lwd=2,
                main = "All cases: Exponential mixture fit to MLT",
                xlab = "Months since last test (scale truncated at 100)",
                sub = paste("No PrEP: n=", dim(data)[1])))
with(pred, lines(density(y1), col="blue", lty=2))
with(pred, lines(density(y2), col="orange", lty=2))
with(pred, lines(density(Y), col="gray", lty=2))

abline(v=1/Coef(myfit)[2], col="blue")
abline(v=1/Coef(myfit)[3], col="orange")
abline(v=mean(data$Y))

text(1/Coef(myfit)[2], 0.06, paste(round(1/Coef(myfit)[2], 1)), 
     pos=4, col="blue")
text(1/Coef(myfit)[3], 0.06, paste(round(1/Coef(myfit)[3], 1)), 
     pos=4, col="orange")
text(mean(data$Y), 0.06, round(mean(data$Y), 1),
     pos=4)
text(80, 0.04, paste(scales::percent(Coef(myfit)[1]), "late testers"))

By race: B&H

data.bh <- data %>% filter(race !="O")
summary(data.bh)

##        Y              race          
##  Min.   :  0.10   Length:66         
##  1st Qu.:  2.00   Class :character  
##  Median :  6.50   Mode  :character  
##  Mean   : 19.70                     
##  3rd Qu.: 20.75                     
##  Max.   :170.00

myfit <- vglm(Y ~ 1, mix2exp, data = data.bh, trace = TRUE)

## VGLM    linear loop  1 :  loglikelihood = -249.23229
## VGLM    linear loop  2 :  loglikelihood = -246.69049
## VGLM    linear loop  3 :  loglikelihood = -246.33987
## VGLM    linear loop  4 :  loglikelihood = -246.33787
## VGLM    linear loop  5 :  loglikelihood = -246.33787

Coef(myfit)

##        phi    lambda1    lambda2 
## 0.44443977 0.02528401 0.26142372

pred.bh <- data.frame(y1 = rexp(nn <- 1000, Coef(myfit)[2])) %>%
  mutate(y2 = rexp(nn, Coef(myfit)[3]),
         Y  = ifelse(runif(nn) < Coef(myfit)[1], y1, y2))


with(data.bh, plot(density(Y), xlim = c(-5, 100), 
                ylim = c(0,0.07), lwd=2,
                main = "B&H:  Exponential mixture fit to MLT",
                xlab = "Months since last test (scale truncated at 100)",
                sub = paste("No PrEP: n=", dim(data.bh)[1])))
with(pred.bh, lines(density(y1), col="blue", lty=2))
with(pred.bh, lines(density(y2), col="orange", lty=2))
with(pred.bh, lines(density(Y), col="gray", lty=2))

abline(v=1/Coef(myfit)[2], col="blue")
abline(v=1/Coef(myfit)[3], col="orange")
abline(v=mean(data.bh$Y))
abline(v=mean(pred.bh$Y), col="grey")

text(1/Coef(myfit)[2], 0.06, paste(round(1/Coef(myfit)[2], 1)), 
     pos=4, col="blue")
text(1/Coef(myfit)[3], 0.06, paste(round(1/Coef(myfit)[3], 1)), 
     pos=4, col="orange")
text(mean(data.bh$Y), 0.06, round(mean(data.bh$Y), 1),
     pos=4)
text(mean(pred.bh$Y), 0.05, round(mean(pred.bh$Y), 1),
     pos=4, col='grey')
text(80, 0.04, paste(scales::percent(Coef(myfit)[1]), "late testers"))

By race: O

data.o <- data %>% filter(race == "O")
summary(data.o)

##        Y              race          
##  Min.   :  0.10   Length:395        
##  1st Qu.:  4.00   Class :character  
##  Median : 11.00   Mode  :character  
##  Mean   : 22.94                     
##  3rd Qu.: 24.00                     
##  Max.   :295.00

myfit <- vglm(Y ~ 1, mix2exp, data = data.o, trace = TRUE)

## VGLM    linear loop  1 :  loglikelihood = -1594.0674
## VGLM    linear loop  2 :  loglikelihood = -1588.6032
## VGLM    linear loop  3 :  loglikelihood = -1585.6365
## VGLM    linear loop  4 :  loglikelihood = -1585.6126
## VGLM    linear loop  5 :  loglikelihood = -1585.6126

Coef(myfit)

##        phi    lambda1    lambda2 
## 0.28649333 0.01830980 0.09777659

pred.o <- data.frame(y1 = rexp(nn <- 1000, Coef(myfit)[2])) %>%
  mutate(y2 = rexp(nn, Coef(myfit)[3]),
         Y  = ifelse(runif(nn) < Coef(myfit)[1], y1, y2))


with(data.o, plot(density(Y), xlim = c(-5, 100), 
                ylim = c(0,0.07), lwd=2,
                main = "O: Exponential mixture fit to MLT",
                xlab = "Months since last test (scale truncated at 100)",
                sub = paste("No PrEP: n=", dim(data.o)[1])))
with(pred.o, lines(density(y1), col="blue", lty=2))
with(pred.o, lines(density(y2), col="orange", lty=2))
with(pred.o, lines(density(Y), col="gray", lty=2))

abline(v=1/Coef(myfit)[2], col="blue")
abline(v=1/Coef(myfit)[3], col="orange")
abline(v=mean(data.o$Y))
abline(v=mean(pred.o$Y), col="grey")

text(1/Coef(myfit)[2], 0.06, paste(round(1/Coef(myfit)[2], 1)), 
     pos=4, col="blue")
text(1/Coef(myfit)[3], 0.06, paste(round(1/Coef(myfit)[3], 1)), 
     pos=4, col="orange")
text(mean(data.o$Y), 0.06, round(mean(data.o$Y), 1),
     pos=4)
text(mean(pred.o$Y), 0.05, round(mean(pred.o$Y), 1),
     pos=4, col='grey')
text(80, 0.04, paste(scales::percent(Coef(myfit)[1]), "late testers"))

Method 2: Survival models

Here we can use the never tested cases also, and treat them as tested. We will assume first possible test age is 15 so their observation time is (current.age-15-1) years.

We also add epsilon to the value 0 months.

Sample filtering

Size	Sample criteria
832	Starting Sample
742	HIV-
557	Not on PrEP

coxdata <- allTestDF %>% 
  filter(prep==0) %>%
  mutate(mlt = case_when(never_tested=="T" ~ (age-14)*12,
                         mos_last_test == 0 ~ 0.5,
                         TRUE ~ mos_last_test),
         tested = ifelse(never_tested=="T", 0, 1),
         age.young = factor(ifelse(age.grp==1, "15-24", "25+")),
                            #levels = c("25+", "15-24")),
         race2 = factor(race, 
                        levels = c("O", "B", "H")),
         race.bipoc = factor(ifelse(race == "O", "O", "B+H"),
                             levels = c("O", "B+H")),
         region.ewa = factor(ifelse(region == "EasternWA", "EWA", "Other"))
         ) %>%
  filter(!is.na(mlt)) %>%
  select(age, age.grp, age.young, race, race2, race.bipoc, 
         region, region.ewa, snap.grp3,
         mlt, tested, never_tested, tests_2yr, ego.wawt)

saveRDS(coxdata, 
        file = here::here("Reports","WHAMPsurvey","MM","Testing",
                          "coxdata.RDS"))


# par(mfrow=c(1,3), oma=c(1,1,2,1))
# with(coxdata, boxplot(mlt~trunc(age)))
# with(coxdata, boxplot(mlt~age.grp, varwidth=T, ylim = c(0,100)))
# with(coxdata, boxplot(mlt~age.young, varwidth=T, ylim = c(0,100)))
# mtext("Months since last test by age", outer=T)

Survival plots

Overall

# survfit creates the KM plots

sfit <- survfit(Surv(mlt, tested) ~ 1 ,
                weights = ego.wawt,
                data=coxdata)

ggsurvplot(sfit, data=coxdata,
           xlim = c(0,200),
           break.time.by = 12,
           surv.median.line = "hv",
           main = "Survival Curve",
           xlab="Months",
           ylab="Proportion Not Tested")

Age

Clear age effects for youth.

sfit <- survfit(Surv(mlt, tested) ~ age.young ,
                weights = ego.wawt,
                data=coxdata)
ggsurvplot(sfit,
           pval = TRUE,
           xlim = c(0,120),
           break.time.by = 12,
           conf.int = T,
           title = "By age group (15-24 vs. older)",
           xlab = "Months since last test")

Race

Main

The small sample sizes for B & H really don’t support a detailed estimate.

sfit <- survfit(Surv(mlt, tested) ~ race ,
                weights = ego.wawt,
                data=coxdata)
ggsurvplot(sfit,
           pval = TRUE,
           conf.int = T,
           title = "By Race",
           xlab = "Months since last test")

Race x age

ggsurvplot_facet(sfit, data=coxdata, 
                 facet.by =  "age.young",
                 xlim = c(0,120),
                 break.time.by = 12,
                 pval = TRUE,
                 title = "Race, by age (15-24 vs. older)",
                 xlab = "Months since last test")

Race x region

ggsurvplot_facet(sfit, data=coxdata, 
                 facet.by =  "region",
                 pval = TRUE,
                 title = "Race, by region",
           xlab = "Months since last test")

Race x snap

Noisy, but highest risk group does show borderline significant differences. So there is an interaction by race and risk at the highest risk levels.

ggsurvplot_facet(sfit, data=coxdata, 
                 facet.by =  "snap.grp3",
                 pval = TRUE,
                 title = "Race, by snap.grp",
                 xlab = "Months since last test")

Region

Main

There’s almost no difference between King and Western WA, which supports collapsing these regions.

sfit <- survfit(Surv(mlt, tested) ~ region ,
                weights = ego.wawt,
                data=coxdata)

ggsurvplot(sfit,
           pval = TRUE,
           title = "By Region",
           conf.int = T,
           xlab = "Months since last test")

Region x age

Evidence of main effects, minimal interaction.

ggsurvplot_facet(sfit, data=coxdata, 
                 facet.by =  "age.young",
                 pval = TRUE,
                 title = "Region by age group (15-24 vs. older)",
                 xlab = "Months since last test")

SNAP

Significant differences.

sfit <- survfit(Surv(mlt, tested) ~ snap.grp3 ,
                weights = ego.wawt,
                data=coxdata)

ggsurvplot(sfit,
           pval = TRUE,
           conf.int = T,
           title = "By SNAP risk group",
           xlab = "Months since last test")

Age x Race x region

The lower rates of testing for 15-24 year olds is consistent except in Western WA. But the small sample size for this group is still a concern.

sfit <- survfit(Surv(mlt, tested) ~ age.young ,
                weights = ego.wawt,
                data=coxdata)

ggsp <- ggsurvplot_facet(sfit, data=coxdata, 
                         facet.by = c("race.bipoc", "region"),
                         xlim = c(0,120), # only show comparable time frame
                         break.time.by = 12,
                         pval = TRUE,
                         title = "Age 15-24, by race and region",
                         xlab = "Months since last test")
ggsp <- ggpar(ggsp,
              font.xtickslab = c(7, "plain"))
ggsp

Parametric Plots

For these models we will remove never-testers, defined as those who have not tested by age 45.

with(coxdata, table(never_tested, age.grp, useNA = "al"))

##             age.grp
## never_tested   1   2   3   4   5 <NA>
##         F     69 156  96  71  69    0
##         T     46  25  12   4   4    0
##         <NA>   0   0   0   0   0    0

coxdata <- coxdata %>% 
  filter(!(never_tested == "T" & age > 45))

Null model

sdf <- with(coxdata, Surv(mlt, tested))

KM.fit <- survfit(sdf ~ 1, data=coxdata, weights = ego.wawt)
exp.fit <- survreg(sdf ~ 1, data=coxdata, weights = ego.wawt, 
                   dist = "exponential")
weib.fit <- survreg(sdf ~ 1, data=coxdata, weights = ego.wawt, 
                    dist = "weibull")
lolog.fit <- survreg(sdf ~ 1, data=coxdata, weights = ego.wawt, 
                     dist = "loglogistic")

probs <- seq(.01,.99,by=.01)

pred.df = data.frame(y = rev(probs), 
                     exponential = predict(exp.fit, type="quantile", p=probs)[1,], 
                     weibull = predict(weib.fit, type="quantile", p=probs)[1,], 
                     lolog = predict(lolog.fit, type="quantile", p=probs)[1,])
pred.df_long = pivot_longer(pred.df, !y, names_to = "model", values_to = "x")

p <- ggsurvplot(KM.fit, data = coxdata,
           xlim = c(0,240),
           break.time.by = 24,
           surv.median.line = "hv",
           main = "Survival Curves: KM and parametric",
           xlab="Months",
           ylab="Proportion Not Tested",
           caption = "HIV-, not on PrEP, xlim 20 yrs") 

p <- p$plot + geom_line(data=pred.df_long, 
            aes(x=x, y=y, color=model),
            lwd=1) +
  scale_fill_discrete(guide=FALSE)

p

Age

age.levels = levels(coxdata$age.young)

## Kaplan-Meier estimator
km.age <- survfit(sdf ~ age.young, data = coxdata, weight = ego.wawt)

p <- ggsurvplot(km.age, 
                xlim = c(0,240),
                break.time.by = 24,
                #surv.median.line = "hv",
                title = "Cox PH Curves by age",
                xlab="Months",
                ylab="Proportion Not Tested",
                caption = "HIV-, not on PrEP, xlim 20 yrs",
                legend.labs = age.levels,
                legend.title = "KM:")
#p

Cox PH

## Cox regression by Efron method
cox.age <- coxph(sdf ~ age.young, data = coxdata, weight = ego.wawt)

pred.age <- survfit(cox.age, 
                    newdata = data.frame(age.young = age.levels))
pred.age.df <- data.frame(x = rep(pred.age$time, ncol(pred.age$surv)),
                          y = c(pred.age$surv[,1], 
                                pred.age$surv[,2]),
                          age.young = c(rep(age.levels[1], nrow(pred.age$surv)),
                                        rep(age.levels[2], nrow(pred.age$surv))))

coxp <- p$plot +
  geom_line(data = pred.age.df,
            aes(x=x, y=y, group=age.young, linetype=age.young, color=age.young), 
            lwd = .7) +
  labs(title = "Cox PH Curves by Age") +
  scale_linetype_discrete(name="CoxPH: ")

coxp

Exponential

## Exponential regression
exp.age <- survreg(sdf ~ age.young,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "exponential")

# pred now returns the x values (quantiles)
pred.age <- predict(exp.age, 
                     newdata = data.frame(age.young = age.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.age.df <- data.frame(x = c(pred.age[1,], pred.age[2,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.age)),
                           age = c(rep(age.levels[1], ncol(pred.age)),
                                    rep(age.levels[2], ncol(pred.age))))

expp <- p$plot +
  geom_line(data = pred.age.df,
            aes(x=x, y=y, group=age, color = age, linetype=age), 
            lwd=.8) +
  labs(title = "Exponential Curves by Age") +
  scale_linetype_discrete(name="Exponential: ")

expp

Weibull

## Weibull regression
weib.age <- survreg(sdf ~ age.young,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "weibull")

# pred now returns the x values (quantiles)
pred.age <- predict(weib.age, 
                     newdata = data.frame(age.young = age.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.age.df <- data.frame(x = c(pred.age[1,], pred.age[2,]),
                          y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.age)),
                          age = c(rep(age.levels[1], ncol(pred.age)),
                                  rep(age.levels[2], ncol(pred.age))))

weibp <- p$plot +
  geom_line(data = pred.age.df,
            aes(x=x, y=y, group=age, color = age, linetype=age), 
            lwd=.8) +
  labs(title = "Weibull Curves by Age") +
  scale_linetype_discrete(name="Weibull: ")

weibp

Log-logistic

lolog.age <- survreg(sdf ~ age.young,
                     data=coxdata,
                     weights = ego.wawt,
                     dist = "loglogistic")

# pred now returns the x values (quantiles)
pred.age <- predict(lolog.age, 
                     newdata = data.frame(age.young = age.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.age.df <- data.frame(x = c(pred.age[1,], pred.age[2,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.age)),
                           age = c(rep(age.levels[1], ncol(pred.age)),
                                    rep(age.levels[2], ncol(pred.age))))

lologp <- p$plot +
  geom_line(data = pred.age.df,
            aes(x=x, y=y, group=age, color = age, linetype=age), 
            lwd=.8) +
  labs(title = "Log-logistic Curves by Age") +
  scale_linetype_discrete(name="Lolog: ")

lologp

Race

race.levels = sort(unique(coxdata$race)) # alphabetic

## Kaplan-Meier curves
km.race <- survfit(sdf ~ race, data = coxdata, weight = ego.wawt)

p <- ggsurvplot(km.race, 
                xlim = c(0,240),
                break.time.by = 24,
                #surv.median.line = "hv",
                title = "Cox PH Curves by Race",
                xlab="Months",
                ylab="Proportion Not Tested",
                caption = "HIV-, not on PrEP, xlim 20 yrs",
                legend.labs = race.levels, # comment out to verify
                legend.title = "KM Race")
#p

Cox PH

## Cox regression by Efron method
cox.race <- coxph(sdf ~ race, data = coxdata, weight = ego.wawt)
pred.race <- survfit(cox.race, 
                newdata = data.frame(race = race.levels))
pred.race.df <- data.frame(x = rep(pred.race$time, ncol(pred.race$surv)),
                           y = c(pred.race$surv[,1], 
                                 pred.race$surv[,2], 
                                 pred.race$surv[,3]),
                           race = c(rep(race.levels[1],nrow(pred.race$surv)),
                                    rep(race.levels[2],nrow(pred.race$surv)),
                                    rep(race.levels[3],nrow(pred.race$surv))))

coxp <- p$plot +
  geom_line(data = pred.race.df,
            aes(x=x, y=y, group=race, color = race, linetype=race), 
            lwd=.8) +
  labs(title = "Cox PH Curves by Race") +
  scale_linetype_discrete(name="CoxPH: ")

coxp

Exponential

## Exponential regression
exp.race <- survreg(sdf ~ race,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "exponential")

# pred now returns the x values (quantiles)
pred.race <- predict(exp.race, 
                     newdata = data.frame(race = race.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.race.df <- data.frame(x = c(pred.race[1,], pred.race[2,], pred.race[3,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.race)),
                           race = c(rep(race.levels[1], ncol(pred.race)),
                                    rep(race.levels[2], ncol(pred.race)),
                                    rep(race.levels[3], ncol(pred.race))))

expp <- p$plot +
  geom_line(data = pred.race.df,
            aes(x=x, y=y, group=race, color = race, linetype=race), 
            lwd=.8) +
  labs(title = "Exponential Curves by Race") +
  scale_linetype_discrete(name="Exponential: ")

expp

Weibull

## Weibull regression
weib.race <- survreg(sdf ~ race,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "weibull")

# pred now returns the x values (quantiles)
pred.race <- predict(weib.race, 
                     newdata = data.frame(race = race.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.race.df <- data.frame(x = c(pred.race[1,], pred.race[2,], pred.race[3,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.race)),
                           race = c(rep(race.levels[1], ncol(pred.race)),
                                    rep(race.levels[2], ncol(pred.race)),
                                    rep(race.levels[3], ncol(pred.race))))

weibp <- p$plot +
  geom_line(data = pred.race.df,
            aes(x=x, y=y, group=race, color = race, linetype=race), 
            lwd=.8) +
  labs(title = "Weibull Curves by Race") +
  scale_linetype_discrete(name="Weibull: ")

weibp

Log-logistic

lolog.race <- survreg(sdf ~ race,
                     data=coxdata,
                     weights = ego.wawt,
                     dist = "loglogistic")

# pred now returns the x values (quantiles)
pred.race <- predict(lolog.race, 
                     newdata = data.frame(race = race.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.race.df <- data.frame(x = c(pred.race[1,], pred.race[2,], pred.race[3,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.race)),
                           race = c(rep(race.levels[1], ncol(pred.race)),
                                    rep(race.levels[2], ncol(pred.race)),
                                    rep(race.levels[3], ncol(pred.race))))

lologp <- p$plot +
  geom_line(data = pred.race.df,
            aes(x=x, y=y, group=race, color = race, linetype=race), 
            lwd=.8) +
  labs(title = "Log-logistic Curves by Race") +
  scale_linetype_discrete(name="Lolog: ")

lologp

Region

region.levels <- levels(coxdata$region.ewa)

## Kaplan-Meier estimator
km.region <- survfit(sdf ~ region.ewa, data = coxdata, weight = ego.wawt)

p <- ggsurvplot(km.region, 
                xlim = c(0,240),
                break.time.by = 24,
                #surv.median.line = "hv",
                title = "Cox PH Curves by region",
                xlab="Months",
                ylab="Proportion Not Tested",
                caption = "HIV-, not on PrEP, xlim 20 yrs",
                legend.labs = region.levels, # comment out to verify
                legend.title = "KM region")
#p

Cox PH

## Cox regression by Efron method
cox.region <- coxph(sdf ~ region.ewa, data = coxdata, weight = ego.wawt)

pred.region <- survfit(cox.region, 
                       newdata = data.frame(region.ewa = region.levels))
pred.region.df <- data.frame(x = rep(pred.region$time, ncol(pred.region$surv)),
                          region.ewa = c(rep(region.levels[1], 
                                             nrow(pred.region$surv)), 
                                         rep(region.levels[2],
                                             nrow(pred.region$surv))),
                          y = c(pred.region$surv[,1], 
                                pred.region$surv[,2]))


coxp <- p$plot +
  geom_line(data = pred.region.df,
            aes(x=x, y=y, group=region.ewa, linetype=region.ewa, color=region.ewa), 
            lwd = .7) +
  labs(title = "Cox PH Curves by Region") +
  scale_linetype_discrete(name="CoxPH: ")

coxp

Exponential

## Exponential regression
exp.region <- survreg(sdf ~ region.ewa,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "exponential")

# pred now returns the x values (quantiles)
pred.region <- predict(exp.region, 
                     newdata = data.frame(region.ewa = region.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.region.df <- data.frame(x = c(pred.region[1,], pred.region[2,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.region)),
                           region = c(rep(region.levels[1], ncol(pred.region)),
                                    rep(region.levels[2], ncol(pred.region))))

expp <- p$plot +
  geom_line(data = pred.region.df,
            aes(x=x, y=y, group=region, color = region, linetype=region), 
            lwd=.8) +
  labs(title = "Exponential Curves by Region") +
  scale_linetype_discrete(name="Exponential: ")

expp

Weibull

## Weibull regression
weib.region <- survreg(sdf ~ region.ewa,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "weibull")

# pred now returns the x values (quantiles)
pred.region <- predict(weib.region, 
                     newdata = data.frame(region.ewa = region.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.region.df <- data.frame(x = c(pred.region[1,], pred.region[2,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.region)),
                           region = c(rep(region.levels[1], ncol(pred.region)),
                                    rep(region.levels[2], ncol(pred.region))))

weibp <- p$plot +
  geom_line(data = pred.region.df,
            aes(x=x, y=y, group=region, color = region, linetype=region), 
            lwd=.8) +
  labs(title = "Weibull Curves by Region") +
  scale_linetype_discrete(name="Weibull: ")

weibp

Log-logistic

lolog.region <- survreg(sdf ~ region.ewa,
                     data=coxdata,
                     weights = ego.wawt,
                     dist = "loglogistic")

# pred now returns the x values (quantiles)
pred.region <- predict(lolog.region, 
                     newdata = data.frame(region.ewa = region.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.region.df <- data.frame(x = c(pred.region[1,], pred.region[2,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.region)),
                           region = c(rep(region.levels[1], ncol(pred.region)),
                                    rep(region.levels[2], ncol(pred.region))))

lologp <- p$plot +
  geom_line(data = pred.region.df,
            aes(x=x, y=y, group=region, color = region, linetype=region), 
            lwd=.8) +
  labs(title = "Log-logistic Curves by Region") +
  scale_linetype_discrete(name="Lolog: ")

lologp

SNAP

snap.levels = levels(coxdata$snap.grp3)

## Kaplan-Meier estimator
km.snap <- survfit(sdf ~ snap.grp3, data = coxdata, weight = ego.wawt)

p <- ggsurvplot(km.snap, 
                xlim = c(0,240),
                break.time.by = 24,
                #surv.median.line = "hv",
                xlab="Months",
                ylab="Proportion Not Tested",
                caption = "HIV-, not on PrEP, xlim 20 yrs",
                legend.labs = snap.levels,
                legend.title = "KM SNAP")
#p

Cox PH

## Cox regression by Efron method
cox.snap <- coxph(sdf ~ snap.grp3, data = coxdata, weight = ego.wawt)

pred.snap <- survfit(cox.snap, 
                newdata = data.frame(snap.grp3 = snap.levels))
pred.snap.df <- data.frame(x = rep(pred.snap$time, ncol(pred.snap$surv)),
                       snap.grp3 = c(rep(snap.levels[1],
                                         nrow(pred.snap$surv)), # order follows factor
                                     rep(snap.levels[2],
                                         nrow(pred.snap$surv)),
                                     rep(snap.levels[3],
                                         nrow(pred.snap$surv))),
                       y = c(pred.snap$surv[,1], 
                             pred.snap$surv[,2], 
                             pred.snap$surv[,3]))

coxp <- p$plot +
  geom_line(data = pred.snap.df,
            aes(x=x, y=y, group=snap.grp3, linetype=snap.grp3, color=snap.grp3), 
            lwd = .7) +
  labs(title = "Cox PH Curves by SNAP") +
  scale_linetype_discrete(name="CoxPH: ")

coxp

Exponential

## Exponential regression
exp.snap <- survreg(sdf ~ snap.grp3,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "exponential")

# pred now returns the x values (quantiles)
pred.snap <- predict(exp.snap, 
                     newdata = data.frame(snap.grp3 = snap.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.snap.df <- data.frame(x = c(pred.snap[1,], pred.snap[2,], pred.snap[3,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.snap)),
                           snap = c(rep(snap.levels[1], ncol(pred.snap)),
                                    rep(snap.levels[2], ncol(pred.snap)),
                                    rep(snap.levels[3], ncol(pred.snap))))

expp <- p$plot +
  geom_line(data = pred.snap.df,
            aes(x=x, y=y, group=snap, color = snap, linetype=snap), 
            lwd=.8) +
  labs(title = "Exponential Curves by SNAP") +
  scale_linetype_discrete(name="Exponential: ")

expp

Weibull

## Weibull regression
weib.snap <- survreg(sdf ~ snap.grp3,
                    data=coxdata,
                    weights = ego.wawt,
                    dist = "weibull")

# pred now returns the x values (quantiles)
pred.snap <- predict(weib.snap, 
                     newdata = data.frame(snap.grp3 = snap.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.snap.df <- data.frame(x = c(pred.snap[1,], pred.snap[2,], pred.snap[3,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.snap)),
                           snap = c(rep(snap.levels[1], ncol(pred.snap)),
                                    rep(snap.levels[2], ncol(pred.snap)),
                                    rep(snap.levels[3], ncol(pred.snap))))

weibp <- p$plot +
  geom_line(data = pred.snap.df,
            aes(x=x, y=y, group=snap, color = snap, linetype=snap), 
            lwd=.8) +
  labs(title = "Weibull Curves by SNAP") +
  scale_linetype_discrete(name="Weibull: ")

weibp

Log-logistic

lolog.snap <- survreg(sdf ~ snap.grp3,
                     data=coxdata,
                     weights = ego.wawt,
                     dist = "loglogistic")

# pred now returns the x values (quantiles)
pred.snap <- predict(lolog.snap, 
                     newdata = data.frame(snap.grp3 = snap.levels),
                     type="quantile", p=seq(.01,.99,by=.01))
pred.snap.df <- data.frame(x = c(pred.snap[1,], pred.snap[2,], pred.snap[3,]),
                           y = rep(rev(seq(0.01, 0.99, by = 0.01)), nrow(pred.snap)),
                           snap = c(rep(snap.levels[1], ncol(pred.snap)),
                                    rep(snap.levels[2], ncol(pred.snap)),
                                    rep(snap.levels[3], ncol(pred.snap))))

lologp <- p$plot +
  geom_line(data = pred.snap.df,
            aes(x=x, y=y, group=snap, color = snap, linetype=snap), 
            lwd=.8) +
  labs(title = "Log-logistic Curves by SNAP") +
  scale_linetype_discrete(name="Lolog: ")

lologp

Full model fits

Cox PH

Estimates

cox.fit <- coxph(Surv(mlt, tested) ~
                   age.young + 
                   race2 +
                   region.ewa + snap.grp3,
                 weights = ego.wawt,
                 data=coxdata)
summary(cox.fit)

## Call:
## coxph(formula = Surv(mlt, tested) ~ age.young + race2 + region.ewa + 
##     snap.grp3, data = coxdata, weights = ego.wawt)
## 
##   n= 544, number of events= 461 
## 
##                    coef exp(coef) se(coef) robust se      z Pr(>|z|)    
## age.young25+     0.6692    1.9527   0.1301    0.1634  4.095 4.23e-05 ***
## race2B          -0.3323    0.7173   0.2307    0.3335 -0.996   0.3191    
## race2H          -0.3977    0.6719   0.1574    0.1814 -2.193   0.0283 *  
## region.ewaOther  0.4454    1.5611   0.1584    0.1735  2.566   0.0103 *  
## snap.grp32-5     0.5404    1.7167   0.1125    0.1292  4.182 2.89e-05 ***
## snap.grp36-125   1.2495    3.4885   0.1398    0.1574  7.936 2.09e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## age.young25+       1.9527     0.5121    1.4175    2.6899
## race2B             0.7173     1.3941    0.3731    1.3791
## race2H             0.6719     1.4884    0.4709    0.9587
## region.ewaOther    1.5611     0.6406    1.1110    2.1936
## snap.grp32-5       1.7167     0.5825    1.3326    2.2115
## snap.grp36-125     3.4885     0.2867    2.5623    4.7496
## 
## Concordance= 0.654  (se = 0.017 )
## Likelihood ratio test= 113.2  on 6 df,   p=<2e-16
## Wald test            = 92.86  on 6 df,   p=<2e-16
## Score (logrank) test = 123.2  on 6 df,   p=<2e-16,   Robust = 74.35  p=5e-14
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).

Forest plot

ggforest(cox.fit, data = coxdata)

Residual plot

ggcoxdiagnostics(cox.fit, type = "deviance")

Term plots

Base R plot, kinda neat

termplot(cox.fit, data=coxdata, partial.resid = T)

Exponential

exp.fit <- survreg(Surv(mlt, tested) ~
                   age.young + 
                   race2 +
                   region.ewa + snap.grp3,
                 data=coxdata,
                 weights = ego.wawt,
                 dist = "exponential")
summary(exp.fit)

## 
## Call:
## survreg(formula = Surv(mlt, tested) ~ age.young + race2 + region.ewa + 
##     snap.grp3, data = coxdata, weights = ego.wawt, dist = "exponential")
##                  Value Std. Error      z       p
## (Intercept)      5.164      0.205  25.14 < 2e-16
## age.young25+    -0.523      0.129  -4.05 5.1e-05
## race2B           0.295      0.230   1.28     0.2
## race2H           0.753      0.164   4.59 4.4e-06
## region.ewaOther -0.772      0.161  -4.79 1.6e-06
## snap.grp32-5    -0.598      0.111  -5.38 7.5e-08
## snap.grp36-125  -1.825      0.140 -13.00 < 2e-16
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -2081.8   Loglik(intercept only)= -2165.6
##  Chisq= 167.6 on 6 degrees of freedom, p= 1.4e-33 
## Number of Newton-Raphson Iterations: 6 
## n= 544

Weibull

weib.fit <- survreg(Surv(mlt, tested) ~
                   age.young + 
                   race2 +
                   region.ewa + snap.grp3,
                 data=coxdata,
                 weights = ego.wawt,
                 dist = "weibull")
summary(weib.fit)

## 
## Call:
## survreg(formula = Surv(mlt, tested) ~ age.young + race2 + region.ewa + 
##     snap.grp3, data = coxdata, weights = ego.wawt, dist = "weibull")
##                   Value Std. Error     z       p
## (Intercept)      5.3802     0.3152 17.07 < 2e-16
## age.young25+    -0.9066     0.2057 -4.41 1.0e-05
## race2B           0.4843     0.3626  1.34 0.18159
## race2H           0.7321     0.2480  2.95 0.00315
## region.ewaOther -0.8321     0.2486 -3.35 0.00082
## snap.grp32-5    -0.8010     0.1754 -4.57 4.9e-06
## snap.grp36-125  -1.9970     0.2143 -9.32 < 2e-16
## Log(scale)       0.4531     0.0363 12.50 < 2e-16
## 
## Scale= 1.57 
## 
## Weibull distribution
## Loglik(model)= -1981.1   Loglik(intercept only)= -2034.9
##  Chisq= 107.73 on 6 degrees of freedom, p= 6.1e-21 
## Number of Newton-Raphson Iterations: 5 
## n= 544

Log-logistic

lolog.fit <- survreg(Surv(mlt, tested) ~
                   age.young + 
                   race2 +
                   region.ewa + snap.grp3,
                 data=coxdata,
                 weights = ego.wawt,
                 dist = "loglogistic")
summary(lolog.fit)

## 
## Call:
## survreg(formula = Surv(mlt, tested) ~ age.young + race2 + region.ewa + 
##     snap.grp3, data = coxdata, weights = ego.wawt, dist = "loglogistic")
##                   Value Std. Error     z       p
## (Intercept)      4.2909     0.2792 15.37 < 2e-16
## age.young25+    -0.8176     0.1807 -4.53 6.0e-06
## race2B           0.5861     0.3407  1.72   0.085
## race2H           0.3059     0.2368  1.29   0.196
## region.ewaOther -0.4879     0.2349 -2.08   0.038
## snap.grp32-5    -1.0378     0.1674 -6.20 5.7e-10
## snap.grp36-125  -1.9731     0.2050 -9.62 < 2e-16
## Log(scale)      -0.0482     0.0396 -1.22   0.224
## 
## Scale= 0.953 
## 
## Log logistic distribution
## Loglik(model)= -1932.6   Loglik(intercept only)= -1993.8
##  Chisq= 122.35 on 6 degrees of freedom, p= 5.2e-24 
## Number of Newton-Raphson Iterations: 4 
## n= 544

Observed vs. predicted MLT by model

For the MLT metric, it’s not clear that there is a Cox PH prediction, but we can compare all of the parametric models

Individual level

These all look pretty wonky. Note the y-axis scale differences across the models.

# for CoxPH we predict using "expected" 
# not sure how to compare this to observed, since 
# it looks nothing like MLT
pred.cox <- predict(cox.fit, type="expected", se.fit=TRUE)

# for survreg, response=mlt so risk = 1/response
pred.exp <- predict(exp.fit, type="response")
pred.weib <- predict(weib.fit, type="response")
pred.lolog <- predict(lolog.fit, type="response")

result.df <- data.frame(mlt = coxdata$mlt,
                        exp = pred.exp,
                        weibull = pred.weib,
                        lolog = pred.lolog)
GGally::ggpairs(result.df,
                title = "Individual level observed vs. predicted MLT")

Distribution level

Log-logistic looks like the best fit, but it does not do a great job with the long tail.

ggplot(result.df) +
  geom_histogram(aes(x=mlt, y = ..density..)) +
  scale_x_continuous(limits = c(0, 300)) +
  geom_density(data = result.df %>% 
                 select(-mlt) %>%
                 pivot_longer(everything(),
                              names_to = "model",
                              values_to = "fit"),
               aes(x=fit, group=model, color=model),
               adjust=3) +
  labs(title = "Observed vs. Predicted MLT Distns: Parametric survival models",
       x = "Months since last test",
       caption = "Full Models; xlab truncated at 300")

Predicted testing risks by model

These are the predicted risks of testing, by covariate, that we will use to govern the dynamics of testing in EpiModel. Here we can compare to the Cox PH estimates, and again, the Log-logistic wins – it is almost identical to the non-parametric Cox PH risk estimates by covariate.

All models

# Set up covariate DF for newdata
covardf <- with(coxdata, expand.grid(
  age.young = unique(age.young),
  race2 = unique(race2),
  region.ewa = unique(region.ewa),
  snap.grp3 = unique(snap.grp3)))

# for CoxPH we predict relative risk; 
# rr = baseline hazard * absolute risk
pred.cox <- predict(cox.fit, newdata = covardf, type="risk", se.fit=TRUE)
pred.cox.df <- data.frame(covardf, pred = pred.cox$fit)

# for survreg, response=mlt so risk = 1/response
pred.exp <- predict(exp.fit, newdata=covardf, type="response")
pred.exp.df <- data.frame(covardf, pred = 1/pred.exp)

pred.weib <- predict(weib.fit, newdata=covardf, type="response")
pred.weib.df <- data.frame(covardf, pred = 1/pred.weib)

pred.lolog <- predict(lolog.fit, newdata=covardf, type="response")
pred.lolog.df <- data.frame(covardf, pred = 1/pred.lolog)

result.df <- data.frame(cox = pred.cox.df$pred,
                        exp = pred.exp.df$pred,
                        weibull = pred.weib.df$pred,
                        lolog = pred.lolog.df$pred)
GGally::ggpairs(result.df,
                title = "Predicted risk of testing in covariate groups by model")

Cox PH vs. Lolog

lmfit <- lm(result.df$lolog ~ result.df$cox)

plot(result.df$cox, result.df$lolog,
     #xlim = c(0, 0.3), ylim = c(0, 0.3),
     lwd = 2, col="slateblue",
     main = "Predicted Risk: Cox PH vs. Log-logistic",
     xlab = "Cox PH RR", ylab = "Lolog risk",
     sub = "Covariate grid: age.young, race, region.ewa, snap.grp3")
abline(lmfit$coef, col="red")

Testing Report

Martina Morris

2020-12-29

Preliminaries

Descriptives

Test group distribution

Age group

Race

Region

SNAP Group (3 category)

Never testers

Age

Race

Region

Months since last test

Overall

Exp fits by race

By age

Tests in last 2 years

Histograms

QQplot

Models

Method 1: Mixed exponential

Sample filtering:

Overall

By race: B&H

By race: O

Method 2: Survival models

Sample filtering

Survival plots

Overall

Age

Race

Main

Race x age

Race x region

Race x snap

Region

Main

Region x age

SNAP

Age x Race x region

Parametric Plots

Null model

Age

Cox PH

Exponential

Weibull

Log-logistic

Race

Cox PH

Exponential

Weibull

Log-logistic

Region

Cox PH

Exponential

Weibull

Log-logistic

SNAP

Cox PH

Exponential

Weibull

Log-logistic

Full model fits

Cox PH

Estimates

Forest plot

Residual plot

Term plots

Exponential

Weibull

Log-logistic

Observed vs. predicted MLT by model

Individual level

Distribution level

Predicted testing risks by model

All models

Cox PH vs. Lolog