Compare TB at initiation from pat file, visit file and TB module

Published

March 22, 2024

# check consistency of tb_fhv from pat_file versus tb_module over time (yearly)

# Frequency table of tb_fhv restricted to participants whose
# method_into_art is "New"
tabyl(
  filter(pat_checked, method_into_art == "New"),
  tb_fhv
)
          tb_fhv      n      percent
           No TB 139863 5.645146e-01
              TB  25195 1.016920e-01
  Not applicable     24 9.686872e-05
 Not ascertained  82676 3.336966e-01
# Of those with TB from the pat file, how many appear in TB module
# Among "New" participants recorded as TB in the pat file, count how many
# study_ids are present (TRUE) or absent (FALSE) in the TB module, and add
# each count's share of the total
pat_checked %>%
  filter(method_into_art == "New", tb_fhv == "TB") %>%
  count(study_id %in% tb$study_id) %>%
  mutate(percent = n / sum(n)) # 64% do not appear
# A tibble: 2 × 3
  `study_id %in% tb$study_id`     n percent
  <lgl>                       <int>   <dbl>
1 FALSE                       16236   0.644
2 TRUE                         8959   0.356
# Plot this over time
# Figure 1: for "New" participants recorded as TB in the pat file with
# haart_dmy in 2012-2020, show per initiation year the share whose study_id
# is present in the TB module.
pat_checked %>%
  filter(method_into_art == "New", tb_fhv == "TB") %>%
  mutate(haart_year = year(haart_dmy)) %>%
  filter(between(haart_year, 2012, 2020)) %>%
  group_by(haart_year) %>%
  count(study_id %in% tb$study_id) %>%
  ggplot(aes(x = haart_year, y = n, fill = `study_id %in% tb$study_id`)) +
  geom_col(position = "fill") +
  # position = "fill" rescales every bar to the 0-1 range; print the axis
  # ticks as percentages so they agree with the "%" axis title
  scale_y_continuous(labels = function(p) paste0(100 * p, "%")) +
  labs(y="%",title = "Figure 1. TB at haart_dmy in pat_file that appears in TB module over time") +
  theme(legend.position = "top")

# Check the reverse situation, thus TB module versus the pat and visit files
# Participants in TB module with a tb_start_dmy <= haart_dmy that appear in the pat file as having TB at initiation
# Comparison set used by Figure 2a: "New" participants with tb_fhv == "TB"
# whose haart_dmy falls in 2012-2020, with the initiation year attached
check_3 <- pat_checked %>%
  filter(
    method_into_art == "New",
    tb_fhv == "TB",
    between(year(haart_dmy), 2012, 2020)
  ) %>%
  mutate(haart_year = year(haart_dmy))

# Figure 2a: TB-module episodes starting in 2012-2020 on or before haart_dmy,
# one row per (study_id, tb_start_dmy); per TB start year, the share whose
# study_id is found in check_3 (pat file: TB at initiation).
# NOTE(review): check_3 keeps only method_into_art == "New" and haart_dmy in
# 2012-2020, but this denominator is not restricted the same way, so other
# participants can only ever count as FALSE - confirm this is intended.
tb %>%
  left_join(pat_checked, join_by(study_id)) %>%
  filter(
    between(year(tb_start_dmy), 2012, 2020),
    # rows without a matching haart_dmy (NA) are dropped by this comparison
    tb_start_dmy <= haart_dmy
  ) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  distinct(study_id, tb_start_dmy, .keep_all = TRUE) %>%
  group_by(year(tb_start_dmy)) %>%
  count(study_id %in% check_3$study_id) %>%
  ggplot(aes(x = `year(tb_start_dmy)`, y = n, fill = `study_id %in% check_3$study_id`)) +
  geom_col(position = "fill") +
  # position = "fill" rescales every bar to the 0-1 range; print the axis
  # ticks as percentages so they agree with the "%" axis title
  scale_y_continuous(labels = function(p) paste0(100 * p, "%")) +
  labs(y="%",title = "Figure 2a. Participants in TB module with a tb_start_dmy <= haart_dmy \nthat appear in the pat file as having TB at initiation") +
  theme(legend.position = "top")

# Participants in TB module with a tb_start_dmy <= haart_dmy that appear in the visit file as being on TB treatment at initiation
# Comparison set used by Figure 2c: "New" participants initiating in
# 2012-2020 whose visit on the initiation date (visit_dmy == haart_dmy)
# has tb_status code "4".
# NOTE(review): code "4" presumably means "On TB treatment" (fourth label
# applied to tb_status when check_6 is built) - confirm against the codebook.
check_3b <- pat_checked %>%
  filter(
    method_into_art == "New",
    between(year(haart_dmy), 2012, 2020)
  ) %>%
  left_join(
    # keep a single visit row per participant and date before joining;
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    visit_checked %>% distinct(study_id, visit_dmy, .keep_all = TRUE),
    join_by(study_id, haart_dmy == visit_dmy)
  ) %>%
  # participants without a visit on haart_dmy have NA tb_status and drop out
  filter(tb_status == "4")

# Figure 2c: TB-module episodes starting in 2012-2020 on or before haart_dmy,
# one row per (study_id, tb_start_dmy); per TB start year, the share whose
# study_id is found in check_3b (visit file: tb_status "4" at initiation).
tb %>%
  filter(between(year(tb_start_dmy), 2012, 2020)) %>%
  left_join(pat_checked, join_by(study_id)) %>%
  # rows without a matching haart_dmy (NA) are dropped by this comparison
  filter(tb_start_dmy <= haart_dmy) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  distinct(study_id, tb_start_dmy, .keep_all = TRUE) %>%
  group_by(year(tb_start_dmy)) %>%
  count(study_id %in% check_3b$study_id) %>%
  ggplot(aes(x = `year(tb_start_dmy)`, y = n, fill = `study_id %in% check_3b$study_id`)) +
  geom_col(position = "fill") +
  # position = "fill" rescales every bar to the 0-1 range; print the axis
  # ticks as percentages so they agree with the "%" axis title
  scale_y_continuous(labels = function(p) paste0(100 * p, "%")) +
  labs(y="%",title = "Figure 2c. Participants in TB module with a tb_start_dmy <= haart_dmy \nthat appear in the visit file as being on TB treatment at initiation") +
  theme(legend.position = "top")

# Check TB status at initiation from the visit file (thus at haart_dmy == visit_dmy)
# Filter haart_dmy == visit_dmy and compare tb_fhv vs tb_status

# Attach to each "New" participant initiating in 2012-2020 the visit recorded
# on the initiation date (visit_dmy == haart_dmy), label tb_status, and
# cross-tabulate it against tb_fhv.
check_6 <- pat_checked %>%
  filter(
    method_into_art == "New",
    between(year(haart_dmy), 2012, 2020)
  ) %>%
  left_join(
    # keep a single visit row per participant and date before joining;
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    visit_checked %>% distinct(study_id, visit_dmy, .keep_all = TRUE),
    join_by(study_id, haart_dmy == visit_dmy)
  ) %>%
  mutate(
    # NOTE(review): no `levels` are given, so the labels are applied to the
    # sorted unique tb_status codes present in the data. This fails if any of
    # the six codes is absent, and mislabels if the sort order differs from
    # the codebook order - presumably codes are 1-6; confirm and consider
    # passing `levels` explicitly.
    tb_status = factor(
      tb_status,
      labels = c(
        "No symptoms",
        "Symptoms present and with sputum test done",
        "Symptoms present, sputum test not done",
        "On TB treatment",
        "Symptom screening not done",
        "Screening status unknown"
      )
    ),
    # drops the "Not applicable" level only if it is unused after filtering
    tb_fhv = fct_drop(tb_fhv, only = "Not applicable"),
    haart_year = year(haart_dmy)
  )

# Cross-tabulation: visit-file tb_status (rows) by pat-file tb_fhv (columns)
tabyl(check_6, tb_status, tb_fhv)
                                  tb_status  No TB    TB Not ascertained
                                No symptoms 117584  4748           71754
 Symptoms present and with sputum test done   1240    96             584
     Symptoms present, sputum test not done      2     0               2
                            On TB treatment    509 17299             187
                 Symptom screening not done   8797   368            4079
                   Screening status unknown   2754   165            2318
                                       <NA>      4     0               1
# 509 are "No TB" and 187 are "Not ascertained" in the pat file, yet are on TB treatment according to the visit file
# How many of these are in the TB module ?

# Figure 3: participants whose tb_fhv is "No TB" or "Not ascertained" but
# whose visit on the initiation date says "On TB treatment"; per initiation
# year, the share whose study_id is present in the TB module.
# NOTE(review): the counts quoted in the title (509, 187, 68/696, 9.77%) are
# hard-coded and must be updated by hand if the underlying data change.
check_6 %>%
  filter(
    tb_fhv %in% c("No TB", "Not ascertained"),
    tb_status == "On TB treatment"
  ) %>%
  group_by(haart_year) %>%
  count(study_id %in% tb$study_id) %>%
  ggplot(aes(x = haart_year, y = n, fill = `study_id %in% tb$study_id`)) +
  geom_col(position = "fill") +
  # position = "fill" rescales every bar to the 0-1 range; print the axis
  # ticks as percentages so they agree with the "%" axis title
  scale_y_continuous(labels = function(p) paste0(100 * p, "%")) +
  labs(y="%",title = "Figure 3. Of 509 who have no TB and 187 Not ascertained but are on TB treatment from visit file.\n Proportion that appears in TB module over time (total 9.77%, n = 68/696)") +
  theme(legend.position = "top")