Compare TB at initiation from pat file, visit file and TB module
Published
March 22, 2024
# Check consistency of tb_fhv from pat_file versus tb_module over time (yearly).
# First step: frequency table of tb_fhv among pat-file rows with
# method_into_art == "New".
pat_checked %>%
  filter(method_into_art == "New") %>%
  tabyl(tb_fhv)
tb_fhv n percent
No TB 139863 5.645146e-01
TB 25195 1.016920e-01
Not applicable 24 9.686872e-05
Not ascertained 82676 3.336966e-01
# Of those with TB from the pat file, how many appear in the TB module?
# Counts rows by whether study_id is present in tb$study_id and adds the
# share of each group.
pat_checked %>%
  filter(
    method_into_art == "New",
    tb_fhv == "TB"
  ) %>%
  count(study_id %in% tb$study_id) %>%
  mutate(percent = n / sum(n)) # 64% do not appear
# Plot this over time: per haart_dmy year (2012-2020), the proportion of
# pat-file TB cases whose study_id also appears in the TB module.
pat_checked %>%
  filter(
    method_into_art == "New",
    tb_fhv == "TB",
    between(year(haart_dmy), 2012, 2020)
  ) %>%
  mutate(haart_year = year(haart_dmy)) %>%
  group_by(haart_year) %>%
  count(study_id %in% tb$study_id) %>%
  ggplot(
    aes(
      x = haart_year,
      y = n,
      fill = `study_id %in% tb$study_id`
    )
  ) +
  # position = "fill" stacks the counts to a proportion within each year
  geom_col(position = "fill") +
  labs(
    y = "%",
    title = "Figure 1. TB at haart_dmy in pat_file that appears in TB module over time"
  ) +
  theme(legend.position = "top")
# Check the reverse situation, thus TB module versus the pat and visit files.

# Reference set: pat-file rows with method_into_art == "New", tb_fhv == "TB"
# and haart_dmy in 2012-2020. Only study_id is used below; haart_year is kept
# in case later code relies on it.
check_3 <- pat_checked %>%
  filter(
    method_into_art == "New",
    tb_fhv == "TB",
    between(year(haart_dmy), 2012, 2020)
  ) %>%
  mutate(haart_year = year(haart_dmy))

# Participants in TB module with a tb_start_dmy <= haart_dmy that appear in
# the pat file as having TB at initiation.
# NOTE(review): rows whose haart_dmy is NA after the left join are dropped by
# the filter, because the comparison evaluates to NA.
tb %>%
  left_join(pat_checked, join_by(study_id)) %>%
  filter(
    between(year(tb_start_dmy), 2012, 2020),
    tb_start_dmy <= haart_dmy
  ) %>%
  # one row per participant and TB episode start date
  distinct(study_id, tb_start_dmy, .keep_all = TRUE) %>%
  group_by(year(tb_start_dmy)) %>%
  count(study_id %in% check_3$study_id) %>%
  ggplot(
    aes(
      x = `year(tb_start_dmy)`,
      y = n,
      fill = `study_id %in% check_3$study_id`
    )
  ) +
  # position = "fill" stacks the counts to a proportion within each year
  geom_col(position = "fill") +
  labs(
    y = "%",
    title = "Figure 2a. Participants in TB module with a tb_start_dmy <= haart_dmy \nthat appear in the pat file as having TB at initiation"
  ) +
  theme(legend.position = "top")
# Participants in TB module with a tb_start_dmy <= haart_dmy that appear in
# the visit file as being on TB treatment at initiation.

# Reference set: pat-file rows with method_into_art == "New" and haart_dmy in
# 2012-2020, matched to the visit recorded on the same date as haart_dmy, and
# kept only when that visit has tb_status == "4".
# NOTE(review): "4" appears to correspond to "On TB treatment" - confirm
# against the data dictionary.
check_3b <- pat_checked %>%
  filter(
    method_into_art == "New",
    between(year(haart_dmy), 2012, 2020)
  ) %>%
  left_join(
    # keep a single visit row per participant and visit date before joining
    visit_checked %>%
      distinct(study_id, visit_dmy, .keep_all = TRUE),
    join_by(study_id, haart_dmy == visit_dmy)
  ) %>%
  filter(tb_status == "4")

tb %>%
  filter(between(year(tb_start_dmy), 2012, 2020)) %>%
  left_join(pat_checked, join_by(study_id)) %>%
  filter(tb_start_dmy <= haart_dmy) %>%
  # one row per participant and TB episode start date
  distinct(study_id, tb_start_dmy, .keep_all = TRUE) %>%
  group_by(year(tb_start_dmy)) %>%
  count(study_id %in% check_3b$study_id) %>%
  ggplot(
    aes(
      x = `year(tb_start_dmy)`,
      y = n,
      fill = `study_id %in% check_3b$study_id`
    )
  ) +
  # position = "fill" stacks the counts to a proportion within each year
  geom_col(position = "fill") +
  labs(
    y = "%",
    title = "Figure 2c. Participants in TB module with a tb_start_dmy <= haart_dmy \nthat appear in the visit file as being on TB treatment at initiation"
  ) +
  theme(legend.position = "top")
# Check TB status at initiation from the visit file (thus at
# haart_dmy == visit_dmy).
# Filter haart_dmy == visit_dmy and compare tb_fhv vs tb_status.
check_6 <- pat_checked %>%
  filter(
    method_into_art == "New",
    between(year(haart_dmy), 2012, 2020)
  ) %>%
  left_join(
    # keep a single visit row per participant and visit date before joining
    visit_checked %>%
      distinct(study_id, visit_dmy, .keep_all = T),
    join_by(study_id, haart_dmy == visit_dmy)
  ) %>%
  mutate(
    # NOTE(review): no `levels` are given, so the labels are applied to the
    # sorted unique values of tb_status; this presumably assumes exactly six
    # codes in this order - confirm against the data dictionary.
    tb_status = factor(
      tb_status,
      labels = c(
        "No symptoms",
        "Symptoms present and with sputum test done",
        "Symptoms present, sputum test not done",
        "On TB treatment",
        "Symptom screening not done",
        "Screening status unknown"
      )
    ),
    # restricts level-dropping to "Not applicable" (removed only if unused)
    tb_fhv = fct_drop(tb_fhv, "Not applicable"),
    haart_year = year(haart_dmy)
  )

# Cross-tabulate visit-file TB status (rows) against pat-file tb_fhv (columns)
tabyl(check_6, tb_status, tb_fhv)
tb_status No TB TB Not ascertained
No symptoms 117584 4748 71754
Symptoms present and with sputum test done 1240 96 584
Symptoms present, sputum test not done 2 0 2
On TB treatment 509 17299 187
Symptom screening not done 8797 368 4079
Screening status unknown 2754 165 2318
<NA> 4 0 1
# 509 have no TB and 187 Not ascertained but are on TB treatment from visit
# file. How many of these are in the TB module?
# NOTE(review): the counts in the figure title are hard-coded from the run
# recorded here and will not update if the data change.
check_6 %>%
  filter(
    tb_fhv %in% c("No TB", "Not ascertained"),
    tb_status == "On TB treatment"
  ) %>%
  group_by(haart_year) %>%
  count(study_id %in% tb$study_id) %>%
  ggplot(
    aes(
      x = haart_year,
      y = n,
      fill = `study_id %in% tb$study_id`
    )
  ) +
  # position = "fill" stacks the counts to a proportion within each year
  geom_col(position = "fill") +
  labs(
    y = "%",
    title = "Figure 3. Of 509 who have no TB and 187 Not ascertained but are on TB treatment from visit file.\n Proportion that appears in TB module over time (total 9.77%, n = 68/696)"
  ) +
  theme(legend.position = "top")