#importing data 
TB <- read.csv("SP Data.csv")


#transform duration
trtstart <- as.Date(TB$Date.Started.Tx, "%m/%d/%Y")
outcomedate <- as.Date(TB$Date.of.Outcome.Status, "%m/%d/%Y")

Time <- difftime(outcomedate,trtstart, units = "days") 
TB$Time <- as.numeric(Time) #time difference in days


#event for "cured"
cured <- if_else(TB$Outcome.Status=="CURED",1,0)
#event for "treatment completed"
trtcomp <- if_else(TB$Outcome.Status=="TREATMENT COMPLETED",1,0)
#column for status wherein an event is when outcome is cured or treatment completed
TB$Status <- if_else(cured==1|trtcomp==1,1,0)

#splitting age into groups
TB$Age.Group <- cut(TB$Age, breaks = c(seq(0,100, by=20)), right = FALSE)

#consider only the NEW cases
TB <- TB[TB$Registration=="NEW",]

#final data frame
TB <- data.frame(TB$Age, TB$Sex, TB$Residence, TB$Referral, TB$Anatomical, TB$Bacteriologic, TB$Time, TB$Status, TB$Age.Group)
#frequencies
library(summarytools)
## 
## Attaching package: 'summarytools'
## The following object is masked from 'package:tibble':
## 
##     view
freq(TB$TB.Age)
## Frequencies  
## TB$.Age  
## Type: Numeric  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##        0.75      1      0.27           0.27      0.27           0.27
##           2      1      0.27           0.54      0.27           0.54
##           3      3      0.81           1.35      0.81           1.35
##           4      1      0.27           1.62      0.27           1.62
##           5      2      0.54           2.16      0.54           2.16
##           7      1      0.27           2.43      0.27           2.43
##           8      1      0.27           2.70      0.27           2.70
##           9      1      0.27           2.96      0.27           2.96
##          10      1      0.27           3.23      0.27           3.23
##          13      2      0.54           3.77      0.54           3.77
##          14      1      0.27           4.04      0.27           4.04
##          16      3      0.81           4.85      0.81           4.85
##          17      3      0.81           5.66      0.81           5.66
##          18      5      1.35           7.01      1.35           7.01
##          19      6      1.62           8.63      1.62           8.63
##          20      1      0.27           8.89      0.27           8.89
##          21      1      0.27           9.16      0.27           9.16
##          22      3      0.81           9.97      0.81           9.97
##          23      9      2.43          12.40      2.43          12.40
##          24      3      0.81          13.21      0.81          13.21
##          25      9      2.43          15.63      2.43          15.63
##          26      7      1.89          17.52      1.89          17.52
##          27      8      2.16          19.68      2.16          19.68
##          28      4      1.08          20.75      1.08          20.75
##          29      7      1.89          22.64      1.89          22.64
##          30      8      2.16          24.80      2.16          24.80
##          32      5      1.35          26.15      1.35          26.15
##          33      7      1.89          28.03      1.89          28.03
##          34      2      0.54          28.57      0.54          28.57
##          35      8      2.16          30.73      2.16          30.73
##          36      3      0.81          31.54      0.81          31.54
##          37      7      1.89          33.42      1.89          33.42
##          38      3      0.81          34.23      0.81          34.23
##          39      6      1.62          35.85      1.62          35.85
##          40      7      1.89          37.74      1.89          37.74
##          41      3      0.81          38.54      0.81          38.54
##          42      5      1.35          39.89      1.35          39.89
##          43      5      1.35          41.24      1.35          41.24
##          44      4      1.08          42.32      1.08          42.32
##          45      9      2.43          44.74      2.43          44.74
##          46      7      1.89          46.63      1.89          46.63
##          47      3      0.81          47.44      0.81          47.44
##          48      6      1.62          49.06      1.62          49.06
##          49      5      1.35          50.40      1.35          50.40
##          50     11      2.96          53.37      2.96          53.37
##          51      8      2.16          55.53      2.16          55.53
##          52      7      1.89          57.41      1.89          57.41
##          53      8      2.16          59.57      2.16          59.57
##          54      6      1.62          61.19      1.62          61.19
##          55      7      1.89          63.07      1.89          63.07
##          56      6      1.62          64.69      1.62          64.69
##          57      8      2.16          66.85      2.16          66.85
##          58      7      1.89          68.73      1.89          68.73
##          59      6      1.62          70.35      1.62          70.35
##          60      8      2.16          72.51      2.16          72.51
##          61      7      1.89          74.39      1.89          74.39
##          62     10      2.70          77.09      2.70          77.09
##          63      8      2.16          79.25      2.16          79.25
##          64      3      0.81          80.05      0.81          80.05
##          65     11      2.96          83.02      2.96          83.02
##          66      5      1.35          84.37      1.35          84.37
##          67      9      2.43          86.79      2.43          86.79
##          68      4      1.08          87.87      1.08          87.87
##          69      6      1.62          89.49      1.62          89.49
##          70      5      1.35          90.84      1.35          90.84
##          71      6      1.62          92.45      1.62          92.45
##          72      1      0.27          92.72      0.27          92.72
##          73      3      0.81          93.53      0.81          93.53
##          74      2      0.54          94.07      0.54          94.07
##          75      2      0.54          94.61      0.54          94.61
##          76      4      1.08          95.69      1.08          95.69
##          77      5      1.35          97.04      1.35          97.04
##          78      4      1.08          98.11      1.08          98.11
##          79      1      0.27          98.38      0.27          98.38
##          80      3      0.81          99.19      0.81          99.19
##          83      1      0.27          99.46      0.27          99.46
##          88      1      0.27          99.73      0.27          99.73
##          89      1      0.27         100.00      0.27         100.00
##        <NA>      0                               0.00         100.00
##       Total    371    100.00         100.00    100.00         100.00
freq(TB$TB.Sex)
## Frequencies  
## TB$.Sex  
## Type: Character  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           F    113     30.46          30.46     30.46          30.46
##           M    258     69.54         100.00     69.54         100.00
##        <NA>      0                               0.00         100.00
##       Total    371    100.00         100.00    100.00         100.00
freq(TB$TB.Residence)
## Frequencies  
## TB$.Residence  
## Type: Integer  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##           1     97     26.15          26.15     26.15          26.15
##           2     82     22.10          48.25     22.10          48.25
##           3    160     43.13          91.37     43.13          91.37
##           4     32      8.63         100.00      8.63         100.00
##        <NA>      0                               0.00         100.00
##       Total    371    100.00         100.00    100.00         100.00
freq(TB$TB.Referral)
## Frequencies  
## TB$.Referral  
## Type: Character  
## 
##                               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## --------------------------- ------ --------- -------------- --------- --------------
##                   COMMUNITY     50     13.48          13.48     13.48          13.48
##       OTHER PUBLIC FACILITY     15      4.04          17.52      4.04          17.52
##            PRIVATE FACILITY     36      9.70          27.22      9.70          27.22
##        PUBLIC HEALTH CENTER    270     72.78         100.00     72.78         100.00
##                        <NA>      0                               0.00         100.00
##                       Total    371    100.00         100.00    100.00         100.00
freq(TB$TB.Anatomical)
## Frequencies  
## TB$.Anatomical  
## Type: Character  
## 
##               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
##          EP      3      0.81           0.81      0.81           0.81
##           P    368     99.19         100.00     99.19         100.00
##        <NA>      0                               0.00         100.00
##       Total    371    100.00         100.00    100.00         100.00
freq(TB$TB.Bacteriologic)
## Frequencies  
## TB$.Bacteriologic  
## Type: Character  
## 
##                                        Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## ------------------------------------ ------ --------- -------------- --------- --------------
##       Bacteriologically-confirmed TB    201     54.18          54.18     54.18          54.18
##              Clinically-diagnosed TB    170     45.82         100.00     45.82         100.00
##                                 <NA>      0                               0.00         100.00
##                                Total    371    100.00         100.00    100.00         100.00
freq(TB$TB.Age.Group)
## Frequencies  
## TB$.Age.Group  
## Type: Factor  
## 
##                  Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
## -------------- ------ --------- -------------- --------- --------------
##         [0,20)     32      8.63           8.63      8.63           8.63
##        [20,40)    101     27.22          35.85     27.22          35.85
##        [40,60)    128     34.50          70.35     34.50          70.35
##        [60,80)    104     28.03          98.38     28.03          98.38
##       [80,100)      6      1.62         100.00      1.62         100.00
##           <NA>      0                               0.00         100.00
##          Total    371    100.00         100.00    100.00         100.00

Kaplan-Meier

# creating survival objects
surv.TB <- Surv(time = TB$TB.Time,event = TB$TB.Status)
#Fitting KM estimates
TB.fit <- survfit(surv.TB~1, data=TB)
summary(TB.fit)
## Call: survfit(formula = surv.TB ~ 1, data = TB)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     0    371       1  0.99730 0.00269     0.992043       1.0000
##   136    320       1  0.99419 0.00411     0.986167       1.0000
##   145    318       1  0.99106 0.00515     0.981019       1.0000
##   157    315       1  0.98792 0.00602     0.976190       0.9998
##   160    314       2  0.98162 0.00745     0.967139       0.9963
##   162    312       3  0.97218 0.00915     0.954409       0.9903
##   163    309       1  0.96904 0.00965     0.950309       0.9881
##   164    308       3  0.95960 0.01099     0.938306       0.9814
##   165    305       3  0.95016 0.01216     0.926633       0.9743
##   166    302       8  0.92499 0.01473     0.896557       0.9543
##   167    294     204  0.28316 0.02527     0.237724       0.3373
##   168     90      37  0.16675 0.02091     0.130419       0.2132
##   169     52      21  0.09941 0.01685     0.071302       0.1386
##   170     31       3  0.08979 0.01611     0.063164       0.1276
##   171     28       4  0.07696 0.01503     0.052481       0.1129
##   172     24       2  0.07055 0.01445     0.047223       0.1054
##   173     22       1  0.06734 0.01414     0.044618       0.1016
##   174     21       2  0.06093 0.01350     0.039460       0.0941
##   175     19       1  0.05772 0.01317     0.036910       0.0903
##   176     18       5  0.04169 0.01130     0.024512       0.0709
##   177     13       1  0.03848 0.01087     0.022118       0.0669
##   179     12       2  0.03207 0.00996     0.017444       0.0589
##   185     10       2  0.02565 0.00894     0.012956       0.0508
##   186      8       1  0.02245 0.00838     0.010800       0.0467
##   188      7       1  0.01924 0.00777     0.008717       0.0425
##   190      6       1  0.01603 0.00711     0.006725       0.0382
##   197      5       2  0.00962 0.00552     0.003121       0.0297
##   198      3       1  0.00641 0.00452     0.001612       0.0255
##   200      2       1  0.00321 0.00320     0.000453       0.0227
##   206      1       1  0.00000     NaN           NA           NA
# creating survival plots
survfit2(surv.TB~1, data=TB, conf.type="log-log") |>
ggsurvfit(linewidth=1, color="green")+
add_censor_mark(color="red", size=0.5)+
add_confidence_interval(type = "ribbon")+
labs(x="Days", y="Hazard Rate")+
geom_vline(xintercept = 136, color="grey40", linetype="dotted")+
geom_hline(yintercept = 0.50, color="grey40", linetype="dashed")+
geom_vline(xintercept = 167, color="grey40", linetype="dashed")+
theme(panel.grid = element_blank())+
  annotate("text",x=136, y=0.1, label= "Day 136", color="grey60")+
  annotate("text",x=167, y=0.5, label= "Median Survival", color="grey40")

# Estimating median survival Time
survfit(surv.TB~1, data=TB)
## Call: survfit(formula = surv.TB ~ 1, data = TB)
## 
##        n events median 0.95LCL 0.95UCL
## [1,] 371    317    167     167     167
median(survfit(surv.TB~1, data=TB))
##  50 
## 167
#to identify number of events per category
a <- survfit(surv.TB~TB.Sex, data=TB)
b <-survfit(surv.TB~TB.Residence, data=TB)
c <-survfit(surv.TB~TB.Referral, data=TB)
d <-survfit(surv.TB~TB.Anatomical, data=TB)
e <-survfit(surv.TB~TB.Bacteriologic, data=TB)
f <-survfit(surv.TB~TB.Age.Group, data=TB)
summary(a)$table
##          records n.max n.start events    rmean se(rmean) median 0.95LCL 0.95UCL
## TB.Sex=F     113   113     113    100 168.1400 0.4086612    167     167     167
## TB.Sex=M     258   258     258    217 167.5483 0.7772760    167     167     167
summary(b)$table
##                records n.max n.start events    rmean se(rmean) median 0.95LCL
## TB.Residence=1      97    97      97     86 167.7558 0.5967589    167     167
## TB.Residence=2      82    82      82     63 170.0145 1.0589461    167     167
## TB.Residence=3     160   160     160    141 166.5536 1.0904087    167     167
## TB.Residence=4      32    32      32     27 168.2963 1.1556681    167     167
##                0.95UCL
## TB.Residence=1     167
## TB.Residence=2     167
## TB.Residence=3     167
## TB.Residence=4     168
summary(c)$table
##                                   records n.max n.start events    rmean
## TB.Referral=COMMUNITY                  50    50      50     50 168.1800
## TB.Referral=OTHER PUBLIC FACILITY      15    15      15     15 166.8667
## TB.Referral=PRIVATE FACILITY           36    36      36     35 167.1143
## TB.Referral=PUBLIC HEALTH CENTER      270   270     270    217 167.8117
##                                   se(rmean) median 0.95LCL 0.95UCL
## TB.Referral=COMMUNITY             0.5243587    167     167     167
## TB.Referral=OTHER PUBLIC FACILITY 0.3755490    167     167     168
## TB.Referral=PRIVATE FACILITY      1.2550446    167     167     167
## TB.Referral=PUBLIC HEALTH CENTER  0.7383680    167     167     167
summary(d)$table
##                  records n.max n.start events   rmean se(rmean) median 0.95LCL
## TB.Anatomical=EP       3     3       3      3 167.000 0.0000000    167      NA
## TB.Anatomical=P      368   368     368    314 167.731 0.5576206    167     167
##                  0.95UCL
## TB.Anatomical=EP      NA
## TB.Anatomical=P      167
summary(e)$table
##                                                 records n.max n.start events
## TB.Bacteriologic=Bacteriologically-confirmed TB     201   201     201    170
## TB.Bacteriologic=Clinically-diagnosed TB            170   170     170    147
##                                                    rmean se(rmean) median
## TB.Bacteriologic=Bacteriologically-confirmed TB 168.0540 0.3682387    167
## TB.Bacteriologic=Clinically-diagnosed TB        167.3197 1.1222973    167
##                                                 0.95LCL 0.95UCL
## TB.Bacteriologic=Bacteriologically-confirmed TB     167     167
## TB.Bacteriologic=Clinically-diagnosed TB            167     167
summary(f)$table
##                       records n.max n.start events    rmean se(rmean) median
## TB.Age.Group=[0,20)        32    32      32     29 168.2414 0.4916791    167
## TB.Age.Group=[20,40)      101   101     101     89 168.3483 0.7042177    167
## TB.Age.Group=[40,60)      128   128     128    111 168.2598 0.5809606    167
## TB.Age.Group=[60,80)      104   104     104     82 166.3113 1.6916247    167
## TB.Age.Group=[80,100)       6     6       6      6 167.1667 0.1521452    167
##                       0.95LCL 0.95UCL
## TB.Age.Group=[0,20)       167     168
## TB.Age.Group=[20,40)      167     167
## TB.Age.Group=[40,60)      167     167
## TB.Age.Group=[60,80)      167     167
## TB.Age.Group=[80,100)     167      NA

Log-rank Test

##comparing survival times(curves) within groups
survdiff(surv.TB~TB.Sex, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Sex, data = TB)
## 
##            N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Sex=F 113      100     98.3    0.0287    0.0906
## TB.Sex=M 258      217    218.7    0.0129    0.0906
## 
##  Chisq= 0.1  on 1 degrees of freedom, p= 0.8
survdiff(surv.TB~TB.Residence, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Residence, data = TB)
## 
##                  N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Residence=1  97       86     82.3    0.1700     0.503
## TB.Residence=2  82       63     75.5    2.0830     5.924
## TB.Residence=3 160      141    133.0    0.4860     1.879
## TB.Residence=4  32       27     26.2    0.0224     0.052
## 
##  Chisq= 6  on 3 degrees of freedom, p= 0.1
survdiff(surv.TB~TB.Referral, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Referral, data = TB)
## 
##                                     N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Referral=COMMUNITY              50       50     48.1    0.0768     0.199
## TB.Referral=OTHER PUBLIC FACILITY  15       15     11.8    0.8866     2.245
## TB.Referral=PRIVATE FACILITY       36       35     30.8    0.5705     1.457
## TB.Referral=PUBLIC HEALTH CENTER  270      217    226.3    0.3857     3.016
## 
##  Chisq= 4.5  on 3 degrees of freedom, p= 0.2
survdiff(surv.TB~TB.Anatomical, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Anatomical, data = TB)
## 
##                    N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Anatomical=EP   3        3     2.31    0.2034     0.547
## TB.Anatomical=P  368      314   314.69    0.0015     0.547
## 
##  Chisq= 0.5  on 1 degrees of freedom, p= 0.5
survdiff(surv.TB~TB.Bacteriologic, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Bacteriologic, data = TB)
## 
##                                                   N Observed Expected (O-E)^2/E
## TB.Bacteriologic=Bacteriologically-confirmed TB 201      170      170  0.000161
## TB.Bacteriologic=Clinically-diagnosed TB        170      147      147  0.000185
##                                                 (O-E)^2/V
## TB.Bacteriologic=Bacteriologically-confirmed TB  0.000749
## TB.Bacteriologic=Clinically-diagnosed TB         0.000749
## 
##  Chisq= 0  on 1 degrees of freedom, p= 1
survdiff(surv.TB~TB.Age.Group, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Age.Group, data = TB)
## 
##                         N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Age.Group=[0,20)    32       29    30.86    0.1123     0.268
## TB.Age.Group=[20,40)  101       89    92.28    0.1165     0.351
## TB.Age.Group=[40,60)  128      111   113.51    0.0555     0.187
## TB.Age.Group=[60,80)  104       82    75.31    0.5943     1.681
## TB.Age.Group=[80,100)   6        6     5.04    0.1833     0.475
## 
##  Chisq= 2.4  on 4 degrees of freedom, p= 0.7
ggsurvplot(survfit(surv.TB~TB.Sex, data=TB))

ggsurvplot(survfit(surv.TB~TB.Residence, data=TB))

ggsurvplot(survfit(surv.TB~TB.Referral, data=TB))

ggsurvplot(survfit(surv.TB~TB.Anatomical, data=TB))

ggsurvplot(survfit(surv.TB~TB.Bacteriologic, data=TB))

#Multiple cox

full <- coxph(surv.TB~TB.Age + factor(TB.Sex)+ factor(TB.Residence)+ factor(TB.Referral)+ factor(TB.Anatomical)+factor(TB.Bacteriologic), ties = "exact", data = TB )

full %>%
  tbl_regression(exp=TRUE)
Characteristic HR1 95% CI1 p-value
TB.Age 1.01 1.00, 1.02 0.036
factor(TB.Sex)
    F
    M 0.96 0.67, 1.37 0.8
factor(TB.Residence)
    1
    2 0.59 0.37, 0.96 0.034
    3 1.00 0.66, 1.53 >0.9
    4 0.86 0.45, 1.67 0.7
factor(TB.Referral)
    COMMUNITY
    OTHER PUBLIC FACILITY 1.69 0.62, 4.61 0.3
    PRIVATE FACILITY 1.39 0.69, 2.79 0.4
    PUBLIC HEALTH CENTER 0.85 0.53, 1.37 0.5
factor(TB.Anatomical)
    EP
    P 0.40 0.05, 3.25 0.4
factor(TB.Bacteriologic)
    Bacteriologically-confirmed TB
    Clinically-diagnosed TB 0.85 0.59, 1.22 0.4
1 HR = Hazard Ratio, CI = Confidence Interval