#importing data
TB <- read.csv("SP Data.csv")
#transform duration
trtstart <- as.Date(TB$Date.Started.Tx, "%m/%d/%Y")
outcomedate <- as.Date(TB$Date.of.Outcome.Status, "%m/%d/%Y")
Time <- difftime(outcomedate,trtstart, units = "days")
TB$Time <- as.numeric(Time) #time difference in days
#event for "cured"
cured <- if_else(TB$Outcome.Status=="CURED",1,0)
#event for "treatment completed"
trtcomp <- if_else(TB$Outcome.Status=="TREATMENT COMPLETED",1,0)
#column for status wherein an event is when outcome is cured or treatment completed
TB$Status <- if_else(cured==1|trtcomp==1,1,0)
#splitting age into groups
TB$Age.Group <- cut(TB$Age, breaks = c(seq(0,100, by=20)), right = FALSE)
#consider only the NEW cases
TB <- TB[TB$Registration=="NEW",]
#final data frame
TB <- data.frame(TB$Age, TB$Sex, TB$Residence, TB$Referral, TB$Anatomical, TB$Bacteriologic, TB$Time, TB$Status, TB$Age.Group)
#frequencies
library(summarytools)
##
## Attaching package: 'summarytools'
## The following object is masked from 'package:tibble':
##
## view
freq(TB$TB.Age)
## Frequencies
## TB$.Age
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0.75 1 0.27 0.27 0.27 0.27
## 2 1 0.27 0.54 0.27 0.54
## 3 3 0.81 1.35 0.81 1.35
## 4 1 0.27 1.62 0.27 1.62
## 5 2 0.54 2.16 0.54 2.16
## 7 1 0.27 2.43 0.27 2.43
## 8 1 0.27 2.70 0.27 2.70
## 9 1 0.27 2.96 0.27 2.96
## 10 1 0.27 3.23 0.27 3.23
## 13 2 0.54 3.77 0.54 3.77
## 14 1 0.27 4.04 0.27 4.04
## 16 3 0.81 4.85 0.81 4.85
## 17 3 0.81 5.66 0.81 5.66
## 18 5 1.35 7.01 1.35 7.01
## 19 6 1.62 8.63 1.62 8.63
## 20 1 0.27 8.89 0.27 8.89
## 21 1 0.27 9.16 0.27 9.16
## 22 3 0.81 9.97 0.81 9.97
## 23 9 2.43 12.40 2.43 12.40
## 24 3 0.81 13.21 0.81 13.21
## 25 9 2.43 15.63 2.43 15.63
## 26 7 1.89 17.52 1.89 17.52
## 27 8 2.16 19.68 2.16 19.68
## 28 4 1.08 20.75 1.08 20.75
## 29 7 1.89 22.64 1.89 22.64
## 30 8 2.16 24.80 2.16 24.80
## 32 5 1.35 26.15 1.35 26.15
## 33 7 1.89 28.03 1.89 28.03
## 34 2 0.54 28.57 0.54 28.57
## 35 8 2.16 30.73 2.16 30.73
## 36 3 0.81 31.54 0.81 31.54
## 37 7 1.89 33.42 1.89 33.42
## 38 3 0.81 34.23 0.81 34.23
## 39 6 1.62 35.85 1.62 35.85
## 40 7 1.89 37.74 1.89 37.74
## 41 3 0.81 38.54 0.81 38.54
## 42 5 1.35 39.89 1.35 39.89
## 43 5 1.35 41.24 1.35 41.24
## 44 4 1.08 42.32 1.08 42.32
## 45 9 2.43 44.74 2.43 44.74
## 46 7 1.89 46.63 1.89 46.63
## 47 3 0.81 47.44 0.81 47.44
## 48 6 1.62 49.06 1.62 49.06
## 49 5 1.35 50.40 1.35 50.40
## 50 11 2.96 53.37 2.96 53.37
## 51 8 2.16 55.53 2.16 55.53
## 52 7 1.89 57.41 1.89 57.41
## 53 8 2.16 59.57 2.16 59.57
## 54 6 1.62 61.19 1.62 61.19
## 55 7 1.89 63.07 1.89 63.07
## 56 6 1.62 64.69 1.62 64.69
## 57 8 2.16 66.85 2.16 66.85
## 58 7 1.89 68.73 1.89 68.73
## 59 6 1.62 70.35 1.62 70.35
## 60 8 2.16 72.51 2.16 72.51
## 61 7 1.89 74.39 1.89 74.39
## 62 10 2.70 77.09 2.70 77.09
## 63 8 2.16 79.25 2.16 79.25
## 64 3 0.81 80.05 0.81 80.05
## 65 11 2.96 83.02 2.96 83.02
## 66 5 1.35 84.37 1.35 84.37
## 67 9 2.43 86.79 2.43 86.79
## 68 4 1.08 87.87 1.08 87.87
## 69 6 1.62 89.49 1.62 89.49
## 70 5 1.35 90.84 1.35 90.84
## 71 6 1.62 92.45 1.62 92.45
## 72 1 0.27 92.72 0.27 92.72
## 73 3 0.81 93.53 0.81 93.53
## 74 2 0.54 94.07 0.54 94.07
## 75 2 0.54 94.61 0.54 94.61
## 76 4 1.08 95.69 1.08 95.69
## 77 5 1.35 97.04 1.35 97.04
## 78 4 1.08 98.11 1.08 98.11
## 79 1 0.27 98.38 0.27 98.38
## 80 3 0.81 99.19 0.81 99.19
## 83 1 0.27 99.46 0.27 99.46
## 88 1 0.27 99.73 0.27 99.73
## 89 1 0.27 100.00 0.27 100.00
## <NA> 0 0.00 100.00
## Total 371 100.00 100.00 100.00 100.00
freq(TB$TB.Sex)
## Frequencies
## TB$.Sex
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## F 113 30.46 30.46 30.46 30.46
## M 258 69.54 100.00 69.54 100.00
## <NA> 0 0.00 100.00
## Total 371 100.00 100.00 100.00 100.00
freq(TB$TB.Residence)
## Frequencies
## TB$.Residence
## Type: Integer
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 1 97 26.15 26.15 26.15 26.15
## 2 82 22.10 48.25 22.10 48.25
## 3 160 43.13 91.37 43.13 91.37
## 4 32 8.63 100.00 8.63 100.00
## <NA> 0 0.00 100.00
## Total 371 100.00 100.00 100.00 100.00
freq(TB$TB.Referral)
## Frequencies
## TB$.Referral
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## --------------------------- ------ --------- -------------- --------- --------------
## COMMUNITY 50 13.48 13.48 13.48 13.48
## OTHER PUBLIC FACILITY 15 4.04 17.52 4.04 17.52
## PRIVATE FACILITY 36 9.70 27.22 9.70 27.22
## PUBLIC HEALTH CENTER 270 72.78 100.00 72.78 100.00
## <NA> 0 0.00 100.00
## Total 371 100.00 100.00 100.00 100.00
freq(TB$TB.Anatomical)
## Frequencies
## TB$.Anatomical
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## EP 3 0.81 0.81 0.81 0.81
## P 368 99.19 100.00 99.19 100.00
## <NA> 0 0.00 100.00
## Total 371 100.00 100.00 100.00 100.00
freq(TB$TB.Bacteriologic)
## Frequencies
## TB$.Bacteriologic
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ------------------------------------ ------ --------- -------------- --------- --------------
## Bacteriologically-confirmed TB 201 54.18 54.18 54.18 54.18
## Clinically-diagnosed TB 170 45.82 100.00 45.82 100.00
## <NA> 0 0.00 100.00
## Total 371 100.00 100.00 100.00 100.00
freq(TB$TB.Age.Group)
## Frequencies
## TB$.Age.Group
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## -------------- ------ --------- -------------- --------- --------------
## [0,20) 32 8.63 8.63 8.63 8.63
## [20,40) 101 27.22 35.85 27.22 35.85
## [40,60) 128 34.50 70.35 34.50 70.35
## [60,80) 104 28.03 98.38 28.03 98.38
## [80,100) 6 1.62 100.00 1.62 100.00
## <NA> 0 0.00 100.00
## Total 371 100.00 100.00 100.00 100.00
Kaplan-Meier
# creating survival objects
surv.TB <- Surv(time = TB$TB.Time,event = TB$TB.Status)
#Fitting KM estimates
TB.fit <- survfit(surv.TB~1, data=TB)
summary(TB.fit)
## Call: survfit(formula = surv.TB ~ 1, data = TB)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 371 1 0.99730 0.00269 0.992043 1.0000
## 136 320 1 0.99419 0.00411 0.986167 1.0000
## 145 318 1 0.99106 0.00515 0.981019 1.0000
## 157 315 1 0.98792 0.00602 0.976190 0.9998
## 160 314 2 0.98162 0.00745 0.967139 0.9963
## 162 312 3 0.97218 0.00915 0.954409 0.9903
## 163 309 1 0.96904 0.00965 0.950309 0.9881
## 164 308 3 0.95960 0.01099 0.938306 0.9814
## 165 305 3 0.95016 0.01216 0.926633 0.9743
## 166 302 8 0.92499 0.01473 0.896557 0.9543
## 167 294 204 0.28316 0.02527 0.237724 0.3373
## 168 90 37 0.16675 0.02091 0.130419 0.2132
## 169 52 21 0.09941 0.01685 0.071302 0.1386
## 170 31 3 0.08979 0.01611 0.063164 0.1276
## 171 28 4 0.07696 0.01503 0.052481 0.1129
## 172 24 2 0.07055 0.01445 0.047223 0.1054
## 173 22 1 0.06734 0.01414 0.044618 0.1016
## 174 21 2 0.06093 0.01350 0.039460 0.0941
## 175 19 1 0.05772 0.01317 0.036910 0.0903
## 176 18 5 0.04169 0.01130 0.024512 0.0709
## 177 13 1 0.03848 0.01087 0.022118 0.0669
## 179 12 2 0.03207 0.00996 0.017444 0.0589
## 185 10 2 0.02565 0.00894 0.012956 0.0508
## 186 8 1 0.02245 0.00838 0.010800 0.0467
## 188 7 1 0.01924 0.00777 0.008717 0.0425
## 190 6 1 0.01603 0.00711 0.006725 0.0382
## 197 5 2 0.00962 0.00552 0.003121 0.0297
## 198 3 1 0.00641 0.00452 0.001612 0.0255
## 200 2 1 0.00321 0.00320 0.000453 0.0227
## 206 1 1 0.00000 NaN NA NA
# creating survival plots
survfit2(surv.TB~1, data=TB, conf.type="log-log") |>
ggsurvfit(linewidth=1, color="green")+
add_censor_mark(color="red", size=0.5)+
add_confidence_interval(type = "ribbon")+
labs(x="Days", y="Hazard Rate")+
geom_vline(xintercept = 136, color="grey40", linetype="dotted")+
geom_hline(yintercept = 0.50, color="grey40", linetype="dashed")+
geom_vline(xintercept = 167, color="grey40", linetype="dashed")+
theme(panel.grid = element_blank())+
annotate("text",x=136, y=0.1, label= "Day 136", color="grey60")+
annotate("text",x=167, y=0.5, label= "Median Survival", color="grey40")

# Estimating median survival Time
survfit(surv.TB~1, data=TB)
## Call: survfit(formula = surv.TB ~ 1, data = TB)
##
## n events median 0.95LCL 0.95UCL
## [1,] 371 317 167 167 167
median(survfit(surv.TB~1, data=TB))
## 50
## 167
#to identify number of events per category
a <- survfit(surv.TB~TB.Sex, data=TB)
b <-survfit(surv.TB~TB.Residence, data=TB)
c <-survfit(surv.TB~TB.Referral, data=TB)
d <-survfit(surv.TB~TB.Anatomical, data=TB)
e <-survfit(surv.TB~TB.Bacteriologic, data=TB)
f <-survfit(surv.TB~TB.Age.Group, data=TB)
summary(a)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL 0.95UCL
## TB.Sex=F 113 113 113 100 168.1400 0.4086612 167 167 167
## TB.Sex=M 258 258 258 217 167.5483 0.7772760 167 167 167
summary(b)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## TB.Residence=1 97 97 97 86 167.7558 0.5967589 167 167
## TB.Residence=2 82 82 82 63 170.0145 1.0589461 167 167
## TB.Residence=3 160 160 160 141 166.5536 1.0904087 167 167
## TB.Residence=4 32 32 32 27 168.2963 1.1556681 167 167
## 0.95UCL
## TB.Residence=1 167
## TB.Residence=2 167
## TB.Residence=3 167
## TB.Residence=4 168
summary(c)$table
## records n.max n.start events rmean
## TB.Referral=COMMUNITY 50 50 50 50 168.1800
## TB.Referral=OTHER PUBLIC FACILITY 15 15 15 15 166.8667
## TB.Referral=PRIVATE FACILITY 36 36 36 35 167.1143
## TB.Referral=PUBLIC HEALTH CENTER 270 270 270 217 167.8117
## se(rmean) median 0.95LCL 0.95UCL
## TB.Referral=COMMUNITY 0.5243587 167 167 167
## TB.Referral=OTHER PUBLIC FACILITY 0.3755490 167 167 168
## TB.Referral=PRIVATE FACILITY 1.2550446 167 167 167
## TB.Referral=PUBLIC HEALTH CENTER 0.7383680 167 167 167
summary(d)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## TB.Anatomical=EP 3 3 3 3 167.000 0.0000000 167 NA
## TB.Anatomical=P 368 368 368 314 167.731 0.5576206 167 167
## 0.95UCL
## TB.Anatomical=EP NA
## TB.Anatomical=P 167
summary(e)$table
## records n.max n.start events
## TB.Bacteriologic=Bacteriologically-confirmed TB 201 201 201 170
## TB.Bacteriologic=Clinically-diagnosed TB 170 170 170 147
## rmean se(rmean) median
## TB.Bacteriologic=Bacteriologically-confirmed TB 168.0540 0.3682387 167
## TB.Bacteriologic=Clinically-diagnosed TB 167.3197 1.1222973 167
## 0.95LCL 0.95UCL
## TB.Bacteriologic=Bacteriologically-confirmed TB 167 167
## TB.Bacteriologic=Clinically-diagnosed TB 167 167
summary(f)$table
## records n.max n.start events rmean se(rmean) median
## TB.Age.Group=[0,20) 32 32 32 29 168.2414 0.4916791 167
## TB.Age.Group=[20,40) 101 101 101 89 168.3483 0.7042177 167
## TB.Age.Group=[40,60) 128 128 128 111 168.2598 0.5809606 167
## TB.Age.Group=[60,80) 104 104 104 82 166.3113 1.6916247 167
## TB.Age.Group=[80,100) 6 6 6 6 167.1667 0.1521452 167
## 0.95LCL 0.95UCL
## TB.Age.Group=[0,20) 167 168
## TB.Age.Group=[20,40) 167 167
## TB.Age.Group=[40,60) 167 167
## TB.Age.Group=[60,80) 167 167
## TB.Age.Group=[80,100) 167 NA
Log-rank Test
##comparing survival times(curves) within groups
survdiff(surv.TB~TB.Sex, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Sex, data = TB)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Sex=F 113 100 98.3 0.0287 0.0906
## TB.Sex=M 258 217 218.7 0.0129 0.0906
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.8
survdiff(surv.TB~TB.Residence, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Residence, data = TB)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Residence=1 97 86 82.3 0.1700 0.503
## TB.Residence=2 82 63 75.5 2.0830 5.924
## TB.Residence=3 160 141 133.0 0.4860 1.879
## TB.Residence=4 32 27 26.2 0.0224 0.052
##
## Chisq= 6 on 3 degrees of freedom, p= 0.1
survdiff(surv.TB~TB.Referral, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Referral, data = TB)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Referral=COMMUNITY 50 50 48.1 0.0768 0.199
## TB.Referral=OTHER PUBLIC FACILITY 15 15 11.8 0.8866 2.245
## TB.Referral=PRIVATE FACILITY 36 35 30.8 0.5705 1.457
## TB.Referral=PUBLIC HEALTH CENTER 270 217 226.3 0.3857 3.016
##
## Chisq= 4.5 on 3 degrees of freedom, p= 0.2
survdiff(surv.TB~TB.Anatomical, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Anatomical, data = TB)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Anatomical=EP 3 3 2.31 0.2034 0.547
## TB.Anatomical=P 368 314 314.69 0.0015 0.547
##
## Chisq= 0.5 on 1 degrees of freedom, p= 0.5
survdiff(surv.TB~TB.Bacteriologic, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Bacteriologic, data = TB)
##
## N Observed Expected (O-E)^2/E
## TB.Bacteriologic=Bacteriologically-confirmed TB 201 170 170 0.000161
## TB.Bacteriologic=Clinically-diagnosed TB 170 147 147 0.000185
## (O-E)^2/V
## TB.Bacteriologic=Bacteriologically-confirmed TB 0.000749
## TB.Bacteriologic=Clinically-diagnosed TB 0.000749
##
## Chisq= 0 on 1 degrees of freedom, p= 1
survdiff(surv.TB~TB.Age.Group, data=TB)
## Call:
## survdiff(formula = surv.TB ~ TB.Age.Group, data = TB)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## TB.Age.Group=[0,20) 32 29 30.86 0.1123 0.268
## TB.Age.Group=[20,40) 101 89 92.28 0.1165 0.351
## TB.Age.Group=[40,60) 128 111 113.51 0.0555 0.187
## TB.Age.Group=[60,80) 104 82 75.31 0.5943 1.681
## TB.Age.Group=[80,100) 6 6 5.04 0.1833 0.475
##
## Chisq= 2.4 on 4 degrees of freedom, p= 0.7
ggsurvplot(survfit(surv.TB~TB.Sex, data=TB))

ggsurvplot(survfit(surv.TB~TB.Residence, data=TB))

ggsurvplot(survfit(surv.TB~TB.Referral, data=TB))

ggsurvplot(survfit(surv.TB~TB.Anatomical, data=TB))

ggsurvplot(survfit(surv.TB~TB.Bacteriologic, data=TB))

#Multiple cox
full <- coxph(surv.TB~TB.Age + factor(TB.Sex)+ factor(TB.Residence)+ factor(TB.Referral)+ factor(TB.Anatomical)+factor(TB.Bacteriologic), ties = "exact", data = TB )
full %>%
tbl_regression(exp=TRUE)
| Characteristic |
HR |
95% CI |
p-value |
| TB.Age |
1.01 |
1.00, 1.02 |
0.036 |
| factor(TB.Sex) |
|
|
|
| F |
— |
— |
|
| M |
0.96 |
0.67, 1.37 |
0.8 |
| factor(TB.Residence) |
|
|
|
| 1 |
— |
— |
|
| 2 |
0.59 |
0.37, 0.96 |
0.034 |
| 3 |
1.00 |
0.66, 1.53 |
>0.9 |
| 4 |
0.86 |
0.45, 1.67 |
0.7 |
| factor(TB.Referral) |
|
|
|
| COMMUNITY |
— |
— |
|
| OTHER PUBLIC FACILITY |
1.69 |
0.62, 4.61 |
0.3 |
| PRIVATE FACILITY |
1.39 |
0.69, 2.79 |
0.4 |
| PUBLIC HEALTH CENTER |
0.85 |
0.53, 1.37 |
0.5 |
| factor(TB.Anatomical) |
|
|
|
| EP |
— |
— |
|
| P |
0.40 |
0.05, 3.25 |
0.4 |
| factor(TB.Bacteriologic) |
|
|
|
| Bacteriologically-confirmed TB |
— |
— |
|
| Clinically-diagnosed TB |
0.85 |
0.59, 1.22 |
0.4 |