1. Explore the data using descriptive statistics:
# import data set
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
##
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(PredictABEL)
library(dplyr)
library(tidyverse)
pbcData = read_csv("pbctrial.csv")
## Rows: 312 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): sex, agecat
## dbl (16): case, drug, bil, histo, death, survyr, _st, _d, _t, _t0, ageyr, ag...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## exploring the data
summary(pbcData)
## case drug sex bil
## Min. : 1.00 Min. :0.0000 Length:312 Min. : 0.300
## 1st Qu.: 78.75 1st Qu.:0.0000 Class :character 1st Qu.: 0.800
## Median :156.50 Median :1.0000 Mode :character Median : 1.350
## Mean :156.50 Mean :0.5064 Mean : 3.256
## 3rd Qu.:234.25 3rd Qu.:1.0000 3rd Qu.: 3.425
## Max. :312.00 Max. :1.0000 Max. :28.000
## histo death survyr _st
## Min. :1.000 Min. :0.0000 Min. : 0.1123 Min. :1
## 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.: 3.2630 1st Qu.:1
## Median :3.000 Median :0.0000 Median : 5.0397 Median :1
## Mean :3.032 Mean :0.4006 Mean : 5.4969 Mean :1
## 3rd Qu.:4.000 3rd Qu.:1.0000 3rd Qu.: 7.3897 3rd Qu.:1
## Max. :4.000 Max. :1.0000 Max. :12.4822 Max. :1
## _d _t _t0 ageyr
## Min. :0.0000 Min. : 0.1123 Min. :0 Min. :26.30
## 1st Qu.:0.0000 1st Qu.: 3.2630 1st Qu.:0 1st Qu.:42.27
## Median :0.0000 Median : 5.0397 Median :0 Median :49.83
## Mean :0.4006 Mean : 5.4969 Mean :0 Mean :50.05
## 3rd Qu.:1.0000 3rd Qu.: 7.3897 3rd Qu.:0 3rd Qu.:56.75
## Max. :1.0000 Max. :12.4822 Max. :0 Max. :78.49
## agecat agegr_2 agegr_3 hstage2
## Length:312 Min. :0.0000 Min. :0.0000 Min. :0.0000
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3237 Mean :0.3365 Mean :0.2147
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## hstage3 hstage4
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000
## Mean :0.3846 Mean :0.3494
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
## table 1 (outcome = death) - stratified by exposure
## age
pbcData %>%
tabyl(drug, agecat) %>%
adorn_totals(where="row")%>%
adorn_totals(where="col")%>%
adorn_percentages("row") %>%
adorn_pct_formatting(digits=1) %>%
adorn_ns()%>%
adorn_title()
## agecat
## drug < 45 yrs >= 55 yrs 45 - 55 yrs Total
## 0 37.7% (58) 28.6% (44) 33.8% (52) 100.0% (154)
## 1 30.4% (48) 38.6% (61) 31.0% (49) 100.0% (158)
## Total 34.0% (106) 33.7% (105) 32.4% (101) 100.0% (312)
## sex
pbcData %>%
tabyl(drug, sex) %>%
adorn_totals(where="row")%>%
adorn_totals(where="col")%>%
adorn_percentages("row") %>%
adorn_pct_formatting(digits=1) %>%
adorn_ns()%>%
adorn_title()
## sex
## drug Female Male Total
## 0 90.3% (139) 9.7% (15) 100.0% (154)
## 1 86.7% (137) 13.3% (21) 100.0% (158)
## Total 88.5% (276) 11.5% (36) 100.0% (312)
## histo
pbcData %>%
tabyl(drug, histo) %>%
adorn_totals(where="row")%>%
adorn_totals(where="col")%>%
adorn_percentages("row") %>%
adorn_pct_formatting(digits=1) %>%
adorn_ns()%>%
adorn_title()
## histo
## drug 1 2 3 4 Total
## 0 2.6% (4) 20.8% (32) 41.6% (64) 35.1% (54) 100.0% (154)
## 1 7.6% (12) 22.2% (35) 35.4% (56) 34.8% (55) 100.0% (158)
## Total 5.1% (16) 21.5% (67) 38.5% (120) 34.9% (109) 100.0% (312)
## bill
summary(subset(pbcData$bil, pbcData$drug == 1))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.300 0.800 1.400 2.873 3.200 20.000
sd(subset(pbcData$bil, pbcData$drug == 1))
## [1] 3.628855
summary(subset(pbcData$bil, pbcData$drug == 0))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.300 0.725 1.300 3.649 3.600 28.000
sd(subset(pbcData$bil, pbcData$drug == 0))
## [1] 5.281949
### mission data
variables <- c("case", "drug", "sex", "bil", "histo", "survyr", "ageyr")
#missing value check
for (i in variables) {
missing_count <- sum(is.na(pbcData[[i]]))
cat("Missing values in", i, ":", missing_count, "\n")
}
## Missing values in case : 0
## Missing values in drug : 0
## Missing values in sex : 0
## Missing values in bil : 0
## Missing values in histo : 0
## Missing values in survyr : 0
## Missing values in ageyr : 0
3. Explore differences in time to death by different baseline
variables using graphs and complementary log-log plots
# estimate survival curves for entire sample
km.overall = survfit(SurvObj ~ 1, data = pbcData,
type="kaplan-meier", conf.type="log-log")
km.overall
## Call: survfit(formula = SurvObj ~ 1, data = pbcData, type = "kaplan-meier",
## conf.type = "log-log")
##
## n events median 0.95LCL 0.95UCL
## [1,] 312 125 9.3 8.45 10.5
summary(km.overall)
## Call: survfit(formula = SurvObj ~ 1, data = pbcData, type = "kaplan-meier",
## conf.type = "log-log")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0.112 312 1 0.997 0.00320 0.977 1.000
## 0.140 311 1 0.994 0.00452 0.975 0.998
## 0.195 310 1 0.990 0.00552 0.970 0.997
## 0.211 309 1 0.987 0.00637 0.966 0.995
## 0.301 308 1 0.984 0.00711 0.962 0.993
## 0.356 307 1 0.981 0.00778 0.958 0.991
## 0.359 306 1 0.978 0.00838 0.954 0.989
## 0.384 305 1 0.974 0.00895 0.949 0.987
## 0.490 304 1 0.971 0.00948 0.945 0.985
## 0.510 303 1 0.968 0.00997 0.941 0.983
## 0.523 302 1 0.965 0.01044 0.937 0.980
## 0.542 301 1 0.962 0.01089 0.933 0.978
## 0.567 300 1 0.958 0.01131 0.929 0.976
## 0.592 299 1 0.955 0.01172 0.925 0.973
## 0.611 298 1 0.952 0.01211 0.922 0.971
## 0.723 297 2 0.946 0.01285 0.914 0.966
## 0.833 295 1 0.942 0.01320 0.910 0.963
## 0.879 294 1 0.939 0.01354 0.906 0.961
## 0.893 293 1 0.936 0.01387 0.902 0.958
## 0.915 292 1 0.933 0.01418 0.899 0.956
## 0.953 291 1 0.929 0.01449 0.895 0.953
## 1.063 290 1 0.926 0.01479 0.891 0.950
## 1.096 289 1 0.923 0.01509 0.887 0.948
## 1.260 288 1 0.920 0.01537 0.884 0.945
## 1.411 287 1 0.917 0.01565 0.880 0.942
## 1.504 285 1 0.913 0.01592 0.876 0.940
## 1.512 284 1 0.910 0.01619 0.873 0.937
## 1.636 283 1 0.907 0.01644 0.869 0.934
## 1.674 282 1 0.904 0.01670 0.865 0.932
## 1.844 281 1 0.901 0.01695 0.862 0.929
## 1.901 280 1 0.897 0.01719 0.858 0.926
## 1.940 279 1 0.894 0.01742 0.854 0.924
## 2.008 277 1 0.891 0.01766 0.851 0.921
## 2.055 275 1 0.888 0.01789 0.847 0.918
## 2.088 274 1 0.884 0.01811 0.843 0.915
## 2.107 273 1 0.881 0.01833 0.840 0.912
## 2.153 272 1 0.878 0.01855 0.836 0.910
## 2.164 270 1 0.875 0.01877 0.833 0.907
## 2.184 269 1 0.871 0.01898 0.829 0.904
## 2.189 268 1 0.868 0.01918 0.825 0.901
## 2.258 267 1 0.865 0.01938 0.822 0.898
## 2.329 264 1 0.862 0.01958 0.818 0.896
## 2.337 263 1 0.858 0.01978 0.814 0.893
## 2.353 262 1 0.855 0.01998 0.811 0.890
## 2.438 260 1 0.852 0.02017 0.807 0.887
## 2.477 258 1 0.849 0.02036 0.804 0.884
## 2.548 257 1 0.845 0.02055 0.800 0.881
## 2.584 255 1 0.842 0.02073 0.796 0.878
## 2.660 254 1 0.839 0.02091 0.793 0.875
## 2.668 253 1 0.835 0.02109 0.789 0.872
## 2.685 252 1 0.832 0.02127 0.785 0.869
## 2.737 250 1 0.829 0.02144 0.782 0.866
## 2.740 249 1 0.825 0.02161 0.778 0.863
## 2.773 248 1 0.822 0.02178 0.775 0.860
## 2.841 246 1 0.819 0.02194 0.771 0.857
## 2.951 244 1 0.815 0.02211 0.767 0.854
## 2.959 243 1 0.812 0.02227 0.764 0.851
## 2.967 242 1 0.809 0.02243 0.760 0.848
## 3.156 239 1 0.805 0.02259 0.756 0.845
## 3.192 237 1 0.802 0.02275 0.753 0.842
## 3.205 236 1 0.798 0.02291 0.749 0.839
## 3.263 235 2 0.792 0.02321 0.742 0.833
## 3.321 233 1 0.788 0.02336 0.738 0.830
## 3.334 230 1 0.785 0.02350 0.734 0.827
## 3.384 227 1 0.781 0.02365 0.731 0.824
## 3.553 222 1 0.778 0.02381 0.727 0.820
## 3.699 214 1 0.774 0.02397 0.723 0.817
## 3.715 213 1 0.771 0.02413 0.719 0.814
## 3.726 212 1 0.767 0.02429 0.715 0.811
## 3.871 206 1 0.763 0.02446 0.711 0.807
## 3.910 203 1 0.759 0.02462 0.707 0.804
## 3.929 201 1 0.756 0.02479 0.703 0.800
## 3.956 198 1 0.752 0.02496 0.699 0.797
## 4.074 193 1 0.748 0.02513 0.695 0.793
## 4.088 192 1 0.744 0.02530 0.690 0.790
## 4.208 189 1 0.740 0.02547 0.686 0.786
## 4.318 184 1 0.736 0.02565 0.682 0.783
## 4.540 178 1 0.732 0.02583 0.677 0.779
## 4.608 175 1 0.728 0.02602 0.673 0.775
## 4.630 174 2 0.719 0.02639 0.664 0.767
## 4.770 169 1 0.715 0.02657 0.659 0.764
## 4.893 162 1 0.711 0.02677 0.654 0.760
## 5.005 159 1 0.706 0.02697 0.650 0.755
## 5.060 156 1 0.702 0.02718 0.645 0.751
## 5.274 151 1 0.697 0.02739 0.640 0.747
## 5.630 141 1 0.692 0.02764 0.634 0.743
## 5.701 140 1 0.687 0.02788 0.629 0.738
## 5.726 139 1 0.682 0.02812 0.624 0.734
## 5.767 138 1 0.677 0.02834 0.618 0.729
## 6.093 127 1 0.672 0.02862 0.612 0.725
## 6.181 123 1 0.667 0.02890 0.606 0.720
## 6.268 121 1 0.661 0.02918 0.600 0.715
## 6.293 119 1 0.655 0.02946 0.594 0.710
## 6.537 110 1 0.649 0.02979 0.588 0.704
## 6.575 109 1 0.644 0.03011 0.581 0.699
## 6.627 108 1 0.638 0.03041 0.575 0.694
## 6.756 103 1 0.631 0.03074 0.568 0.688
## 6.858 100 1 0.625 0.03108 0.561 0.683
## 6.959 96 1 0.619 0.03143 0.554 0.677
## 7.077 88 1 0.612 0.03185 0.546 0.671
## 7.118 87 1 0.604 0.03225 0.538 0.664
## 7.367 80 1 0.597 0.03272 0.530 0.658
## 7.586 76 1 0.589 0.03322 0.521 0.651
## 7.660 74 1 0.581 0.03371 0.512 0.644
## 7.800 71 1 0.573 0.03421 0.503 0.637
## 8.455 60 1 0.563 0.03495 0.492 0.629
## 8.466 59 1 0.554 0.03564 0.481 0.620
## 8.685 53 1 0.543 0.03646 0.469 0.612
## 8.827 52 1 0.533 0.03723 0.457 0.603
## 8.888 50 1 0.522 0.03798 0.445 0.594
## 8.992 48 1 0.511 0.03872 0.433 0.584
## 9.200 45 1 0.500 0.03949 0.420 0.574
## 9.301 43 1 0.488 0.04025 0.407 0.564
## 9.392 41 1 0.476 0.04099 0.394 0.554
## 9.438 40 1 0.465 0.04166 0.381 0.544
## 9.792 37 1 0.452 0.04238 0.368 0.533
## 9.819 34 1 0.439 0.04317 0.353 0.521
## 10.307 30 1 0.424 0.04414 0.337 0.509
## 10.518 27 1 0.408 0.04522 0.319 0.495
## 10.556 25 1 0.392 0.04626 0.302 0.481
## 11.175 17 1 0.369 0.04895 0.274 0.464
## 11.482 13 1 0.341 0.05278 0.240 0.444
# plot km curves
plot(km.overall)

# for drug group
# distribution
Freq <- table(pbcData$drug, useNA="ifany")
Percentage <- round((prop.table(Freq)*100),2)
cbind(Freq,Percentage)
## Freq Percentage
## 0 154 49.36
## 1 158 50.64
# estimate survival curves
km.drug = survfit(SurvObj ~ drug, data = pbcData,
type="kaplan-meier", conf.type="log-log")
plot(km.drug, main="Kaplan-Meier survival estimates", xlab="Time(years)", ylab="S(t)", col=c("blue", "red"))
legend("topright", legend=c("drug = 0", "drug = 1"), col=c("blue", "red"), lwd=1)
km.drug
## Call: survfit(formula = SurvObj ~ drug, data = pbcData, type = "kaplan-meier",
## conf.type = "log-log")
##
## n events median 0.95LCL 0.95UCL
## drug=0 154 60 9.39 8.47 10.6
## drug=1 158 65 8.99 6.96 11.5
summary(km.drug)
## Call: survfit(formula = SurvObj ~ drug, data = pbcData, type = "kaplan-meier",
## conf.type = "log-log")
##
## drug=0
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0.140 154 1 0.994 0.00647 0.955 0.999
## 0.211 153 1 0.987 0.00912 0.949 0.997
## 0.301 152 1 0.981 0.01114 0.941 0.994
## 0.356 151 1 0.974 0.01282 0.932 0.990
## 0.510 150 1 0.968 0.01428 0.924 0.986
## 0.523 149 1 0.961 0.01559 0.915 0.982
## 0.567 148 1 0.955 0.01679 0.907 0.978
## 0.592 147 1 0.948 0.01788 0.899 0.974
## 0.723 146 2 0.935 0.01986 0.883 0.965
## 0.833 144 1 0.929 0.02075 0.875 0.960
## 0.879 143 1 0.922 0.02160 0.867 0.955
## 0.893 142 1 0.916 0.02240 0.859 0.950
## 1.260 141 1 0.909 0.02317 0.851 0.945
## 1.504 140 1 0.903 0.02389 0.844 0.940
## 1.512 139 1 0.896 0.02459 0.836 0.935
## 1.636 138 1 0.890 0.02525 0.828 0.930
## 1.674 137 1 0.883 0.02589 0.821 0.925
## 1.940 136 1 0.877 0.02650 0.813 0.919
## 2.008 135 1 0.870 0.02709 0.806 0.914
## 2.107 134 1 0.864 0.02765 0.799 0.909
## 2.153 133 1 0.857 0.02820 0.791 0.904
## 2.164 131 1 0.851 0.02873 0.784 0.898
## 2.184 130 1 0.844 0.02925 0.776 0.893
## 2.329 128 1 0.837 0.02975 0.769 0.887
## 2.337 127 1 0.831 0.03024 0.762 0.882
## 2.353 126 1 0.824 0.03071 0.754 0.876
## 2.438 125 1 0.818 0.03116 0.747 0.870
## 2.548 124 1 0.811 0.03160 0.740 0.865
## 2.584 123 1 0.804 0.03203 0.732 0.859
## 2.668 122 1 0.798 0.03244 0.725 0.853
## 2.959 118 1 0.791 0.03286 0.718 0.847
## 3.192 115 1 0.784 0.03328 0.710 0.841
## 3.321 114 1 0.777 0.03370 0.703 0.836
## 3.334 111 1 0.770 0.03411 0.695 0.829
## 3.715 103 1 0.763 0.03459 0.687 0.823
## 3.871 101 1 0.755 0.03506 0.678 0.816
## 3.910 98 1 0.748 0.03554 0.670 0.810
## 3.956 95 1 0.740 0.03603 0.661 0.803
## 4.074 93 1 0.732 0.03651 0.652 0.796
## 4.208 91 1 0.724 0.03698 0.644 0.789
## 4.893 79 1 0.715 0.03763 0.633 0.781
## 5.060 76 1 0.705 0.03829 0.623 0.773
## 5.726 69 1 0.695 0.03908 0.611 0.764
## 6.627 56 1 0.683 0.04030 0.596 0.754
## 6.756 53 1 0.670 0.04155 0.581 0.744
## 6.858 51 1 0.657 0.04276 0.566 0.733
## 7.586 40 1 0.640 0.04473 0.545 0.720
## 7.660 38 1 0.623 0.04662 0.525 0.707
## 7.800 35 1 0.605 0.04857 0.503 0.693
## 8.466 32 1 0.587 0.05060 0.481 0.678
## 8.685 29 1 0.566 0.05275 0.457 0.662
## 8.888 28 1 0.546 0.05460 0.433 0.646
## 9.200 26 1 0.525 0.05640 0.409 0.628
## 9.301 24 1 0.503 0.05814 0.385 0.610
## 9.392 22 1 0.480 0.05983 0.360 0.591
## 9.438 21 1 0.457 0.06119 0.335 0.572
## 10.307 15 1 0.427 0.06427 0.300 0.548
## 10.518 13 1 0.394 0.06719 0.264 0.522
## 10.556 12 1 0.361 0.06916 0.230 0.494
##
## drug=1
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0.112 158 1 0.994 0.00631 0.956 0.999
## 0.195 157 1 0.987 0.00889 0.950 0.997
## 0.359 156 1 0.981 0.01086 0.942 0.994
## 0.384 155 1 0.975 0.01250 0.934 0.990
## 0.490 154 1 0.968 0.01393 0.926 0.987
## 0.542 153 1 0.962 0.01521 0.917 0.983
## 0.611 152 1 0.956 0.01637 0.909 0.979
## 0.915 151 1 0.949 0.01744 0.901 0.974
## 0.953 150 1 0.943 0.01844 0.893 0.970
## 1.063 149 1 0.937 0.01937 0.886 0.965
## 1.096 148 1 0.930 0.02025 0.878 0.961
## 1.411 147 1 0.924 0.02108 0.870 0.956
## 1.844 145 1 0.918 0.02187 0.862 0.951
## 1.901 144 1 0.911 0.02263 0.855 0.946
## 2.055 141 1 0.905 0.02337 0.847 0.942
## 2.088 140 1 0.898 0.02408 0.839 0.936
## 2.189 139 1 0.892 0.02476 0.832 0.931
## 2.258 138 1 0.885 0.02541 0.824 0.926
## 2.477 134 1 0.879 0.02607 0.817 0.921
## 2.660 132 1 0.872 0.02671 0.809 0.916
## 2.685 131 1 0.866 0.02732 0.801 0.910
## 2.737 130 1 0.859 0.02791 0.794 0.905
## 2.740 129 1 0.852 0.02848 0.786 0.899
## 2.773 128 1 0.846 0.02902 0.778 0.894
## 2.841 127 1 0.839 0.02955 0.771 0.888
## 2.951 126 1 0.832 0.03005 0.763 0.883
## 2.967 125 1 0.826 0.03054 0.756 0.877
## 3.156 124 1 0.819 0.03101 0.749 0.871
## 3.205 122 1 0.812 0.03148 0.741 0.866
## 3.263 121 2 0.799 0.03236 0.726 0.854
## 3.384 117 1 0.792 0.03279 0.719 0.848
## 3.553 114 1 0.785 0.03323 0.711 0.842
## 3.699 111 1 0.778 0.03368 0.703 0.836
## 3.726 110 1 0.771 0.03411 0.695 0.830
## 3.929 105 1 0.764 0.03456 0.687 0.823
## 4.088 100 1 0.756 0.03505 0.679 0.817
## 4.318 97 1 0.748 0.03554 0.670 0.810
## 4.540 93 1 0.740 0.03606 0.661 0.803
## 4.608 92 1 0.732 0.03655 0.653 0.796
## 4.630 91 2 0.716 0.03748 0.635 0.782
## 4.770 87 1 0.708 0.03794 0.626 0.775
## 5.005 82 1 0.699 0.03845 0.616 0.767
## 5.274 78 1 0.690 0.03899 0.607 0.759
## 5.630 72 1 0.681 0.03960 0.596 0.751
## 5.701 71 1 0.671 0.04019 0.585 0.743
## 5.767 70 1 0.661 0.04074 0.575 0.734
## 6.093 65 1 0.651 0.04137 0.564 0.725
## 6.181 63 1 0.641 0.04198 0.552 0.716
## 6.268 61 1 0.630 0.04259 0.541 0.707
## 6.293 60 1 0.620 0.04315 0.529 0.698
## 6.537 54 1 0.608 0.04385 0.517 0.688
## 6.575 53 1 0.597 0.04450 0.504 0.678
## 6.959 47 1 0.584 0.04533 0.490 0.667
## 7.077 42 1 0.570 0.04634 0.474 0.655
## 7.118 41 1 0.556 0.04725 0.459 0.643
## 7.367 38 1 0.542 0.04822 0.443 0.631
## 8.455 28 1 0.522 0.05023 0.420 0.615
## 8.827 24 1 0.501 0.05264 0.394 0.598
## 8.992 22 1 0.478 0.05495 0.367 0.580
## 9.792 18 1 0.451 0.05795 0.336 0.560
## 9.819 17 1 0.425 0.06032 0.306 0.539
## 11.175 8 1 0.372 0.07247 0.233 0.510
## 11.482 7 1 0.319 0.07922 0.173 0.474
#install.packages("survminer")
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
ggsurvplot(km.drug, risk.table=TRUE)


# log rank test for equality of survivor functions
survdiff(SurvObj ~ drug, data=pbcData)
## Call:
## survdiff(formula = SurvObj ~ drug, data = pbcData)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## drug=0 154 60 61.8 0.0513 0.102
## drug=1 158 65 63.2 0.0502 0.102
##
## Chisq= 0.1 on 1 degrees of freedom, p= 0.7
# complimentary log-log plot - CLL plot
plot(km.drug, fun="cloglog", main="Complimentary Log-log Plot", ylab="log(-log(Survival Probability)",
xlab="Analysis time (shown on log scale)", col=c("blue", "red"))
legend("topleft", legend=c("drug = 0", "drug = 1"), col=c("blue", "red"), lwd=1)

# for age group - categories
# distribution
Freq <- table(pbcData$agecat, useNA="ifany")
Percentage <- round((prop.table(Freq)*100),2)
cbind(Freq,Percentage)
## Freq Percentage
## < 45 yrs 106 33.97
## >= 55 yrs 105 33.65
## 45 - 55 yrs 101 32.37
# estimate survival curves
km.agecat = survfit(SurvObj ~ agecat, data = pbcData,
type="kaplan-meier", conf.type="log-log")
plot(km.agecat, main="Kaplan-Meier survival estimates", xlab="Time(years)", ylab="S(t)", col=c("blue", "red", "green"))
legend("topright", legend=c("< 45 yrs", "45 - 55 yrs", ">= 55 yrs"), col=c("blue", "red", "green"), lwd=1)

# complimentary log-log plot - CLL plot
plot(km.agecat, fun="cloglog", main="Complimentary Log-log Plot", ylab="log(-log(Survival Probability)",
xlab="Analysis time (shown on log scale)", col=c("blue", "red", "green"))
legend("topleft", legend=c("< 45 yrs", "45 - 55 yrs", ">= 55 yrs"), col=c("blue", "red", "green"), lwd=1)

# log rank test for equality of survivor functions
survdiff(SurvObj ~ agecat, data=pbcData)
## Call:
## survdiff(formula = SurvObj ~ agecat, data = pbcData)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## agecat=< 45 yrs 106 27 45.0 7.171 11.22
## agecat=>= 55 yrs 105 50 37.4 4.259 6.11
## agecat=45 - 55 yrs 101 48 42.7 0.668 1.02
##
## Chisq= 12.1 on 2 degrees of freedom, p= 0.002
## sex
# distribution
Freq <- table(pbcData$sex, useNA="ifany")
Percentage <- round((prop.table(Freq)*100),2)
cbind(Freq,Percentage)
## Freq Percentage
## Female 276 88.46
## Male 36 11.54
# estimate survival curves
km.sex = survfit(SurvObj ~ sex, data = pbcData,
type="kaplan-meier", conf.type="log-log")
plot(km.sex, main="Kaplan-Meier survival estimates", xlab="Time(years)", ylab="S(t)", col=c("blue", "red"))
legend("topright", legend=c("Female", "Male"), col=c("blue", "red"), lwd=1)

# complimentary log-log plot - CLL plot
plot(km.sex, fun="cloglog", main="Complimentary Log-log Plot", ylab="log(-log(Survival Probability)",
xlab="Analysis time (shown on log scale)", col=c("blue", "red"))
legend("topleft", legend=c("Female", "Male"), col=c("blue", "red"), lwd=1)

# log rank test for equality of survivor functions
survdiff(SurvObj ~ sex, data=pbcData)
## Call:
## survdiff(formula = SurvObj ~ sex, data = pbcData)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=Female 276 103 110.4 0.494 4.27
## sex=Male 36 22 14.6 3.728 4.27
##
## Chisq= 4.3 on 1 degrees of freedom, p= 0.04
## histo
# distribution
Freq <- table(pbcData$histo, useNA="ifany")
Percentage <- round((prop.table(Freq)*100),2)
cbind(Freq,Percentage)
## Freq Percentage
## 1 16 5.13
## 2 67 21.47
## 3 120 38.46
## 4 109 34.94
# estimate survival curves
km.histo = survfit(SurvObj ~ histo, data = pbcData,
type="kaplan-meier", conf.type="log-log")
plot(km.histo, main="Kaplan-Meier survival estimates", xlab="Time(years)", ylab="S(t)", col=c("blue", "red", "green", "pink"))
legend("topright", legend=c("Stage 1", "Stage 2", "Stage 3", "Stage 4"), col=c("blue", "red", "green", "pink"), lwd=1)

# complimentary log-log plot - CLL plot
plot(km.histo, fun="cloglog", main="Complimentary Log-log Plot", ylab="log(-log(Survival Probability)",
xlab="Analysis time (shown on log scale)", col=c("blue", "red", "green", "pink"))
legend("topleft", legend=c("Stage 1", "Stage 2", "Stage 3", "Stage 4"), col=c("blue", "red", "green", "pink"), lwd=1)

# log rank test for equality of survivor functions
survdiff(SurvObj ~ histo, data=pbcData)
## Call:
## survdiff(formula = SurvObj ~ histo, data = pbcData)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## histo=1 16 1 9.9 8.00 8.78
## histo=2 67 16 32.3 8.22 11.17
## histo=3 120 43 51.2 1.30 2.22
## histo=4 109 65 31.6 35.17 47.84
##
## Chisq= 53.8 on 3 degrees of freedom, p= 1e-11
## bil
summary(pbcData$bil)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.300 0.800 1.350 3.256 3.425 28.000
sd(pbcData$bil)
## [1] 4.530315
# log rank test for equality of survivor functions
survdiff(SurvObj ~ bil, data=pbcData)
## Call:
## survdiff(formula = SurvObj ~ bil, data = pbcData)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## bil=0.30000001 3 2 1.64469 7.68e-02 7.79e-02
## bil=0.40000001 8 1 3.62971 1.91e+00 1.96e+00
## bil=0.5 20 0 8.54246 8.54e+00 9.22e+00
## bil=0.60000002 22 1 11.38216 9.47e+00 1.05e+01
## bil=0.69999999 20 2 12.89572 9.21e+00 1.05e+01
## bil=0.80000001 17 8 8.05251 3.42e-04 3.68e-04
## bil=0.89999998 14 2 7.24020 3.79e+00 4.04e+00
## bil=1 12 1 5.11018 3.31e+00 3.45e+00
## bil=1.1 17 5 7.90057 1.06e+00 1.14e+00
## bil=1.2 9 2 4.74131 1.58e+00 1.65e+00
## bil=1.3 14 4 6.67194 1.07e+00 1.13e+00
## bil=1.4 7 4 3.35475 1.24e-01 1.28e-01
## bil=1.5 3 1 1.34333 8.77e-02 8.89e-02
## bil=1.6 5 1 1.80598 3.60e-01 3.66e-01
## bil=1.7 1 0 0.16783 1.68e-01 1.68e-01
## bil=1.8 5 2 3.02130 3.45e-01 3.56e-01
## bil=1.9 4 1 2.34961 7.75e-01 7.93e-01
## bil=2 6 4 1.91347 2.28e+00 2.32e+00
## bil=2.0999999 7 4 4.24525 1.42e-02 1.48e-02
## bil=2.2 2 0 0.55111 5.51e-01 5.55e-01
## bil=2.3 6 4 2.17507 1.53e+00 1.56e+00
## bil=2.4000001 3 1 0.42842 7.63e-01 7.68e-01
## bil=2.5 3 3 0.69072 7.72e+00 7.78e+00
## bil=2.5999999 1 1 0.19163 3.41e+00 3.42e+00
## bil=2.7 1 1 0.12624 6.05e+00 6.07e+00
## bil=2.8 2 1 0.46000 6.34e-01 6.38e-01
## bil=2.9000001 2 1 0.36743 1.09e+00 1.10e+00
## bil=3 3 1 1.29212 6.60e-02 6.69e-02
## bil=3.0999999 2 0 0.42107 4.21e-01 4.24e-01
## bil=3.2 7 5 2.07074 4.14e+00 4.22e+00
## bil=3.3 3 2 1.74440 3.75e-02 3.81e-02
## bil=3.4000001 5 2 1.37899 2.80e-01 2.84e-01
## bil=3.5 3 1 0.98505 2.27e-04 2.29e-04
## bil=3.5999999 3 2 0.38689 6.73e+00 6.77e+00
## bil=3.7 1 0 0.42136 4.21e-01 4.24e-01
## bil=3.8 1 1 0.24627 2.31e+00 2.32e+00
## bil=3.9000001 2 2 0.49255 4.61e+00 4.64e+00
## bil=4 2 2 0.34489 7.94e+00 7.99e+00
## bil=4.4000001 1 0 0.14481 1.45e-01 1.45e-01
## bil=4.5 3 3 0.95998 4.34e+00 4.39e+00
## bil=4.6999998 3 1 1.78973 3.48e-01 3.54e-01
## bil=5 2 2 0.39477 6.53e+00 6.56e+00
## bil=5.0999999 2 2 0.43583 5.61e+00 5.65e+00
## bil=5.1999998 2 2 0.32715 8.55e+00 8.60e+00
## bil=5.5 1 0 0.42136 4.21e-01 4.24e-01
## bil=5.5999999 2 0 0.57549 5.75e-01 5.80e-01
## bil=5.6999998 2 1 0.91907 7.13e-03 7.20e-03
## bil=5.9000001 1 1 0.20794 3.02e+00 3.03e+00
## bil=6 1 1 0.43045 7.54e-01 7.57e-01
## bil=6.0999999 1 0 0.11168 1.12e-01 1.12e-01
## bil=6.3000002 1 1 0.32860 1.37e+00 1.38e+00
## bil=6.4000001 3 1 0.60500 2.58e-01 2.60e-01
## bil=6.5 1 1 0.21207 2.93e+00 2.94e+00
## bil=6.5999999 3 2 0.34612 7.90e+00 7.95e+00
## bil=6.6999998 1 1 0.18361 3.63e+00 3.64e+00
## bil=6.8000002 1 1 0.25544 2.17e+00 2.18e+00
## bil=7.0999999 2 1 0.38371 9.90e-01 9.95e-01
## bil=7.1999998 2 2 0.43470 5.64e+00 5.67e+00
## bil=7.3000002 1 1 0.25077 2.24e+00 2.25e+00
## bil=8 1 1 0.16783 4.13e+00 4.14e+00
## bil=8.3999996 1 1 0.34698 1.23e+00 1.23e+00
## bil=8.5 2 1 0.30670 1.57e+00 1.58e+00
## bil=8.6000004 1 0 0.25077 2.51e-01 2.52e-01
## bil=8.6999998 1 0 0.19972 2.00e-01 2.00e-01
## bil=10.8 1 1 0.13734 5.42e+00 5.44e+00
## bil=11 1 1 0.17964 3.75e+00 3.76e+00
## bil=11.4 2 2 0.15627 2.18e+01 2.19e+01
## bil=12.2 1 1 0.00965 1.02e+02 1.02e+02
## bil=12.6 1 1 0.00642 1.54e+02 1.54e+02
## bil=13 1 0 0.35339 3.53e-01 3.55e-01
## bil=14 1 1 0.11529 6.79e+00 6.81e+00
## bil=14.1 1 1 0.06955 1.24e+01 1.25e+01
## bil=14.4 1 1 0.47901 5.67e-01 5.70e-01
## bil=14.5 1 1 0.07990 1.06e+01 1.06e+01
## bil=16.200001 1 1 0.19972 3.21e+00 3.22e+00
## bil=17.1 1 1 0.23322 2.52e+00 2.53e+00
## bil=17.200001 1 1 0.09036 9.16e+00 9.19e+00
## bil=17.4 3 3 0.34502 2.04e+01 2.05e+01
## bil=17.9 1 1 0.00321 3.10e+02 3.11e+02
## bil=20 1 1 0.22471 2.67e+00 2.68e+00
## bil=21.6 1 1 0.01288 7.56e+01 7.59e+01
## bil=22.5 1 1 0.15622 4.56e+00 4.57e+00
## bil=24.5 1 1 0.04583 1.99e+01 1.99e+01
## bil=25.5 1 1 0.15240 4.71e+00 4.73e+00
## bil=28 1 1 0.17175 3.99e+00 4.01e+00
##
## Chisq= 963 on 84 degrees of freedom, p= <2e-16