Task
By using SURVIVAL package and other packages in R for plots.
Choose survival data of your choice in R Program
Write R codes for the following models, results and
interpretation.
Kaplan Meier
Nelson Aalen
Log rank test
Cox proportion Hazard and its parameters results and test of
hypothesis interpretations.
Load libraries
library(here) # to easy read data from folder path
library(tidyverse) # filter, read.csv, manipulate data
library(survival) # survival library
library(survminer) #
library(summarytools)
library(kableExtra)
library(patchwork)
library(gtsummary)
library(broom)
Load Data
heart_failure<-read.csv(here("data/heart_failure_clinical_records_dataset.csv"))
df<-heart_failure %>%
select(
age,
diabetes,
smoking,
anaemia,
high_blood_pressure,
sex,
time,
DEATH_EVENT
) %>%
as.data.frame() %>%
mutate(across(
.cols = c(diabetes, smoking, anaemia,high_blood_pressure),
.fns = ~ifelse(. == 1, "Yes", "No")
)) %>%
mutate(
sex = ifelse(sex == 1, "Male", "Female"),
) %>%
as.data.frame()
The dataset contains health information for heart failure patients,
focusing on key health indicators (diabetes, smoking, anemia, high blood
pressure) along with basic demographics (sex, age). It tracks survival
time in days and records whether death occurred during the study period,
allowing researchers to analyze how various patient characteristics
might influence heart failure outcomes.
characteristics of the dataset
data_structure_summary <- function(df) {
result <- data.frame(
Seq = 1:length(names(df)),
Variable = names(df),
Class = sapply(df, class),
First_Values = sapply(df, function(x) paste0(head(x, 3), collapse = ", ")),
Missing = sapply(df, function(x) sum(is.na(x))),
Unique_Values = sapply(df, function(x) length(unique(x))),
row.names = NULL # This ensures no row names are set
)
return(result)
}
data_structure_summary(df) %>% kable(
caption = "Data Structure Summary",
align = c("r","l", "l", "l", "r", "r"),
row.names = FALSE,
booktabs = T,
escape = FALSE,
format = ifelse(knitr::is_html_output(), 'html', 'latex')
) %>% kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
latex_options = c("scale_down", "hold_position"),
full_width = F # Changed to False for better column sizing
) %>%
column_spec(1, bold = TRUE, width = "3em") %>% # Format sequence number
column_spec(2, bold = TRUE) # Format variable name
Data Structure Summary
|
Seq
|
Variable
|
Class
|
First_Values
|
Missing
|
Unique_Values
|
|
1
|
age
|
numeric
|
75, 55, 65
|
0
|
47
|
|
2
|
diabetes
|
character
|
No, No, No
|
0
|
2
|
|
3
|
smoking
|
character
|
No, No, Yes
|
0
|
2
|
|
4
|
anaemia
|
character
|
No, No, No
|
0
|
2
|
|
5
|
high_blood_pressure
|
character
|
Yes, No, No
|
0
|
2
|
|
6
|
sex
|
character
|
Male, Male, Male
|
0
|
2
|
|
7
|
time
|
integer
|
4, 6, 7
|
0
|
148
|
|
8
|
DEATH_EVENT
|
integer
|
1, 1, 1
|
0
|
2
|
sample of dataset
fmt<-function(k) {
res<-k %>% t() %>% kable(
longtable = TRUE,
booktabs = T,
escape = FALSE,
format = ifelse(knitr::is_html_output(), 'html', 'latex')
) %>% kable_styling(
bootstrap_options = c("striped","condensed","hover"),
latex_options = c("repeat_header", "hold_position"),
full_width = F
) %>%
column_spec(1, bold = TRUE)
return (res)
}
df %>% sample_n(7) %>% fmt()
|
age
|
66
|
42
|
49
|
50
|
65
|
58
|
55
|
|
diabetes
|
Yes
|
No
|
No
|
No
|
No
|
Yes
|
Yes
|
|
smoking
|
No
|
Yes
|
No
|
No
|
No
|
No
|
No
|
|
anaemia
|
Yes
|
Yes
|
Yes
|
No
|
Yes
|
No
|
No
|
|
high_blood_pressure
|
Yes
|
No
|
No
|
Yes
|
Yes
|
Yes
|
No
|
|
sex
|
Female
|
Male
|
Female
|
Male
|
Male
|
Female
|
Female
|
|
time
|
95
|
201
|
147
|
108
|
194
|
83
|
246
|
|
DEATH_EVENT
|
0
|
0
|
0
|
0
|
0
|
0
|
0
|
Data Description and Properties
freq(df$diabetes) %>% kable() %>% kable_classic() ## diabetes
|
|
Freq
|
% Valid
|
% Valid Cum.
|
% Total
|
% Total Cum.
|
|
No
|
174
|
58.19398
|
58.19398
|
58.19398
|
58.19398
|
|
Yes
|
125
|
41.80602
|
100.00000
|
41.80602
|
100.00000
|
|
<NA>
|
0
|
NA
|
NA
|
0.00000
|
100.00000
|
|
Total
|
299
|
100.00000
|
100.00000
|
100.00000
|
100.00000
|
freq(df$smoking) %>% kable() %>% kable_classic() ## smoking
|
|
Freq
|
% Valid
|
% Valid Cum.
|
% Total
|
% Total Cum.
|
|
No
|
203
|
67.89298
|
67.89298
|
67.89298
|
67.89298
|
|
Yes
|
96
|
32.10702
|
100.00000
|
32.10702
|
100.00000
|
|
<NA>
|
0
|
NA
|
NA
|
0.00000
|
100.00000
|
|
Total
|
299
|
100.00000
|
100.00000
|
100.00000
|
100.00000
|
freq(df$high_blood_pressure) %>% kable() %>% kable_classic() #high blood pressure
|
|
Freq
|
% Valid
|
% Valid Cum.
|
% Total
|
% Total Cum.
|
|
No
|
194
|
64.88294
|
64.88294
|
64.88294
|
64.88294
|
|
Yes
|
105
|
35.11706
|
100.00000
|
35.11706
|
100.00000
|
|
<NA>
|
0
|
NA
|
NA
|
0.00000
|
100.00000
|
|
Total
|
299
|
100.00000
|
100.00000
|
100.00000
|
100.00000
|
freq(df$sex) %>% kable() %>% kable_classic() #sex
|
|
Freq
|
% Valid
|
% Valid Cum.
|
% Total
|
% Total Cum.
|
|
Female
|
105
|
35.11706
|
35.11706
|
35.11706
|
35.11706
|
|
Male
|
194
|
64.88294
|
100.00000
|
64.88294
|
100.00000
|
|
<NA>
|
0
|
NA
|
NA
|
0.00000
|
100.00000
|
|
Total
|
299
|
100.00000
|
100.00000
|
100.00000
|
100.00000
|
descr(df$age) %>% kable() %>% kable_classic() #age
|
|
age
|
|
Mean
|
60.8338930
|
|
Std.Dev
|
11.8948091
|
|
Min
|
40.0000000
|
|
Q1
|
51.0000000
|
|
Median
|
60.0000000
|
|
Q3
|
70.0000000
|
|
Max
|
95.0000000
|
|
MAD
|
14.8260000
|
|
IQR
|
19.0000000
|
|
CV
|
0.1955293
|
|
Skewness
|
0.4188266
|
|
SE.Skewness
|
0.1409539
|
|
Kurtosis
|
-0.2204793
|
|
N.Valid
|
299.0000000
|
|
Pct.Valid
|
100.0000000
|
# Create histogram with normal curve
age_dist<-df %>%
ggplot(aes(x = age)) +
# First, add the histogram
geom_histogram(aes(y = after_stat(density)),
bins = 15, # Adjust number of bins for smoothness
fill = "lightblue", # Make histogram slightly transparent
alpha = 0.7) +
# Add the normal distribution curve
stat_function(fun = dnorm,
args = list(
mean = mean(df$age),
sd = sd(df$age)
),
color = "orange",
linewidth = .5) +
# Add proper labels
labs(
title = "Age Distribution with Normal Curve",
x = "Age",
y = "Density"
) +
theme_bw()+
theme(
legend.position = "bottom",
legend.box = "horizontal"
)
chrt <- function(df, var_name, t, labels = c("No", "Yes"), label_text = "(0=No, 1=Yes)") {
df %>%
count(.data[[var_name]]) %>%
ggplot(aes(x = factor(.data[[var_name]]), y = n,
fill = factor(.data[[var_name]]))) +
geom_col() +
labs(title = paste0(t, " ", label_text),
x = "Status",
y = "No of Patients") +
scale_fill_discrete(name = "Status", labels = labels) +
scale_x_discrete(labels = labels) +
theme_bw() +
theme(
legend.position = "bottom",
legend.box = "horizontal"
)
}
diab_dist<-df %>% chrt(var_name = "diabetes", t = "Diabetes Status")
anaemia_dist<-df %>% chrt(var_name = "anaemia",t="Anaemia Status")
hb_dist<-df %>% chrt(var_name = "high_blood_pressure",t="High Blood Pressure Status")
sex_dist<-df %>% chrt(var_name = "sex",t="Patient Distribution By Sex",labels = c("Male","Female"),label_text="")
smoking_dist<-df %>% chrt(var_name = "smoking",t="Smoking Status")
combined_plot <- (sex_dist + age_dist) / (diab_dist + smoking_dist) /(anaemia_dist + hb_dist) +
plot_layout(
heights = c(2,2,2)
)
combined_plot +
plot_annotation(
title = "Heart Failure Patient Visual Characteristics",
theme = theme(plot.title = element_text(size = 8, hjust = 0.5))
)

# this is the surv object representation of the data
s_ob<-Surv(df$time,df$DEATH_EVENT)
make_km_frame<-function(km) {
strata_factor <- rep(names(km$strata), km$strata)
frame<- data.frame(
strata=strata_factor,
time=km$time,
risk=km$n.risk,
event=km$n.event,
survival=km$surv,
lower=km$lower,
upper=km$upper
) %>% filter(event!=0,time %in%c(5*(1:50)))
return (frame %>% select(strata,everything()))
}
km_tbl_options<-function(df,caption="s") {
return (kable(
df,
digits = 4, # Round numeric columns to 3 decimal places
caption = caption,
col.names = c("Time", "At Risk", "Events", "Survival", "Lower CI", "Upper CI"),
longtable = TRUE
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
latex_options = c("scale_down", "hold_position", "repeat_header", "striped"),
font_size = 8, # Reduce font size further
full_width = F # Don't force full width
)
)
}
Kaplan Meier (Age)
km_sex<-survfit(s_ob~sex,data = df)
sdf<-make_km_frame(km_sex)
sdf %>% filter(strata == "sex=Female") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Female Patients")
Survival Analysis - Female Patients
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
104
|
1
|
0.9810
|
0.9552
|
1.0000
|
|
15
|
102
|
2
|
0.9617
|
0.9256
|
0.9992
|
|
20
|
99
|
1
|
0.9520
|
0.9118
|
0.9940
|
|
30
|
94
|
2
|
0.9032
|
0.8480
|
0.9621
|
|
45
|
86
|
1
|
0.8345
|
0.7656
|
0.9096
|
|
60
|
85
|
1
|
0.8247
|
0.7543
|
0.9017
|
|
65
|
83
|
1
|
0.8147
|
0.7429
|
0.8936
|
|
90
|
69
|
1
|
0.7618
|
0.6827
|
0.8500
|
|
95
|
64
|
1
|
0.7499
|
0.6692
|
0.8403
|
|
100
|
59
|
1
|
0.7372
|
0.6546
|
0.8301
|
|
115
|
51
|
1
|
0.7227
|
0.6379
|
0.8188
|
|
130
|
47
|
1
|
0.6926
|
0.6032
|
0.7952
|
sdf %>% filter(strata == "sex=Male") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Male Patients")
Survival Analysis - Male Patients
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
189
|
5
|
0.9485
|
0.9178
|
0.9801
|
|
20
|
179
|
1
|
0.9175
|
0.8796
|
0.9571
|
|
30
|
170
|
2
|
0.8709
|
0.8249
|
0.9194
|
|
35
|
164
|
1
|
0.8552
|
0.8070
|
0.9063
|
|
40
|
162
|
1
|
0.8447
|
0.7951
|
0.8974
|
|
50
|
159
|
1
|
0.8290
|
0.7775
|
0.8839
|
|
55
|
156
|
1
|
0.8236
|
0.7715
|
0.8793
|
|
60
|
154
|
1
|
0.8130
|
0.7597
|
0.8700
|
|
65
|
150
|
1
|
0.7970
|
0.7420
|
0.8560
|
|
90
|
120
|
1
|
0.7630
|
0.7046
|
0.8261
|
|
135
|
86
|
1
|
0.7178
|
0.6537
|
0.7881
|
|
150
|
75
|
1
|
0.7082
|
0.6427
|
0.7804
|
|
170
|
72
|
1
|
0.6795
|
0.6100
|
0.7569
|
|
180
|
68
|
2
|
0.6502
|
0.5774
|
0.7322
|
|
235
|
23
|
1
|
0.5915
|
0.5022
|
0.6967
|
ggsurvplot(km_sex,
data = df
,pval = T,
tables.height = 0.3,
censor=F,
conf.int = F, # Show confidence intervals
risk.table = TRUE, # Add risk table below
ggtheme = theme_bw(),
xlab = "Time in Days",
ylab = "Kaplan Meir Survival Estimates",
title = "Kaplan Meir Survival Estimates by sex")

Kaplan Meier (Diabetes)
km_dia<-survfit(s_ob~diabetes,data = df)
sdf_dia<-make_km_frame(km_dia)
sdf_dia %>% filter(strata == "diabetes=No") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Patients with No Diabetes")
Survival Analysis - Patients with No Diabetes
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
169
|
5
|
0.9425
|
0.9086
|
0.9778
|
|
15
|
158
|
1
|
0.9079
|
0.8659
|
0.9519
|
|
20
|
156
|
1
|
0.9021
|
0.8589
|
0.9474
|
|
50
|
146
|
1
|
0.8439
|
0.7915
|
0.8998
|
|
55
|
143
|
1
|
0.8380
|
0.7848
|
0.8948
|
|
60
|
142
|
1
|
0.8321
|
0.7781
|
0.8898
|
|
65
|
137
|
1
|
0.8142
|
0.7581
|
0.8744
|
|
90
|
111
|
2
|
0.7628
|
0.7009
|
0.8301
|
|
95
|
106
|
1
|
0.7556
|
0.6929
|
0.8239
|
|
115
|
85
|
1
|
0.7151
|
0.6475
|
0.7897
|
|
170
|
64
|
1
|
0.6931
|
0.6219
|
0.7723
|
|
180
|
60
|
2
|
0.6593
|
0.5835
|
0.7450
|
sdf_dia %>% filter(strata == "diabetes=Yes") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Patients with Diabetes")
Survival Analysis - Patients with Diabetes
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
124
|
1
|
0.9840
|
0.9622
|
1.0000
|
|
15
|
123
|
1
|
0.9760
|
0.9495
|
1.0000
|
|
20
|
122
|
1
|
0.9680
|
0.9376
|
0.9994
|
|
30
|
112
|
4
|
0.8790
|
0.8234
|
0.9383
|
|
35
|
103
|
1
|
0.8460
|
0.7847
|
0.9122
|
|
40
|
102
|
1
|
0.8377
|
0.7751
|
0.9055
|
|
45
|
99
|
1
|
0.8129
|
0.7467
|
0.8849
|
|
60
|
97
|
1
|
0.7963
|
0.7280
|
0.8709
|
|
65
|
96
|
1
|
0.7880
|
0.7188
|
0.8638
|
|
100
|
72
|
1
|
0.7518
|
0.6782
|
0.8333
|
|
130
|
60
|
1
|
0.7154
|
0.6362
|
0.8044
|
|
135
|
59
|
1
|
0.7033
|
0.6225
|
0.7945
|
|
150
|
53
|
1
|
0.6900
|
0.6074
|
0.7839
|
|
235
|
21
|
1
|
0.6025
|
0.5027
|
0.7222
|
ggsurvplot(km_dia,
data = df
,pval = T,
tables.height = 0.3,
censor=F,
conf.int = F, # Show confidence intervals
risk.table = TRUE, # Add risk table below
ggtheme = theme_bw(),
xlab = "Time in Days",
ylab = "Kaplan Meir Survival Estimates",
title = "Kaplan Meir Survival Estimates by Diabetes Status"
)

Nelson-Aalen (Smoking)
na_smoke <- survfit(
s_ob ~ smoking, # Proper survival object formula
data = df,
type = "fleming-harrington", # This gives us Nelson-Aalen estimation
conf.type = "log-log" # Appropriate confidence interval type
)
sdf_smoke<-make_km_frame(na_smoke)
sdf_smoke %>% filter(strata == "smoking=No") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Patients who do not smoke")
Survival Analysis - Patients who do not smoke
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
199
|
2
|
0.9705
|
0.9356
|
0.9867
|
|
15
|
191
|
2
|
0.9361
|
0.8925
|
0.9624
|
|
20
|
188
|
1
|
0.9311
|
0.8865
|
0.9586
|
|
30
|
178
|
4
|
0.8763
|
0.8224
|
0.9147
|
|
50
|
164
|
1
|
0.8307
|
0.7712
|
0.8759
|
|
60
|
163
|
2
|
0.8206
|
0.7600
|
0.8671
|
|
65
|
159
|
1
|
0.8104
|
0.7488
|
0.8582
|
|
90
|
131
|
1
|
0.7783
|
0.7137
|
0.8301
|
|
95
|
124
|
1
|
0.7720
|
0.7067
|
0.8247
|
|
100
|
118
|
1
|
0.7592
|
0.6922
|
0.8135
|
|
115
|
102
|
1
|
0.7377
|
0.6679
|
0.7951
|
|
130
|
92
|
1
|
0.7143
|
0.6414
|
0.7750
|
|
135
|
91
|
1
|
0.7065
|
0.6327
|
0.7682
|
|
180
|
75
|
2
|
0.6554
|
0.5757
|
0.7238
|
|
235
|
25
|
1
|
0.5712
|
0.4728
|
0.6579
|
sdf_smoke %>% filter(strata == "smoking=Yes") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Patients who smoke")
Survival Analysis - Patients who smoke
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
94
|
4
|
0.9385
|
0.8682
|
0.9719
|
|
20
|
90
|
1
|
0.9281
|
0.8551
|
0.9651
|
|
35
|
83
|
1
|
0.8556
|
0.7684
|
0.9118
|
|
40
|
82
|
1
|
0.8453
|
0.7565
|
0.9037
|
|
45
|
81
|
1
|
0.8349
|
0.7447
|
0.8954
|
|
55
|
78
|
1
|
0.8243
|
0.7326
|
0.8869
|
|
65
|
74
|
1
|
0.7921
|
0.6964
|
0.8606
|
|
90
|
58
|
1
|
0.7336
|
0.6310
|
0.8118
|
|
150
|
34
|
1
|
0.6985
|
0.5878
|
0.7848
|
|
170
|
32
|
1
|
0.6568
|
0.5368
|
0.7527
|
ggsurvplot(
na_smoke,
fun = "cumhaz", # This tells ggsurvplot to show cumulative hazard
data = df,
censor=F,
conf.int = F, # Show confidence intervals
risk.table = TRUE, # Add risk table below
ggtheme = theme_bw(),
xlab = "Time in Days",
ylab = "Cumulative Hazard",
title = "Nelson-Aalen Cumulative Hazard Estimate by Smoking Status"
)

Nelson Aalen (HBP)
na_hbp <- survfit(
s_ob ~ high_blood_pressure, # Proper survival object formula
data = df,
type = "fleming-harrington", # This gives us Nelson-Aalen estimation
conf.type = "log-log" # Appropriate confidence interval type
)
sdf_hbp<-make_km_frame(na_hbp)
sdf_hbp %>% filter(strata == "high_blood_pressure=No") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Patients who do not HBP")
Survival Analysis - Patients who do not HBP
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
190
|
3
|
0.9641
|
0.9262
|
0.9827
|
|
15
|
183
|
1
|
0.9384
|
0.8941
|
0.9646
|
|
30
|
176
|
3
|
0.9023
|
0.8511
|
0.9365
|
|
35
|
171
|
1
|
0.8918
|
0.8389
|
0.9281
|
|
45
|
168
|
1
|
0.8761
|
0.8209
|
0.9152
|
|
50
|
167
|
1
|
0.8709
|
0.8149
|
0.9109
|
|
65
|
160
|
2
|
0.8495
|
0.7907
|
0.8929
|
|
90
|
132
|
2
|
0.7990
|
0.7342
|
0.8496
|
|
95
|
126
|
1
|
0.7927
|
0.7271
|
0.8442
|
|
135
|
97
|
1
|
0.7492
|
0.6775
|
0.8072
|
|
150
|
86
|
1
|
0.7405
|
0.6675
|
0.8000
|
|
170
|
84
|
1
|
0.7232
|
0.6476
|
0.7853
|
|
180
|
77
|
1
|
0.6883
|
0.6084
|
0.7553
|
|
235
|
34
|
1
|
0.6324
|
0.5391
|
0.7119
|
sdf_hbp %>% filter(strata == "high_blood_pressure=Yes") %>%
select(time,risk,event,survival,lower,upper) %>%
km_tbl_options("Survival Analysis - Patients who have HBP")
Survival Analysis - Patients who have HBP
|
Time
|
At Risk
|
Events
|
Survival
|
Lower CI
|
Upper CI
|
|
10
|
103
|
3
|
0.9529
|
0.8905
|
0.9801
|
|
15
|
98
|
1
|
0.9338
|
0.8662
|
0.9679
|
|
20
|
96
|
2
|
0.9146
|
0.8422
|
0.9546
|
|
30
|
88
|
1
|
0.8468
|
0.7621
|
0.9032
|
|
40
|
82
|
1
|
0.7982
|
0.7074
|
0.8635
|
|
55
|
77
|
1
|
0.7494
|
0.6541
|
0.8219
|
|
60
|
75
|
2
|
0.7201
|
0.6228
|
0.7963
|
|
100
|
51
|
1
|
0.6842
|
0.5830
|
0.7658
|
|
115
|
43
|
1
|
0.6535
|
0.5474
|
0.7405
|
|
130
|
36
|
1
|
0.6355
|
0.5263
|
0.7261
|
|
180
|
29
|
1
|
0.5951
|
0.4784
|
0.6939
|
ggsurvplot(
na_hbp,
fun = "cumhaz", # This tells ggsurvplot to show cumulative hazard
data = df,
censor=F,
conf.int = F, # Show confidence intervals
risk.table = TRUE, # Add risk table below
ggtheme = theme_bw(),
xlab = "Time in Days",
ylab = "Cumulative Hazard",
title = "Nelson-Aalen Cumulative Hazard Estimate by HBP Status"
)

Log-Rank Test
using HBP Estimates
lr_table<-function(diff) {
df<-data.frame(
name=diff$n,
obs=diff$obs,
exp=diff$exp,
pv=diff$pvalue,
chq=diff$chisq
) %>%t() %>% kable() %>% kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
latex_options = c("scale_down", "hold_position"), # Added hold_position
full_width = F
)
return(df)
}
sdiff_hbp<-survdiff(s_ob~high_blood_pressure,data = df,rho = 0)
lr_table(sdiff_hbp)
|
|
|
|
|
name.groups
|
high_blood_pressure=No
|
high_blood_pressure=Yes
|
|
name.Freq
|
194
|
105
|
|
obs
|
57
|
39
|
|
exp
|
66.42245
|
29.57755
|
|
pv
|
0.03580752
|
0.03580752
|
|
chq
|
4.406248
|
4.406248
|
sdiff_smoke<-survdiff(s_ob~smoking,data=df,rho = 0)
lr_table(sdiff_smoke)
|
|
|
|
|
name.groups
|
smoking=No
|
smoking=Yes
|
|
name.Freq
|
203
|
96
|
|
obs
|
66
|
30
|
|
exp
|
65.79487
|
30.20513
|
|
pv
|
0.9639597
|
0.9639597
|
|
chq
|
0.002041704
|
0.002041704
|
sdiff_sex<-survdiff(s_ob~sex,data=df,rho = 0)
lr_table(sdiff_sex)
|
|
|
|
|
name.groups
|
sex=Female
|
sex=Male
|
|
name.Freq
|
105
|
194
|
|
obs
|
34
|
62
|
|
exp
|
34.2953
|
61.7047
|
|
pv
|
0.9497523
|
0.9497523
|
|
chq
|
0.003971242
|
0.003971242
|
sdiff_dia<-survdiff(s_ob~diabetes,data=df,rho = 0)
lr_table(sdiff_dia)
|
|
|
|
|
name.groups
|
diabetes=No
|
diabetes=Yes
|
|
name.Freq
|
174
|
125
|
|
obs
|
56
|
40
|
|
exp
|
55.02732
|
40.97268
|
|
pv
|
0.840452
|
0.840452
|
|
chq
|
0.04052788
|
0.04052788
|
sdiff_anae<-survdiff(s_ob~anaemia,data=df,rho = 0)
lr_table(sdiff_anae)
|
|
|
|
|
name.groups
|
anaemia=No
|
anaemia=Yes
|
|
name.Freq
|
170
|
129
|
|
obs
|
50
|
46
|
|
exp
|
57.8783
|
38.1217
|
|
pv
|
0.09869758
|
0.09869758
|
|
chq
|
2.726464
|
2.726464
|
Conclusion
Based on the log-rank test results, only high blood pressure
demonstrated a significant difference in survival patterns among heart
failure patients (p = 0.035). Other clinical variables examined showed
no statistically significant impact on survival distributions at the 5%
significance level.
Cox-Proportional Hazard
cox_sex<-coxph(s_ob~sex,data = df)
tidy(cox_sex) %>%t() %>% kable() %>% kable_styling(
bootstrap_options = c("striped", "condensed", "hover"),
latex_options = c("scale_down", "hold_position"),
font_size = 9,
position = "center", # Centers the table
full_width = F # Better for transposed tables
)
|
|
|
|
term
|
sexMale
|
|
estimate
|
0.01356273
|
|
std.error
|
0.2134404
|
|
statistic
|
0.06354342
|
|
p.value
|
0.9493338
|
cox_smoking<-coxph(s_ob~smoking,data = df)
tidy(cox_smoking) %>% t() %>% kable() %>% kable_styling(
bootstrap_options = c("striped", "condensed", "hover"),
latex_options = c("scale_down", "hold_position"),
font_size = 9,
position = "center", # Centers the table
full_width = F # Better for transposed tables
)
|
|
|
|
term
|
smokingYes
|
|
estimate
|
-0.009586323
|
|
std.error
|
0.2203002
|
|
statistic
|
-0.04351483
|
|
p.value
|
0.9652911
|
cox_hbp<-coxph(s_ob~high_blood_pressure,data = df)
tidy(cox_hbp) %>% t() %>% kable() %>% kable_styling(
bootstrap_options = c("striped", "condensed", "hover"),
latex_options = c("scale_down", "hold_position"),
font_size = 9,
position = "center", # Centers the table
full_width = F # Better for transposed tables
)
|
|
|
|
term
|
high_blood_pressureYes
|
|
estimate
|
0.4359492
|
|
std.error
|
0.2093982
|
|
statistic
|
2.081915
|
|
p.value
|
0.03735024
|
cox_anae<-coxph(s_ob~anaemia,data = df)
tidy(cox_anae) %>% t() %>% kable() %>% kable_styling(
bootstrap_options = c("striped", "condensed", "hover"),
latex_options = c("scale_down", "hold_position"),
font_size = 9,
position = "center", # Centers the table
full_width = F # Better for transposed tables
)
|
|
|
|
term
|
anaemiaYes
|
|
estimate
|
0.3373673
|
|
std.error
|
0.2049819
|
|
statistic
|
1.64584
|
|
p.value
|
0.0997968
|
cox_dia<-coxph(s_ob~diabetes,data = df)
tidy(cox_dia) %>% t() %>% kable() %>% kable_styling(
bootstrap_options = c("striped", "condensed", "hover"),
latex_options = c("scale_down", "hold_position"),
font_size = 9,
position = "center", # Centers the table
full_width = F # Better for transposed tables
)
|
|
|
|
term
|
diabetesYes
|
|
estimate
|
-0.04184191
|
|
std.error
|
0.2072847
|
|
statistic
|
-0.2018572
|
|
p.value
|
0.8400284
|
Conclusion
Based on the Cox proportional hazards regression analysis, the
results align with our log-rank test findings - high blood pressure
remains the only significant predictor of survival (p = 0.037) at the 5%
significance level, while other clinical variables show no statistically
significant impact on survival outcomes.