Heart failure data analysis

Multivariate Logistic Regression and Surnival analysis (Cox model and Kaplan-Meier model) of Heart failure survival data

#Relative Path
setwd("/Users/kiteomoru/Desktop/Epidata/KAGGLEDATA")

#analysis of medical health insurance data (linear regression)
library(readxl)

## Warning: package 'readxl' was built under R version 4.1.2

library(ggplot2)
library(reshape)

## Warning: package 'reshape' was built under R version 4.1.2

library(psych)

## Warning: package 'psych' was built under R version 4.1.2

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(car)

## Warning: package 'car' was built under R version 4.1.2

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.1.2

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ✔ purrr   0.3.4

## Warning: package 'tidyr' was built under R version 4.1.2

## Warning: package 'readr' was built under R version 4.1.2

## Warning: package 'dplyr' was built under R version 4.1.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()    masks ggplot2::%+%()
## ✖ psych::alpha()  masks ggplot2::alpha()
## ✖ tidyr::expand() masks reshape::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ dplyr::rename() masks reshape::rename()
## ✖ purrr::some()   masks car::some()

library(skimr)

## Warning: package 'skimr' was built under R version 4.1.2

library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(ggpubr)
library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.1.2

library(stringr)
library(purrr)
library(tidyr)
library(pacman)
library(survival)

## Warning: package 'survival' was built under R version 4.1.2

library(survminer)

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma

library(parameters)

## Warning: package 'parameters' was built under R version 4.1.2

theme_set(theme_pubr())
##forestplot
pacman::p_load(easystats)

## Warning: package 'easystats' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages

## Warning in p_install(package, character.only = TRUE, ...):

## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'easystats'

## Warning in pacman::p_load(easystats): Failed to install/load:
## easystats

#load data
#data obtained from Kaggle
HFS_data= read_excel("/Users/kiteomoru/Desktop/Epidata/KAGGLEDATA/survivalheart_failure_clinical_records_dataset.xlsx")
data= as.data.frame(HFS_data)
head(data)

##   age anaemia creatinine_phosphokinase diabetes ejection_fraction
## 1  75       0                      582        0                20
## 2  55       0                     7861        0                38
## 3  65       0                      146        0                20
## 4  50       1                      111        0                20
## 5  65       1                      160        1                20
## 6  90       1                       47        0                40
##   high_blood_pressure platelets serum_creatinine serum_sodium sex smoking time
## 1                   1    265000              1.9          130   1       0    4
## 2                   0    263358              1.1          136   1       0    6
## 3                   0    162000              1.3          129   1       1    7
## 4                   0    210000              1.9          137   1       0    7
## 5                   0    327000              2.7          116   0       0    8
## 6                   1    204000              2.1          132   1       1    8
##   DEATH_EVENT
## 1           1
## 2           1
## 3           1
## 4           1
## 5           1
## 6           1

#data exploration
skim(data)

Data summary
Name	data
Number of rows	299
Number of columns	13
_______________________
Column type frequency:
numeric	13
________________________
Group variables	None

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
age	1	60.83	11.89	40.0	51.0	60.0	70.0	95.0	▆▇▇▂▁
anaemia	1	0.43	0.50	0.0	0.0	0.0	1.0	1.0	▇▁▁▁▆
creatinine_phosphokinase	1	581.84	970.29	23.0	116.5	250.0	582.0	7861.0	▇▁▁▁▁
diabetes	1	0.42	0.49	0.0	0.0	0.0	1.0	1.0	▇▁▁▁▆
ejection_fraction	1	38.08	11.83	14.0	30.0	38.0	45.0	80.0	▃▇▂▂▁
high_blood_pressure	1	0.35	0.48	0.0	0.0	0.0	1.0	1.0	▇▁▁▁▅
platelets	1	263358.03	97804.24	25100.0	212500.0	262000.0	303500.0	850000.0	▂▇▂▁▁
serum_creatinine	1	1.39	1.03	0.5	0.9	1.1	1.4	9.4	▇▁▁▁▁
serum_sodium	1	136.63	4.41	113.0	134.0	137.0	140.0	148.0	▁▁▃▇▁
sex	1	0.65	0.48	0.0	0.0	1.0	1.0	1.0	▅▁▁▁▇
smoking	1	0.32	0.47	0.0	0.0	0.0	1.0	1.0	▇▁▁▁▃
time	1	130.26	77.61	4.0	73.0	115.0	203.0	285.0	▆▇▃▆▃
DEATH_EVENT	1	0.32	0.47	0.0	0.0	0.0	1.0	1.0	▇▁▁▁▃

#Correlation analysis
cor_data_m=cor(data)
#visualize

cor_data_m_melt= melt(cor_data_m)

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

cor_data_m_melt$value= round(cor_data_m_melt$value, digits = 2)
heatmap= ggplot(cor_data_m_melt, aes(X1, X2, fill= value))+
  geom_tile(color = "white", lwd= 1.5, linetype= 1) +
  scale_fill_gradient2(low = "#2C7BB6",
                       mid = "white",
                       high = "#D7191C", breaks=seq(-1,1,0.2), limits= c(-1,1)) +
  coord_fixed() + 
  theme_minimal(base_family="Helvetica")+
  guides(fill = guide_colourbar(barwidth = 0.5,barheight = 20))

heatmap2=heatmap + 
  geom_text(aes(X1, X2, label = value), color = "black", size = 2) +
  theme(axis.ticks=element_blank())+
  theme(axis.text.x=element_text(angle = 45, hjust = 1))
heatmap2

#Convert variables to factors
#convert 1 and 0 to 'M' and 'F'
data$sex[data$sex  == 0]<- 'F'
data$sex[data$sex  == 1]<- 'M'


str(data$diabetes)

##  num [1:299] 0 0 0 0 1 0 0 1 0 0 ...

str(data$sex)

##  chr [1:299] "M" "M" "M" "M" "F" "M" "M" "M" "F" "M" "M" "M" "M" "M" "F" ...

str(data$high_blood_pressure)

##  num [1:299] 1 0 0 0 0 1 0 0 0 1 ...

str(data$smoking)

##  num [1:299] 0 0 1 0 0 1 0 1 0 1 ...

str(data$DEATH_EVENT)

##  num [1:299] 1 1 1 1 1 1 1 1 1 1 ...

data$diabetes= as.factor(data$diabetes)
data$sex= as.factor(data$sex)
data$smoking = as.factor(data$smoking)
data$high_blood_pressure= as.factor(data$high_blood_pressure)
data$anaemia= as.factor(data$anaemia)
data$DEATH_EVENT= as.factor(data$DEATH_EVENT)

#Statistics
#scatterplot to find relationship among variables
pairs.panels(data)

#visualize Normality distribution
par(mfrow = c(2, 4))
qqPlot(data$age)

## [1] 27 56

qqPlot(data$creatinine_phosphokinase)

## [1]  2 61

qqPlot(data$ejection_fraction)

## [1]  65 218

qqPlot(data$platelets)

## [1] 110 297

qqPlot(data$serum_creatinine)

## [1]  10 218

qqPlot(data$serum_sodium)

## [1] 200   5

qqPlot(data$time)

## [1] 299 298

#Shapiro-Wilk
shapiro.test(data$age)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$age
## W = 0.97547, p-value = 5.35e-05

shapiro.test(data$creatinine_phosphokinase)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$creatinine_phosphokinase
## W = 0.51426, p-value < 2.2e-16

shapiro.test(data$ejection_fraction)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$ejection_fraction
## W = 0.94732, p-value = 7.216e-09

shapiro.test(data$platelets)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$platelets
## W = 0.91151, p-value = 2.883e-12

shapiro.test(data$serum_creatinine)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$serum_creatinine
## W = 0.55147, p-value < 2.2e-16

shapiro.test(data$serum_sodium)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$serum_sodium
## W = 0.93903, p-value = 9.215e-10

shapiro.test(data$time)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$time
## W = 0.94678, p-value = 6.285e-09

#Wilcoxons test
## testing for significant differences between continuous variables and outcome outcome variables  with a wilcox test
wilcox.test(age ~ DEATH_EVENT, data = data)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  age by DEATH_EVENT
## W = 7121, p-value = 0.0001668
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(creatinine_phosphokinase ~ DEATH_EVENT, data = data)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  creatinine_phosphokinase by DEATH_EVENT
## W = 9460, p-value = 0.684
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(ejection_fraction ~ DEATH_EVENT, data = data)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ejection_fraction by DEATH_EVENT
## W = 13176, p-value = 7.368e-07
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(platelets ~ DEATH_EVENT, data = data)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  platelets by DEATH_EVENT
## W = 10300, p-value = 0.4256
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(serum_creatinine ~ DEATH_EVENT, data = data)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  serum_creatinine by DEATH_EVENT
## W = 5298, p-value = 1.581e-10
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(serum_sodium ~ DEATH_EVENT, data = data)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  serum_sodium by DEATH_EVENT
## W = 12262, p-value = 0.0002928
## alternative hypothesis: true location shift is not equal to 0

wilcox.test(time ~ DEATH_EVENT, data = data)

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  time by DEATH_EVENT
## W = 16288, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

#Chi-Square test
## testing for significant differences between categorical groups.
chisq.test(data$anaemia, data$DEATH_EVENT)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data$anaemia and data$DEATH_EVENT
## X-squared = 1.0422, df = 1, p-value = 0.3073

chisq.test(data$diabetes, data$DEATH_EVENT)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data$diabetes and data$DEATH_EVENT
## X-squared = 2.1617e-30, df = 1, p-value = 1

chisq.test(data$high_blood_pressure, data$DEATH_EVENT)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data$high_blood_pressure and data$DEATH_EVENT
## X-squared = 1.5435, df = 1, p-value = 0.2141

chisq.test(data$sex, data$DEATH_EVENT)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data$sex and data$DEATH_EVENT
## X-squared = 0, df = 1, p-value = 1

chisq.test(data$smoking, data$DEATH_EVENT)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data$smoking and data$DEATH_EVENT
## X-squared = 0.0073315, df = 1, p-value = 0.9318

#Convert age to categories - age_group
data= data %>% 
  mutate(
    # Create categories
    age_group = dplyr::case_when(
     
      age > 39 & age <= 60 ~ "40-60",
      age > 60 & age <= 80 ~ "61-80",
      age > 80            ~ "> 80"
    ),
    # Convert to factor
    age_group = factor(
      age_group,
      level = c("40-60","61-80", "> 80")
    )
  )
# Death event and Age group
model2 <- glm(DEATH_EVENT ~ age_group, family = "binomial", data = data)
summary(model2)

## 
## Call:
## glm(formula = DEATH_EVENT ~ age_group, family = "binomial", data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6006  -0.8912  -0.7961   1.4937   1.6146  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.9865     0.1766  -5.585 2.34e-08 ***
## age_group61-80   0.2680     0.2633   1.018 0.308750    
## age_group> 80    1.9420     0.5551   3.499 0.000468 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 375.35  on 298  degrees of freedom
## Residual deviance: 361.31  on 296  degrees of freedom
## AIC: 367.31
## 
## Number of Fisher Scoring iterations: 4

#Looping multiple univariate models
## define variables of interest 
explanatory_vars1<- c("age_group", "serum_creatinine", "anaemia", "creatinine_phosphokinase", "diabetes","ejection_fraction", 
                      "high_blood_pressure", "platelets", "serum_creatinine", "serum_sodium", "sex", "smoking", "time")
explanatory_vars1 %>% str_c("DEATH_EVENT ~ ", .)

##  [1] "DEATH_EVENT ~ age_group"               
##  [2] "DEATH_EVENT ~ serum_creatinine"        
##  [3] "DEATH_EVENT ~ anaemia"                 
##  [4] "DEATH_EVENT ~ creatinine_phosphokinase"
##  [5] "DEATH_EVENT ~ diabetes"                
##  [6] "DEATH_EVENT ~ ejection_fraction"       
##  [7] "DEATH_EVENT ~ high_blood_pressure"     
##  [8] "DEATH_EVENT ~ platelets"               
##  [9] "DEATH_EVENT ~ serum_creatinine"        
## [10] "DEATH_EVENT ~ serum_sodium"            
## [11] "DEATH_EVENT ~ sex"                     
## [12] "DEATH_EVENT ~ smoking"                 
## [13] "DEATH_EVENT ~ time"

#Multivariate Logistic Regression - glm()
# Death event and all other variables
## run a regression with all variables of interest 
mv_reg <- explanatory_vars1 %>%  ## begin with vector of explanatory column names
  str_c(collapse = "+") %>%     ## combine all names of the variables of interest separated by a plus
  str_c("DEATH_EVENT ~ ", .) %>%    ## combine the names of variables of interest with outcome in formula style
  glm(family = "binomial",      ## define type of glm as logistic,
      data = data)          ## define your dataset

## choose a model using forward selection based on AIC
## you can also do "backward" or "both" by adjusting the direction
final_mv_reg <- mv_reg %>%
  step(direction = "forward", trace = FALSE)

mv_tab_base <- final_mv_reg %>% 
  broom::tidy(exponentiate = TRUE, conf.int = TRUE) %>%  ## get a tidy dataframe of estimates 
  mutate(across(where(is.numeric), round, digits = 2))          ## round 

## show results table of final regression -HTML Format
mv_table <- tbl_regression(final_mv_reg, exponentiate = TRUE)
mv_table

Characteristic	OR¹	95% CI¹	p-value
age_group
40-60	—	—
61-80	1.54	0.75, 3.18	0.2
> 80	6.60	1.61, 32.2	0.013
serum_creatinine	2.00	1.43, 2.95	<0.001
anaemia
0	—	—
1	0.97	0.47, 1.96	>0.9
creatinine_phosphokinase	1.00	1.00, 1.00	0.4
diabetes
0	—	—
1	1.18	0.59, 2.36	0.6
ejection_fraction	0.93	0.89, 0.95	<0.001
high_blood_pressure
0	—	—
1	1.00	0.48, 2.02	>0.9
platelets	1.00	1.00, 1.00	0.4
serum_sodium	0.94	0.86, 1.01	0.10
sex
F	—	—
M	0.61	0.27, 1.37	0.2
smoking
0	—	—
1	1.04	0.46, 2.34	>0.9
time	0.98	0.97, 0.98	<0.001
¹ OR = Odds Ratio, CI = Confidence Interval

## remove the intercept term from your multivariable results- forest plot
final_mv_reg %>% 
  model_parameters(exponentiate = TRUE) %>% 
  plot()

#####Survival plot- KM

data_surv= data
summary(data_surv$time)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    73.0   115.0   130.3   203.0   285.0

# cross tabulate the new event var and the outcome var from which it was created
data_surv$DEATH_EVENT= as.numeric(data_surv$DEATH_EVENT)
data_surv$DEATH_EVENT[data_surv$DEATH_EVENT  == 2]<- 'Alive'
data_surv$DEATH_EVENT[data_surv$DEATH_EVENT  == 1]<- 'Dead'

data_surv= data_surv%>%
  dplyr::mutate(
  # create the event var which is 1 if the patient died and 0 if he was right censored
  outcome = ifelse(is.na(DEATH_EVENT) | DEATH_EVENT == "Alive", 0, 1))
  
data_surv %>% 
  tabyl(DEATH_EVENT, outcome)

##  DEATH_EVENT  0   1
##        Alive 96   0
##         Dead  0 203

summary(data_surv$age_group)

## 40-60 61-80  > 80 
##   162   119    18

data_surv= data_surv%>%
  select(anaemia, creatinine_phosphokinase, diabetes, ejection_fraction, high_blood_pressure,platelets, serum_creatinine,
         serum_sodium,   serum_sodium, sex, smoking,time,  DEATH_EVENT, age_group, outcome )

##Descriptive table
data_surv %>% 
  tabyl(sex, age_group, show_na = F) %>% 
  adorn_totals(where = "both") %>% 
  adorn_percentages() %>% 
  adorn_pct_formatting() %>% 
  adorn_ns(position = "front")

##    sex       40-60       61-80      > 80        Total
##      F  59 (56.2%)  41 (39.0%)  5 (4.8%) 105 (100.0%)
##      M 103 (53.1%)  78 (40.2%) 13 (6.7%) 194 (100.0%)
##  Total 162 (54.2%) 119 (39.8%) 18 (6.0%) 299 (100.0%)

#Survival Object
# create the survfit object based on gender
data_surv_fit_sex <-  survfit(Surv(time, outcome) ~ sex, data = data_surv)

# set colors
sex <- c("lightgreen", "darkgreen")

#Plot
survminer::ggsurvplot(
  data_surv_fit_sex, 
  data = data_surv,          # again specify the data used to fit linelistsurv_fit_sex 
  conf.int = FALSE,              # do not show confidence interval of KM estimates
  surv.scale = "percent",        # present probabilities in the y axis in %
  break.time.by = 10,            # present the time axis with an increment of 10 days
  xlab = "Follow-up days",
  ylab = "Survival Probability",
  pval = T,                      # print p-value of Log-rank test 
  pval.coord = c(40,.91),        # print p-value at these plot coordinates
  risk.table = T,                # print the risk table at bottom 
  legend.title = "Gender",       # legend characteristics
  legend.labs = c("Female","Male"),
  font.legend = 10, 
  palette = "Dark2",             # specify color palette 
  surv.median.line = "hv",       # draw horizontal and vertical lines to the median survivals
  ggtheme = theme_light()        # simplify plot background
)

# SURVIVAL OBJECT
data_surv_fit_BP <-  survfit(
  Surv(time, outcome) ~ high_blood_pressure,
  data = data_surv
)

# HYPERTENSION plot
ggsurvplot( 
  data_surv_fit_BP,
  data = data_surv,
  size = 1, linetype = "strata",   # line types
  conf.int = T,
  surv.scale = "percent",  
  break.time.by = 10, 
  xlab = "Follow-up days",
  ylab= "Survival Probability",
  pval = T,
  pval.coord = c(40,.91),
  risk.table = T,
  legend.title = "HYPERTENSION",
  legend.labs = c("HIGH BP", "NORMAL BP"),
  font.legend = 10,
  palette = c("#E7B800","#3E606F"),
  surv.median.line = "hv", 
  ggtheme = theme_light()
)

#EF to binary variable
data_surv$ejection_fraction_cat= ifelse(data_surv$ejection_fraction >= 30, 1, 0)
data_surv_fit_EF <-  survfit(
  Surv(time, outcome) ~ ejection_fraction_cat,
  data = data_surv
)

# EJECTION FRACTION plot
ggsurvplot( 
  data_surv_fit_EF,
  data = data_surv,
  size = 1, linetype = "strata",   # line types
  conf.int = T,
  surv.scale = "percent",  
  break.time.by = 10, 
  xlab = "Follow-up days",
  ylab= "Survival Probability",
  pval = T,
  pval.coord = c(40,.91),
  risk.table = T,
  legend.title = "EJECTION FRACTION",
  legend.labs = c("NORMAL", "LOW"),
  font.legend = 10,
  palette = c("#E7B800","#3E606F"),
  surv.median.line = "hv", 
  ggtheme = theme_light()
)

##Survival- Cox Model
#fit the model
datasurv_cox <-  coxph(
  Surv(time, outcome) ~ sex + age_group+ ejection_fraction + serum_creatinine+high_blood_pressure,
  data = data_surv
)


#test the proportional hazard model
datasurv_ph_test <- cox.zph(datasurv_cox)
datasurv_ph_test

##                      chisq df     p
## sex                 0.0374  1 0.847
## age_group           2.2491  2 0.325
## ejection_fraction   4.4943  1 0.034
## serum_creatinine    0.0174  1 0.895
## high_blood_pressure 0.5604  1 0.454
## GLOBAL              7.3725  6 0.288

#print the summary of the model
summary(datasurv_cox)

## Call:
## coxph(formula = Surv(time, outcome) ~ sex + age_group + ejection_fraction + 
##     serum_creatinine + high_blood_pressure, data = data_surv)
## 
##   n= 299, number of events= 203 
## 
##                          coef exp(coef) se(coef)      z Pr(>|z|)   
## sexM                  0.11747   1.12465  0.15039  0.781  0.43473   
## age_group61-80        0.09382   1.09837  0.14750  0.636  0.52471   
## age_group> 80         0.12419   1.13223  0.46261  0.268  0.78835   
## ejection_fraction     0.01428   1.01438  0.00706  2.022  0.04315 * 
## serum_creatinine     -0.15251   0.85855  0.11167 -1.366  0.17203   
## high_blood_pressure1  0.50495   1.65690  0.15780  3.200  0.00137 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                      exp(coef) exp(-coef) lower .95 upper .95
## sexM                    1.1247     0.8892    0.8375     1.510
## age_group61-80          1.0984     0.9104    0.8226     1.467
## age_group> 80           1.1322     0.8832    0.4573     2.804
## ejection_fraction       1.0144     0.9858    1.0004     1.029
## serum_creatinine        0.8585     1.1648    0.6898     1.069
## high_blood_pressure1    1.6569     0.6035    1.2161     2.257
## 
## Concordance= 0.592  (se = 0.022 )
## Likelihood ratio test= 18.6  on 6 df,   p=0.005
## Wald test            = 19.44  on 6 df,   p=0.003
## Score (logrank) test = 19.77  on 6 df,   p=0.003

survminer::ggcoxzph(datasurv_ph_test)

ggforest(datasurv_cox, data = data_surv)

Heart failure data analysis

Okiemute Omoru

2022-08-01

Multivariate Logistic Regression and Surnival analysis (Cox model and Kaplan-Meier model) of Heart failure survival data