Heart Failure Clinical Record Analysis

Statistical Analysis of Heart Failure Patients

Sovoleak Sreng (S4229422)

Last updated: 02 June, 2026

Introduction

Problem Statement

Dataset Descriptions

Data Preprocessing

# Load the heart failure clinical records from the CSV file
heart <- read_csv(file = "heart.csv")

# Preview the first five rows of the dataset
heart %>% head(n = 5)
# Recode the binary variables used in this analysis as labelled factors
# (0/1 codes become descriptive labels for tables and plots)
heart$DEATH_EVENT <- factor(
  heart$DEATH_EVENT,
  levels = c(0, 1),
  labels = c("Survived", "Died")
)
heart$high_blood_pressure <- factor(
  heart$high_blood_pressure,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
levels(heart$DEATH_EVENT)
## [1] "Survived" "Died"
levels(heart$high_blood_pressure)
## [1] "No"  "Yes"

Data Preprocessing CONT

# Count the missing values in every column of the dataset
table1 <- colSums(is.na(heart))

knitr::kable(table1)
x
age 0
anaemia 0
creatinine_phosphokinase 0
diabetes 0
ejection_fraction 0
high_blood_pressure 0
platelets 0
serum_creatinine 0
serum_sodium 0
sex 0
smoking 0
time 0
DEATH_EVENT 0

Descriptive Statistics and Visualizations

# Summary statistics of ejection_fraction within each DEATH_EVENT group:
# five-number summary, mean, standard deviation, group size and the number
# of missing values.
table2 <- heart %>%
  group_by(DEATH_EVENT) %>%
  summarise(
    Min = min(ejection_fraction, na.rm = TRUE),
    Q1 = quantile(ejection_fraction, probs = 0.25, na.rm = TRUE),
    Median = median(ejection_fraction, na.rm = TRUE),
    Mean = mean(ejection_fraction, na.rm = TRUE),
    Q3 = quantile(ejection_fraction, probs = 0.75, na.rm = TRUE),
    Max = max(ejection_fraction, na.rm = TRUE),
    SD = sd(ejection_fraction, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(ejection_fraction))
  )
knitr::kable(table2)
DEATH_EVENT Min Q1 Median Mean Q3 Max SD n Missing
Survived 17 35 38 40.26601 45 80 10.85996 203 0
Died 14 25 30 33.46875 38 70 12.52530 96 0

Descriptive Statistics and Visualizations CONT

# Proportion of patients with and without high blood pressure within each
# DEATH_EVENT group (column proportions, so every column sums to 1).
# Only the column totals are appended: adding column-wise proportions across
# a row mixes two different denominators and has no meaningful interpretation.
table3 <- xtabs(~ high_blood_pressure + DEATH_EVENT, data = heart) %>%
  prop.table(margin = 2) %>%
  addmargins(margin = 1)
knitr::kable(table3)
Survived Died
No 0.6748768 0.59375
Yes 0.3251232 0.40625
Sum 1.0000000 1.00000

Descriptive Statistics and Visualizations CONT

# Side-by-side box plots of ejection fraction for each DEATH_EVENT group
boxplot(
  ejection_fraction ~ DEATH_EVENT,
  data = heart,
  main = "Ejection Fraction by Death Event",
  xlab = "Death Event",
  ylab = "Ejection Fraction (%)"
)

Descriptive Statistics and Visualizations CONT

# Histogram of ejection_fraction to check skewness
hist(
  heart$ejection_fraction,
  xlab = "Ejection Fraction (%)",
  main = "Distribution of Ejection Fraction"
)
# Reference lines: mean in red, median in blue; a gap between the two
# indicates a skewed distribution
abline(
  v = c(mean(heart$ejection_fraction), median(heart$ejection_fraction)),
  col = c("red", "blue"),
  lwd = 2
)

Hypothesis Testing

Two Sample test on the mean

Two Sample test on the mean CONT

# Checking assumptions
## Normality: Q-Q plot of ejection_fraction, one panel per DEATH_EVENT group
ggqqplot(
  data = heart,
  x = "ejection_fraction",
  facet.by = "DEATH_EVENT"
)

Two Sample test on the mean CONT

# Checking assumptions
## Homogeneity of variance: Levene's test of ejection_fraction across the
## DEATH_EVENT groups
heart %>%
  leveneTest(ejection_fraction ~ DEATH_EVENT, data = .)

Two Sample test on the mean CONT

# Two-sample t-test of ejection_fraction between the DEATH_EVENT groups,
# assuming equal variances (two-sided, 95% confidence interval)
t.test(
  ejection_fraction ~ DEATH_EVENT,
  data = heart,
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
## 
##  Two Sample t-test
## 
## data:  ejection_fraction by DEATH_EVENT
## t = 4.8056, df = 297, p-value = 2.453e-06
## alternative hypothesis: true difference in means between group Survived and group Died is not equal to 0
## 95 percent confidence interval:
##  4.013671 9.580849
## sample estimates:
## mean in group Survived     mean in group Died 
##               40.26601               33.46875

Hypothesis Testing CONT

Chi-square Test of Association

# Checking assumption: no more than 25% of the cells in the contingency table
# should have an expected frequency below 5
assume <- table(
  heart$high_blood_pressure,
  heart$DEATH_EVENT
)

# Run the test once and reuse the result for the observed counts, the
# expected counts and the p-value
chi_assume <- chisq.test(assume)
chi_assume[["observed"]]
##      
##       Survived Died
##   No       137   57
##   Yes       66   39
chi_assume[["expected"]]
##      
##        Survived     Died
##   No  131.71237 62.28763
##   Yes  71.28763 33.71237

Chi-square Test of Association CONT

# P-value of the chi-square test of association (chisq.test() applies Yates'
# continuity correction to 2x2 tables by default)
chi_assume[["p.value"]]
## [1] 0.2141034

Discussion

References

AHA (American Heart Association) (2025) What is Heart Failure?, American Heart Association website, accessed 26 May 2026. https://www.heart.org/en/health-topics/heart-failure/what-is-heart-failure

UCI Machine Learning Repository (2020) Heart Failure Clinical Records, UCI Machine Learning Repository website, accessed 26 May 2026. https://archive.ics.uci.edu/dataset/519/heart-failure-clinical-records