Sovoleak Sreng (S4229422)
Last updated: 02 June, 2026
# Recode the 0/1 indicator columns used in this analysis as labelled factors.
# Levels are given in the order 0, 1, so the label for 0 is the first level.
heart$DEATH_EVENT <- factor(
  heart$DEATH_EVENT,
  levels = c(0, 1),
  labels = c("Survived", "Died")
)
heart$high_blood_pressure <- factor(
  heart$high_blood_pressure,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
# Confirm the recoding by listing the resulting factor levels.
levels(heart$DEATH_EVENT)
## [1] "Survived" "Died"
## [1] "No" "Yes"
| | x |
|---|---|
| age | 0 |
| anaemia | 0 |
| creatinine_phosphokinase | 0 |
| diabetes | 0 |
| ejection_fraction | 0 |
| high_blood_pressure | 0 |
| platelets | 0 |
| serum_creatinine | 0 |
| serum_sodium | 0 |
| sex | 0 |
| smoking | 0 |
| time | 0 |
| DEATH_EVENT | 0 |
# Descriptive statistics for ejection_fraction within each DEATH_EVENT group:
# five-number summary, mean, standard deviation, group size and NA count.
table2 <- heart %>%
  group_by(DEATH_EVENT) %>%
  summarise(
    Min = min(ejection_fraction, na.rm = TRUE),
    Q1 = quantile(ejection_fraction, probs = 0.25, na.rm = TRUE),
    Median = median(ejection_fraction, na.rm = TRUE),
    Mean = mean(ejection_fraction, na.rm = TRUE),
    Q3 = quantile(ejection_fraction, probs = 0.75, na.rm = TRUE),
    Max = max(ejection_fraction, na.rm = TRUE),
    SD = sd(ejection_fraction, na.rm = TRUE),
    # n() counts every row in the group, including rows where the value is NA.
    n = n(),
    Missing = sum(is.na(ejection_fraction))
  )
knitr::kable(table2)
| DEATH_EVENT | Min | Q1 | Median | Mean | Q3 | Max | SD | n | Missing |
|---|---|---|---|---|---|---|---|---|---|
| Survived | 17 | 35 | 38 | 40.26601 | 45 | 80 | 10.85996 | 203 | 0 |
| Died | 14 | 25 | 30 | 33.46875 | 38 | 70 | 12.52530 | 96 | 0 |
# Column proportions of high_blood_pressure within each DEATH_EVENT group.
# prop.table(margin = 2) scales every DEATH_EVENT column to sum to 1, so only
# the column totals are meaningful. addmargins(margin = 1) appends that "Sum"
# row without also appending row totals, which would add together proportions
# taken from different groups (previously shown as 1.27 / 0.73 / 2.00).
table3 <- heart %>%
  xtabs(~ high_blood_pressure + DEATH_EVENT, data = .) %>%
  prop.table(margin = 2) %>%
  addmargins(margin = 1)
knitr::kable(table3)
| | Survived | Died |
|---|---|---|
| No | 0.6748768 | 0.59375 |
| Yes | 0.3251232 | 0.40625 |
| Sum | 1.0000000 | 1.00000 |
# Side-by-side box plots of ejection_fraction, one box per DEATH_EVENT level.
boxplot(
  ejection_fraction ~ DEATH_EVENT,
  data = heart,
  main = "Ejection Fraction by Death Event",
  xlab = "Death Event",
  ylab = "Ejection Fraction (%)"
)
# Histogram of ejection_fraction to check skewness
hist(
  heart$ejection_fraction,
  main = "Distribution of Ejection Fraction",
  xlab = "Ejection Fraction (%)"
)
# Overlay the mean (red) and median (blue); a gap between the two lines
# points to skew. na.rm = TRUE matches the summary statistics and stops a
# single missing value from turning the line position into NA, in which case
# abline() would silently draw nothing.
abline(
  v = mean(heart$ejection_fraction, na.rm = TRUE),
  col = "red",
  lwd = 2
)
abline(
  v = median(heart$ejection_fraction, na.rm = TRUE),
  col = "blue",
  lwd = 2
)
# Label the reference lines so the colours can be read without the code.
legend(
  "topright",
  legend = c("Mean", "Median"),
  col = c("red", "blue"),
  lwd = 2
)
# Checking assumptions
## Normality
# Normal Q-Q plot of ejection_fraction, drawn in a separate panel for each
# DEATH_EVENT level.
ggqqplot(
  data = heart,
  x = "ejection_fraction",
  facet.by = "DEATH_EVENT"
)
# Conduct t-test with equal variance with 95% CI
# Two-sided two-sample t-test of mean ejection_fraction between the
# DEATH_EVENT groups. var.equal = TRUE requests the pooled-variance form;
# conf.level is written out explicitly (0.95 is also the default).
t.test(
  ejection_fraction ~ DEATH_EVENT,
  data = heart,
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: ejection_fraction by DEATH_EVENT
## t = 4.8056, df = 297, p-value = 2.453e-06
## alternative hypothesis: true difference in means between group Survived and group Died is not equal to 0
## 95 percent confidence interval:
## 4.013671 9.580849
## sample estimates:
## mean in group Survived mean in group Died
## 40.26601 33.46875
# Assumption check for the chi-square test: at most 25% of the cells in the
# contingency table may have an expected frequency below 5.
# Cross-tabulate blood-pressure status (rows) against outcome (columns).
assume <- table(
  heart$high_blood_pressure,
  heart$DEATH_EVENT
)
# Run the test once and keep the result so its components can be inspected.
chi_assume <- chisq.test(assume)
chi_assume[["observed"]]
##
## Survived Died
## No 137 57
## Yes 66 39
##
## Survived Died
## No 131.71237 62.28763
## Yes 71.28763 33.71237
## [1] 0.2141034
AHA (American Heart Association) (2025) What is Heart Failure?, American Heart Association website, accessed 26 May 2026. https://www.heart.org/en/health-topics/heart-failure/what-is-heart-failure
UCI Machine Learning Repository (2020) Heart Failure Clinical Records, UCI Machine Learning Repository website, accessed 26 May 2026. https://archive.ics.uci.edu/dataset/519/heart-failure-clinical-records