Tahap pra pemrosesan data digunakan untuk mengecek kualitas data dan validitas analisis statistik yang akan dilakukan.
tidyverse: digunakan untuk manipulasi data; DecsTools: digunakan untuk fungsi Winsorize, meskipun dalam skrip digunakan fungsi winsorisasi manual
#install.packages("tidyverse")
#install.packages("DescTools")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(DescTools) # untuk Winsorize
## Warning: package 'DescTools' was built under R version 4.4.3
data <- read.csv("heart.csv", stringsAsFactors = FALSE)
head(data)
## Age Sex ChestPainType RestingBP Cholesterol FastingBS RestingECG MaxHR
## 1 40 M ATA 140 289 0 Normal 172
## 2 49 F NAP 160 180 0 Normal 156
## 3 37 M ATA 130 283 0 ST 98
## 4 48 F ASY 138 214 0 Normal 108
## 5 54 M NAP 150 195 0 Normal 122
## 6 39 M NAP 120 339 0 Normal 170
## ExerciseAngina Oldpeak ST_Slope HeartDisease
## 1 N 0.0 Up 0
## 2 N 1.0 Flat 1
## 3 N 0.0 Up 0
## 4 Y 1.5 Flat 1
## 5 N 0.0 Up 0
## 6 N 0.0 Up 0
str(data)
## 'data.frame': 918 obs. of 12 variables:
## $ Age : int 40 49 37 48 54 39 45 54 37 48 ...
## $ Sex : chr "M" "F" "M" "F" ...
## $ ChestPainType : chr "ATA" "NAP" "ATA" "ASY" ...
## $ RestingBP : int 140 160 130 138 150 120 130 110 140 120 ...
## $ Cholesterol : int 289 180 283 214 195 339 237 208 207 284 ...
## $ FastingBS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RestingECG : chr "Normal" "Normal" "ST" "Normal" ...
## $ MaxHR : int 172 156 98 108 122 170 170 142 130 120 ...
## $ ExerciseAngina: chr "N" "N" "N" "Y" ...
## $ Oldpeak : num 0 1 0 1.5 0 0 0 0 1.5 0 ...
## $ ST_Slope : chr "Up" "Flat" "Up" "Flat" ...
## $ HeartDisease : int 0 1 0 1 0 0 0 0 1 0 ...
summary(data)
## Age Sex ChestPainType RestingBP
## Min. :28.00 Length:918 Length:918 Min. : 0.0
## 1st Qu.:47.00 Class :character Class :character 1st Qu.:120.0
## Median :54.00 Mode :character Mode :character Median :130.0
## Mean :53.51 Mean :132.4
## 3rd Qu.:60.00 3rd Qu.:140.0
## Max. :77.00 Max. :200.0
## Cholesterol FastingBS RestingECG MaxHR
## Min. : 0.0 Min. :0.0000 Length:918 Min. : 60.0
## 1st Qu.:173.2 1st Qu.:0.0000 Class :character 1st Qu.:120.0
## Median :223.0 Median :0.0000 Mode :character Median :138.0
## Mean :198.8 Mean :0.2331 Mean :136.8
## 3rd Qu.:267.0 3rd Qu.:0.0000 3rd Qu.:156.0
## Max. :603.0 Max. :1.0000 Max. :202.0
## ExerciseAngina Oldpeak ST_Slope HeartDisease
## Length:918 Min. :-2.6000 Length:918 Min. :0.0000
## Class :character 1st Qu.: 0.0000 Class :character 1st Qu.:0.0000
## Mode :character Median : 0.6000 Mode :character Median :1.0000
## Mean : 0.8874 Mean :0.5534
## 3rd Qu.: 1.5000 3rd Qu.:1.0000
## Max. : 6.2000 Max. :1.0000
cat("Jumlah missing values per kolom:n")
## Jumlah missing values per kolom:n
print(colSums(is.na(data)))
## Age Sex ChestPainType RestingBP Cholesterol
## 0 0 0 0 0
## FastingBS RestingECG MaxHR ExerciseAngina Oldpeak
## 0 0 0 0 0
## ST_Slope HeartDisease
## 0 0
# Deteksi kolom numerik dan kategorikal
numeric_vars <- names(data)[sapply(data, is.numeric)]
categorical_vars <- names(data)[sapply(data, is.character)]
# Imputasi numerik dengan median
for (col in numeric_vars) {
median_value <- median(data[[col]], na.rm = TRUE)
data[[col]][is.na(data[[col]])] <- median_value
}
# Imputasi kategorikal dengan modus
get_mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
for (col in categorical_vars) {
mode_value <- get_mode(data[[col]])
data[[col]][is.na(data[[col]])] <- mode_value
}
## Mapping untuk kolom 'Sex':n 1 = F
## 2 = M
## nMapping untuk kolom 'ChestPainType':n 1 = ASY
## 2 = ATA
## 3 = NAP
## 4 = TA
## nMapping untuk kolom 'RestingECG':n 1 = LVH
## 2 = Normal
## 3 = ST
## nMapping untuk kolom 'ExerciseAngina':n 1 = N
## 2 = Y
## nMapping untuk kolom 'ST_Slope':n 1 = Down
## 2 = Flat
## 3 = Up
## n
## Age Sex ChestPainType RestingBP Cholesterol FastingBS RestingECG MaxHR
## 1 40 2 2 140 289 0 2 172
## 2 49 1 3 160 180 0 2 156
## 3 37 2 2 130 283 0 3 98
## 4 48 1 1 138 214 0 2 108
## 5 54 2 3 150 195 0 2 122
## 6 39 2 3 120 339 0 2 170
## ExerciseAngina Oldpeak ST_Slope HeartDisease
## 1 1 0.0 3 0
## 2 1 1.0 2 1
## 3 1 0.0 3 0
## 4 2 1.5 2 1
## 5 1 0.0 3 0
## 6 1 0.0 3 0
winsorize_manual <- function(x, lower = 0.01, upper = 0.99) {
qnt <- quantile(x, probs = c(lower, upper), na.rm = TRUE)
x[x < qnt[1]] <- qnt[1]
x[x > qnt[2]] <- qnt[2]
return(x)
}
for (col in numeric_vars) {
data[[col]] <- winsorize_manual(data[[col]])
}
write.csv(data, "heart_preprocessed.csv", row.names = FALSE)
str(data)
## 'data.frame': 918 obs. of 12 variables:
## $ Age : num 40 49 37 48 54 39 45 54 37 48 ...
## $ Sex : int 2 1 2 1 2 2 1 2 2 1 ...
## $ ChestPainType : int 2 3 2 1 3 3 2 2 1 2 ...
## $ RestingBP : num 140 160 130 138 150 120 130 110 140 120 ...
## $ Cholesterol : num 289 180 283 214 195 339 237 208 207 284 ...
## $ FastingBS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ RestingECG : int 2 2 3 2 2 2 2 2 2 2 ...
## $ MaxHR : num 172 156 98 108 122 170 170 142 130 120 ...
## $ ExerciseAngina: int 1 1 1 2 1 1 1 1 2 1 ...
## $ Oldpeak : num 0 1 0 1.5 0 0 0 0 1.5 0 ...
## $ ST_Slope : int 3 2 3 2 3 3 3 3 2 3 ...
## $ HeartDisease : num 0 1 0 1 0 0 0 0 1 0 ...
summary(data)
## Age Sex ChestPainType RestingBP Cholesterol
## Min. :32.00 Min. :1.00 Min. :1.000 Min. : 95.0 Min. : 0.0
## 1st Qu.:47.00 1st Qu.:2.00 1st Qu.:1.000 1st Qu.:120.0 1st Qu.:173.2
## Median :54.00 Median :2.00 Median :1.000 Median :130.0 Median :223.0
## Mean :53.52 Mean :1.79 Mean :1.781 Mean :132.4 Mean :197.9
## 3rd Qu.:60.00 3rd Qu.:2.00 3rd Qu.:3.000 3rd Qu.:140.0 3rd Qu.:267.0
## Max. :74.00 Max. :2.00 Max. :4.000 Max. :180.0 Max. :411.5
## FastingBS RestingECG MaxHR ExerciseAngina
## Min. :0.0000 Min. :1.000 Min. : 77.17 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:120.00 1st Qu.:1.000
## Median :0.0000 Median :2.000 Median :138.00 Median :1.000
## Mean :0.2331 Mean :1.989 Mean :136.84 Mean :1.404
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:156.00 3rd Qu.:2.000
## Max. :1.0000 Max. :3.000 Max. :186.00 Max. :2.000
## Oldpeak ST_Slope HeartDisease
## Min. :-0.500 Min. :1.000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:2.000 1st Qu.:0.0000
## Median : 0.600 Median :2.000 Median :1.0000
## Mean : 0.889 Mean :2.362 Mean :0.5534
## 3rd Qu.: 1.500 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. : 4.000 Max. :3.000 Max. :1.0000
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(dplyr)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
Distribusi Usia Pasien
ggplot(data, aes(x = Age)) +
geom_histogram(binwidth = 5, fill = "steelblue", color = "black") +
labs(title = "Distribusi Usia Pasien", x = "Usia", y = "Frekuensi")
Distribusi Kadar Kolesterol
ggplot(data, aes(x = Cholesterol)) +
geom_histogram(binwidth = 20, fill = "darkorange", color = "black") +
labs(title = "Distribusi Kadar Kolesterol", x = "Kolesterol (mg/dl)", y = "Frekuensi")
Kadar Kolesterol Berdasarkan Status Penyakit Jantung
ggplot(data, aes(x = as.factor(HeartDisease), y = Cholesterol, fill = as.factor(HeartDisease))) +
geom_boxplot() +
labs(title = "Kolesterol berdasarkan Status Penyakit Jantung",
x = "Heart Disease (0 = Tidak, 1 = Ya)", y = "Kolesterol (mg/dl)") +
scale_fill_manual(values = c("skyblue", "tomato")) +
theme_minimal()
Detak Jantung Maksimum Berdasarkan Tipe Nyeri Dada
ggplot(data, aes(x = ChestPainType, y = MaxHR, fill = ChestPainType)) +
geom_boxplot() +
labs(title = "Detak Jantung Maksimum Berdasarkan Tipe Nyeri Dada",
x = "Tipe Nyeri Dada", y = "Detak Jantung Maksimum") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Korelasi Matriks Heatmap ini memperlihatkan korelasi antar variabel numerik. Beberapa insight penting: - Korelasi negatif antara Age dan MaxHR (-0.38), menunjukkan bahwa semakin tua usia pasien, semakin rendah detak jantung maksimum yang dicapai. - Korelasi positif antara Age dan Oldpeak (0.26), mengindikasikan bahwa usia yang lebih tinggi cenderung diikuti dengan depresi segmen ST yang lebih besar. - Korelasi antar variabel lain relatif rendah, menandakan variabel-variabel tersebut cukup independen satu sama lain.
# Pastikan hanya variabel numerik
numeric_data <- data %>%
select(Age, RestingBP, Cholesterol,
MaxHR, Oldpeak)
# Hitung matriks korelasi
corr_matrix <- cor(numeric_data, use = "complete.obs")
# Tampilkan plot korelasi
corrplot::corrplot(corr_matrix, method = "color", addCoef.col = "black",
tl.col = "black", number.cex = 0.8, tl.cex = 0.9)
STATISTIK DESKRIPTIF
library(dplyr)
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:DescTools':
##
## AUC, ICC, SD
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Statistik deskriptif variabel numerik
data %>%
select(Age, RestingBP, Cholesterol, MaxHR, Oldpeak) %>%
psych::describe()
## vars n mean sd median trimmed mad min max range
## Age 1 918 53.52 9.35 54.0 53.71 10.38 32.00 74.00 42.00
## RestingBP 2 918 132.39 17.58 130.0 131.50 14.83 95.00 180.00 85.00
## Cholesterol 3 918 197.92 107.14 223.0 204.41 68.20 0.00 411.49 411.49
## MaxHR 4 918 136.84 25.12 138.0 137.23 26.69 77.17 186.00 108.83
## Oldpeak 5 918 0.89 1.03 0.6 0.74 0.89 -0.50 4.00 4.50
## skew kurtosis se
## Age -0.19 -0.50 0.31
## RestingBP 0.46 0.14 0.58
## Cholesterol -0.79 -0.26 3.54
## MaxHR -0.12 -0.64 0.83
## Oldpeak 0.96 0.18 0.03
# Statistik deskriptif variabel kategorikal
# Frekuensi dan persentase Sex
table(data$Sex)
##
## 1 2
## 193 725
prop.table(table(data$Sex)) * 100
##
## 1 2
## 21.02397 78.97603
# Frekuensi dan persentase ChestPainType
table(data$ChestPainType)
##
## 1 2 3 4
## 496 173 203 46
prop.table(table(data$ChestPainType)) * 100
##
## 1 2 3 4
## 54.030501 18.845316 22.113290 5.010893
# Frekuensi dan persentase HeartDisease
table(data$HeartDisease)
##
## 0 1
## 410 508
prop.table(table(data$HeartDisease)) * 100
##
## 0 1
## 44.66231 55.33769
Load dateset, dimana menggunakan dataset yang telah di pre-processing sebelumnya
## Age Sex ChestPainType RestingBP Cholesterol FastingBS RestingECG MaxHR
## 1 40 2 2 140 289 0 2 172
## 2 49 1 3 160 180 0 2 156
## 3 37 2 2 130 283 0 3 98
## 4 48 1 1 138 214 0 2 108
## 5 54 2 3 150 195 0 2 122
## 6 39 2 3 120 339 0 2 170
## ExerciseAngina Oldpeak ST_Slope HeartDisease
## 1 1 0.0 3 0
## 2 1 1.0 2 1
## 3 1 0.0 3 0
## 4 2 1.5 2 1
## 5 1 0.0 3 0
## 6 1 0.0 3 0
Memastikan variabel kategorikal sebagai faktor
data$Sex <- as.factor(data$Sex)
data$ChestPainType <- as.factor(data$ChestPainType)
shapiro.test(data$RestingBP)
##
## Shapiro-Wilk normality test
##
## data: data$RestingBP
## W = 0.97373, p-value = 8.17e-12
shapiro.test(data$Cholesterol)
##
## Shapiro-Wilk normality test
##
## data: data$Cholesterol
## W = 0.86304, p-value < 2.2e-16
shapiro.test(data$MaxHR)
##
## Shapiro-Wilk normality test
##
## data: data$MaxHR
## W = 0.9873, p-value = 3.845e-07
shapiro.test(data$Oldpeak)
##
## Shapiro-Wilk normality test
##
## data: data$Oldpeak
## W = 0.8483, p-value < 2.2e-16
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
data_dep <- data[, c("RestingBP", "Cholesterol", "MaxHR", "Oldpeak")]
mardia_result <- mvn(data = data_dep, mvnTest = "mardia")
print(mardia_result)
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 339.235348809589 7.32799423733464e-60 NO
## 2 Mardia Kurtosis 0.488220820781849 0.625393438355976 YES
## 3 MVN <NA> <NA> NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling RestingBP 6.9736 <0.001 NO
## 2 Anderson-Darling Cholesterol 51.7027 <0.001 NO
## 3 Anderson-Darling MaxHR 2.0862 <0.001 NO
## 4 Anderson-Darling Oldpeak 51.1834 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th
## RestingBP 918 132.3943355 17.578404 130.0 95.00 180.00 120.00 140.0
## Cholesterol 918 197.9160131 107.140351 223.0 0.00 411.49 173.25 267.0
## MaxHR 918 136.8373638 25.122101 138.0 77.17 186.00 120.00 156.0
## Oldpeak 918 0.8889978 1.029096 0.6 -0.50 4.00 0.00 1.5
## Skew Kurtosis
## RestingBP 0.4644577 0.1436962
## Cholesterol -0.7889810 -0.2554400
## MaxHR -0.1164530 -0.6434004
## Oldpeak 0.9629329 0.1783312
#install.packages("biotools")
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## ---
## biotools version 4.3
# Box’s M Test dengan kombinasi faktor
boxM(data[, c("RestingBP", "Cholesterol", "MaxHR", "Oldpeak")], interaction(data$Sex, data$ChestPainType))
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data[, c("RestingBP", "Cholesterol", "MaxHR", "Oldpeak")]
## Chi-Sq (approx.) = 224.99, df = 70, p-value < 2.2e-16
cor(data[, c("RestingBP", "Cholesterol", "MaxHR", "Oldpeak")])
## RestingBP Cholesterol MaxHR Oldpeak
## RestingBP 1.0000000 0.10140810 -0.1113279 0.16663337
## Cholesterol 0.1014081 1.00000000 0.2409049 0.04022584
## MaxHR -0.1113279 0.24090495 1.0000000 -0.17228264
## Oldpeak 0.1666334 0.04022584 -0.1722826 1.00000000
Uji Linieritas (Mancova). Uji memastikan bahwa kovariat
berhubungan secara linear dengan masing-masing variabel dependen.
Terdapat hubungan linear antara kovariat Age dan variabel dependen.
Sehingga asumsi linearitas terpenuhi. Linearitas adalah syarat penting
dalam MANCOVA, karena model memprediksi rata-rata linier dari kovariat.
Homogenitas Regresi. Uji ini memastikan bahwa hubungan antara kovariat dan dependen tidak berubah antar grup (homogen).
## Analysis of Variance Table
##
## Df Pillai approx F num Df den Df Pr(>F)
## (Intercept) 1 0.99053 23514.2 4 899 < 2.2e-16 ***
## Age 1 0.25890 78.5 4 899 < 2.2e-16 ***
## Sex 1 0.08135 19.9 4 899 1.026e-15 ***
## ChestPainType 3 0.17615 14.1 12 2703 < 2.2e-16 ***
## Age:Sex 1 0.01478 3.4 4 899 0.009469 **
## Age:ChestPainType 3 0.01520 1.1 12 2703 0.316313
## Sex:ChestPainType 3 0.01843 1.4 12 2703 0.161694
## Age:Sex:ChestPainType 3 0.00965 0.7 12 2703 0.725999
## Residuals 902
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Transformasi Z-score
data$zRestingBP <- scale(data$RestingBP)
data$zCholesterol <- scale(data$Cholesterol)
data$zMaxHR <- scale(data$MaxHR)
data$zOldpeak <- scale(data$Oldpeak)
Uji normalitas univariat setelah transformasi. Shapiro-Wilk Test dan Anderson Darling Test pada semua variabel yaitu zRestingBP, zCholesterol, zMaxHR, zOldpeak menunjukkan nilai p-value < 0.05. Hal ini berarti semua variabel tidak berdistribusi normal, meskipun sudah dilakukan transformasi standarisasi (z-score). Maka asumsi normalitas univariat belum terpenuhi.
shapiro.test(data$zRestingBP)
##
## Shapiro-Wilk normality test
##
## data: data$zRestingBP
## W = 0.97373, p-value = 8.17e-12
shapiro.test(data$zCholesterol)
##
## Shapiro-Wilk normality test
##
## data: data$zCholesterol
## W = 0.86304, p-value < 2.2e-16
shapiro.test(data$zMaxHR)
##
## Shapiro-Wilk normality test
##
## data: data$zMaxHR
## W = 0.9873, p-value = 3.845e-07
shapiro.test(data$zOldpeak)
##
## Shapiro-Wilk normality test
##
## data: data$zOldpeak
## W = 0.8483, p-value < 2.2e-16
library(MVN)
mvn(data = data[,c("zRestingBP", "zCholesterol", "zMaxHR", "zOldpeak")], mvnTest = "mardia")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 339.235348809589 7.32799423733444e-60 NO
## 2 Mardia Kurtosis 0.488220820781849 0.625393438355976 YES
## 3 MVN <NA> <NA> NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling zRestingBP 6.9736 <0.001 NO
## 2 Anderson-Darling zCholesterol 51.7027 <0.001 NO
## 3 Anderson-Darling zMaxHR 2.0862 <0.001 NO
## 4 Anderson-Darling zOldpeak 51.1834 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max
## zRestingBP 918 -2.525833e-16 1 -0.13620893 -2.127288 2.708190
## zCholesterol 918 -7.712040e-17 1 0.23412269 -1.847259 1.993404
## zMaxHR 918 -1.039445e-16 1 0.04627942 -2.375094 1.956948
## zOldpeak 918 -3.246170e-17 1 -0.28082681 -1.349726 3.023043
## 25th 75th Skew Kurtosis
## zRestingBP -0.7050888 0.4326709 0.4644577 0.1436962
## zCholesterol -0.2302215 0.6447990 -0.7889810 -0.2554400
## zMaxHR -0.6702212 0.7627800 -0.1164530 -0.6434004
## zOldpeak -0.8638626 0.5937269 0.9629329 0.1783312
# 3. Box's M Test (Homogenitas kovarians setelah standarisasi)
boxM(data[,c("zRestingBP", "zCholesterol", "zMaxHR", "zOldpeak")], data$Sex)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data[, c("zRestingBP", "zCholesterol", "zMaxHR", "zOldpeak")]
## Chi-Sq (approx.) = 41.228, df = 10, p-value = 1.028e-05
## Age Sex ChestPainType RestingBP Cholesterol FastingBS RestingECG MaxHR
## 1 40 2 2 140 289 0 2 172
## 2 49 1 3 160 180 0 2 156
## 3 37 2 2 130 283 0 3 98
## 4 48 1 1 138 214 0 2 108
## 5 54 2 3 150 195 0 2 122
## 6 39 2 3 120 339 0 2 170
## ExerciseAngina Oldpeak ST_Slope HeartDisease
## 1 1 0.0 3 0
## 2 1 1.0 2 1
## 3 1 0.0 3 0
## 4 2 1.5 2 1
## 5 1 0.0 3 0
## 6 1 0.0 3 0
## 'data.frame': 918 obs. of 12 variables:
## $ Age : int 40 49 37 48 54 39 45 54 37 48 ...
## $ Sex : int 2 1 2 1 2 2 1 2 2 1 ...
## $ ChestPainType : int 2 3 2 1 3 3 2 2 1 2 ...
## $ RestingBP : int 140 160 130 138 150 120 130 110 140 120 ...
## $ Cholesterol : num 289 180 283 214 195 339 237 208 207 284 ...
## $ FastingBS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RestingECG : int 2 2 3 2 2 2 2 2 2 2 ...
## $ MaxHR : num 172 156 98 108 122 170 170 142 130 120 ...
## $ ExerciseAngina: int 1 1 1 2 1 1 1 1 2 1 ...
## $ Oldpeak : num 0 1 0 1.5 0 0 0 0 1.5 0 ...
## $ ST_Slope : int 3 2 3 2 3 3 3 3 2 3 ...
## $ HeartDisease : int 0 1 0 1 0 0 0 0 1 0 ...
manova_model <- manova(cbind(RestingBP, Cholesterol, MaxHR, Oldpeak) ~ Sex * ChestPainType, data = data)
manova_model
## Call:
## manova(cbind(RestingBP, Cholesterol, MaxHR, Oldpeak) ~ Sex *
## ChestPainType, data = data)
##
## Terms:
## Sex ChestPainType Sex:ChestPainType Residuals
## RestingBP 36 39 263 283015
## Cholesterol 434566 20802 6546 10064380
## MaxHR 20913 41598 2381 513845
## Oldpeak 13 27 2 929
## Deg. of Freedom 1 1 1 914
##
## Residual standard errors: 17.59671 104.935 23.71063 1.008412
## Estimated effects may be unbalanced
summary(manova_model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Sex 1 0.076208 18.7882 4 911 7.445e-15 ***
## ChestPainType 1 0.092459 23.2028 4 911 < 2.2e-16 ***
## Sex:ChestPainType 1 0.008017 1.8405 4 911 0.1189
## Residuals 914
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(manova_model)
## Response RestingBP :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 36 36.030 0.1164 0.7331
## ChestPainType 1 39 39.131 0.1264 0.7223
## Sex:ChestPainType 1 263 263.251 0.8502 0.3567
## Residuals 914 283015 309.644
##
## Response Cholesterol :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 434566 434566 39.4652 5.16e-10 ***
## ChestPainType 1 20802 20802 1.8892 0.1696
## Sex:ChestPainType 1 6546 6546 0.5945 0.4409
## Residuals 914 10064380 11011
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response MaxHR :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 20913 20913 37.1986 1.573e-09 ***
## ChestPainType 1 41598 41598 73.9928 < 2.2e-16 ***
## Sex:ChestPainType 1 2381 2381 4.2344 0.0399 *
## Residuals 914 513845 562
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Oldpeak :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 13.10 13.0951 12.8775 0.0003502 ***
## ChestPainType 1 27.02 27.0173 26.5684 3.118e-07 ***
## Sex:ChestPainType 1 1.58 1.5840 1.5577 0.2123180
## Residuals 914 929.44 1.0169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#install.packages("ggplot2")
library(ggplot2)
ggplot(data, aes(x = ChestPainType, y = MaxHR)) +
geom_boxplot() +
stat_summary(fun = mean, geom = "point", shape = 20, size = 3, color = "red") +
facet_wrap(~Sex) +
labs(
title = "MaxHR berdasarkan ChestPainType untuk Setiap Jenis Kelamin",
x = "Chest Pain Type",
y = "Maximum Heart Rate"
) +
theme_minimal()
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## 'data.frame': 918 obs. of 12 variables:
## $ Age : int 40 49 37 48 54 39 45 54 37 48 ...
## $ Sex : int 2 1 2 1 2 2 1 2 2 1 ...
## $ ChestPainType : int 2 3 2 1 3 3 2 2 1 2 ...
## $ RestingBP : int 140 160 130 138 150 120 130 110 140 120 ...
## $ Cholesterol : num 289 180 283 214 195 339 237 208 207 284 ...
## $ FastingBS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RestingECG : int 2 2 3 2 2 2 2 2 2 2 ...
## $ MaxHR : num 172 156 98 108 122 170 170 142 130 120 ...
## $ ExerciseAngina: int 1 1 1 2 1 1 1 1 2 1 ...
## $ Oldpeak : num 0 1 0 1.5 0 0 0 0 1.5 0 ...
## $ ST_Slope : int 3 2 3 2 3 3 3 3 2 3 ...
## $ HeartDisease : int 0 1 0 1 0 0 0 0 1 0 ...
## 'data.frame': 918 obs. of 12 variables:
## $ Age : int 40 49 37 48 54 39 45 54 37 48 ...
## $ Sex : Factor w/ 2 levels "1","2": 2 1 2 1 2 2 1 2 2 1 ...
## $ ChestPainType : Factor w/ 4 levels "1","2","3","4": 2 3 2 1 3 3 2 2 1 2 ...
## $ RestingBP : int 140 160 130 138 150 120 130 110 140 120 ...
## $ Cholesterol : num 289 180 283 214 195 339 237 208 207 284 ...
## $ FastingBS : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RestingECG : int 2 2 3 2 2 2 2 2 2 2 ...
## $ MaxHR : num 172 156 98 108 122 170 170 142 130 120 ...
## $ ExerciseAngina: int 1 1 1 2 1 1 1 1 2 1 ...
## $ Oldpeak : num 0 1 0 1.5 0 0 0 0 1.5 0 ...
## $ ST_Slope : int 3 2 3 2 3 3 3 3 2 3 ...
## $ HeartDisease : int 0 1 0 1 0 0 0 0 1 0 ...
mancova_model <- manova(cbind(RestingBP, Cholesterol, MaxHR, Oldpeak) ~ Sex + ChestPainType + Age, data = heart_data)
mancova_model
## Call:
## manova(cbind(RestingBP, Cholesterol, MaxHR, Oldpeak) ~ Sex +
## ChestPainType + Age, data = heart_data)
##
## Terms:
## Sex ChestPainType Age Residuals
## RestingBP 36 1813 19223 262281
## Cholesterol 434566 180530 36845 9874353
## MaxHR 20913 65090 58749 433985
## Oldpeak 13 94 37 827
## Deg. of Freedom 1 3 1 912
##
## Residual standard errors: 16.95844 104.0536 21.81424 0.9520923
## Estimated effects may be unbalanced
summary(mancova_model, test = "Pillai")
## Df Pillai approx F num Df den Df Pr(>F)
## Sex 1 0.092131 23.062 4 909 < 2.2e-16 ***
## ChestPainType 3 0.229537 18.869 12 2733 < 2.2e-16 ***
## Age 1 0.196211 55.473 4 909 < 2.2e-16 ***
## Residuals 912
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(mancova_model)
## Response RestingBP :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 36 36.0 0.1253 0.7235
## ChestPainType 3 1813 604.3 2.1014 0.0985 .
## Age 1 19223 19223.3 66.8430 9.793e-16 ***
## Residuals 912 262281 287.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Cholesterol :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 434566 434566 40.1367 3.715e-10 ***
## ChestPainType 3 180530 60177 5.5579 0.0008818 ***
## Age 1 36845 36845 3.4030 0.0654015 .
## Residuals 912 9874353 10827
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response MaxHR :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 20913 20913 43.947 5.767e-11 ***
## ChestPainType 3 65090 21697 45.594 < 2.2e-16 ***
## Age 1 58749 58749 123.459 < 2.2e-16 ***
## Residuals 912 433985 476
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Oldpeak :
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 13.10 13.095 14.446 0.0001538 ***
## ChestPainType 3 94.25 31.418 34.659 < 2.2e-16 ***
## Age 1 37.08 37.080 40.906 2.548e-10 ***
## Residuals 912 826.71 0.906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1