#Membuat Tabel Kontingensi antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease)
#Import data
Data_HeartDisease <- read.csv("~/Documents/S1 Aida/Semester 2 2025/STIK4362 Analisis Data Kategorik/Tugas 1/Heart Disease.csv", header = TRUE, sep = ",")
head(Data_HeartDisease)
## Age Gender ChestPain BloodPressure Cholesterol BloodSugar
## 1 63 M typical angina 145 233 >120
## 2 67 M asymptotic 160 286 <=120
## 3 67 M asymptotic 120 229 <=120
## 4 37 M non-anginal pain 130 250 <=120
## 5 41 F atypical angina 130 204 <=120
## 6 56 M atypical angina 120 236 <=120
## Electrocardiographic HeartRate Exang ST slope Vessel Thalassemia
## 1 hyperthropy 150 no 2.3 down 0 fixed defect
## 2 hyperthropy 108 yes 1.5 flat 3 normal
## 3 hyperthropy 129 yes 2.6 flat 2 reversible defect
## 4 normal 187 no 3.5 down 0 normal
## 5 hyperthropy 172 no 1.4 up 0 normal
## 6 normal 178 no 0.8 up 0 normal
## HeartDisease
## 1 no
## 2 yes
## 3 yes
## 4 no
## 5 no
## 6 no
#Jumlah Data
nrow(Data_HeartDisease)
## [1] 299
#Mengecheck Struktur Data
str(Data_HeartDisease)
## 'data.frame': 299 obs. of 14 variables:
## $ Age : int 63 67 67 37 41 56 62 57 63 53 ...
## $ Gender : chr "M" "M" "M" "M" ...
## $ ChestPain : chr "typical angina" "asymptotic" "asymptotic" "non-anginal pain" ...
## $ BloodPressure : int 145 160 120 130 130 120 140 120 130 140 ...
## $ Cholesterol : int 233 286 229 250 204 236 268 354 254 203 ...
## $ BloodSugar : chr ">120" "<=120" "<=120" "<=120" ...
## $ Electrocardiographic: chr "hyperthropy" "hyperthropy" "hyperthropy" "normal" ...
## $ HeartRate : int 150 108 129 187 172 178 160 163 147 155 ...
## $ Exang : chr "no" "yes" "yes" "no" ...
## $ ST : num 2.3 1.5 2.6 3.5 1.4 0.8 3.6 0.6 1.4 3.1 ...
## $ slope : chr "down" "flat" "flat" "down" ...
## $ Vessel : int 0 3 2 0 0 0 2 0 1 0 ...
## $ Thalassemia : chr "fixed defect" "normal" "reversible defect" "normal" ...
## $ HeartDisease : chr "no" "yes" "yes" "no" ...
summary(Data_HeartDisease)
## Age Gender ChestPain BloodPressure
## Min. :29.00 Length:299 Length:299 Min. : 94.0
## 1st Qu.:48.00 Class :character Class :character 1st Qu.:120.0
## Median :56.00 Mode :character Mode :character Median :130.0
## Mean :54.53 Mean :131.7
## 3rd Qu.:61.00 3rd Qu.:140.0
## Max. :77.00 Max. :200.0
## Cholesterol BloodSugar Electrocardiographic HeartRate
## Min. :126.0 Length:299 Length:299 Min. : 71.0
## 1st Qu.:211.0 Class :character Class :character 1st Qu.:133.0
## Median :242.0 Mode :character Mode :character Median :153.0
## Mean :247.1 Mean :149.5
## 3rd Qu.:275.5 3rd Qu.:165.5
## Max. :564.0 Max. :202.0
## Exang ST slope Vessel
## Length:299 Min. :0.000 Length:299 Min. :0.0000
## Class :character 1st Qu.:0.000 Class :character 1st Qu.:0.0000
## Mode :character Median :0.800 Mode :character Median :0.0000
## Mean :1.052 Mean :0.6722
## 3rd Qu.:1.600 3rd Qu.:1.0000
## Max. :6.200 Max. :3.0000
## Thalassemia HeartDisease
## Length:299 Length:299
## Class :character Class :character
## Mode :character Mode :character
##
##
##
#Mengubah variabel kategori menjadi factor
#HeartDisease <- na.omit(trimws(Data_HeartDisease$HeartDisease))
#HeartDisease <- as.factor(HeartDisease)
HeartDisease <- factor(Data_HeartDisease$HeartDisease)
#Gender <- na.omit(trimws(Data_HeartDisease$Gender))
#Gender <- as.factor(Gender)
Gender <- factor(Data_HeartDisease$Gender)
#Tabel Kontingensi 2x2 antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease)
contingency_table <- table (Gender, HeartDisease)
cat("\n Tabel 1. Tabel Kontingensi 2x2 antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease) \n")
##
## Tabel 1. Tabel Kontingensi 2x2 antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease)
print(addmargins(contingency_table))
## HeartDisease
## Gender no yes Sum
## F 72 25 97
## M 89 113 202
## Sum 161 138 299
#menghitung nilai Chi-square
cat("\n Uji Chi-Square Hubungan antara jenis kelamin dan status penyakit jantung pada taraf nyata 5% \n")
##
## Uji Chi-Square Hubungan antara jenis kelamin dan status penyakit jantung pada taraf nyata 5%
chi <- chisq.test(contingency_table, correct = FALSE)
print (chi)
##
## Pearson's Chi-squared test
##
## data: contingency_table
## X-squared = 23.997, df = 1, p-value = 9.646e-07
#Frekuensi Harapan
cat("\n Tabel 2. Frekuensi Harapan \n")
##
## Tabel 2. Frekuensi Harapan
print(chi$expected)
## HeartDisease
## Gender no yes
## F 52.23077 44.76923
## M 108.76923 93.23077
#Menghitung nilai OR
OR <-((contingency_table[1,1]*contingency_table[2,2])/(contingency_table[1,2]*contingency_table[2,1]))
#Menampilkan hasil
cat("Nilai Odds Ratio", OR, "\n")
## Nilai Odds Ratio 3.656629
#Menghitung nilai OR dengan Package
library(epitools)
## Warning: package 'epitools' was built under R version 4.4.1
OR_Hasil <- oddsratio(contingency_table,method = "wald")
cat("Nilai Odds Ratio", OR_Hasil$measure[2,1], "\n")
## Nilai Odds Ratio 3.656629
#Model Regresi Logistik Biner
Vessel <- Data_HeartDisease$Vessel
HeartRate <- Data_HeartDisease$HeartRate
HeartDisease <- ifelse(Data_HeartDisease$HeartDisease == "yes", 1, 0)
Gender <- ifelse(Data_HeartDisease$Gender == "M", 1, 0)
dataRLB <- data.frame(HeartDisease, Gender, Vessel, HeartRate)
RLB <- glm(HeartDisease ~ Gender + HeartRate + Vessel, data = dataRLB, family = binomial)
#Summarize the model
summary (RLB)
##
## Call:
## glm(formula = HeartDisease ~ Gender + HeartRate + Vessel, family = binomial,
## data = dataRLB)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.079707 1.116123 3.655 0.000257 ***
## Gender 1.520229 0.333810 4.554 5.26e-06 ***
## HeartRate -0.040174 0.007406 -5.425 5.81e-08 ***
## Vessel 1.172716 0.192693 6.086 1.16e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 412.73 on 298 degrees of freedom
## Residual deviance: 283.79 on 295 degrees of freedom
## AIC: 291.79
##
## Number of Fisher Scoring iterations: 5
#Likelihood Ratio Test using anova function
anova(RLB, test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: HeartDisease
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 298 412.73
## Gender 1 24.848 297 387.88 6.205e-07 ***
## HeartRate 1 55.721 296 332.16 8.354e-14 ***
## Vessel 1 48.371 295 283.79 3.527e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit the null model (tanpa variabel bebas)
null_model <- glm(HeartDisease ~ 1, data = dataRLB, family = binomial)
# Fit the full model (dengan variabel bebas)
full_model <- RLB
# Calculate log-likelihood values
loglik_null <- logLik(null_model)
loglik_null
## 'log Lik.' -206.3655 (df=1)
loglik_full <- logLik(full_model)
loglik_full
## 'log Lik.' -141.8957 (df=4)
# Perform likelihood ratio test
G <- -2*(loglik_null-loglik_full)
G
## 'log Lik.' 128.9396 (df=1)
# Perform likelihood ratio test using a package
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
likelihood_ratio_test <- lrtest(null_model, full_model)
likelihood_ratio_test
## Likelihood ratio test
##
## Model 1: HeartDisease ~ 1
## Model 2: HeartDisease ~ Gender + HeartRate + Vessel
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 1 -206.37
## 2 4 -141.90 3 128.94 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Interpretation of likelihood ratio test results
if (likelihood_ratio_test$"Pr(>Chisq)"[2] < 0.05)
{
cat("Reject the null hypothesis. The full model is significantly better than
the null model.\n")
} else {
cat("Fail to reject the null hypothesis. The null model is sufficient.\n")
}
## Reject the null hypothesis. The full model is significantly better than
## the null model.
#Wald Test
summary (RLB) #lihat Z Value dan P(|Z|)
##
## Call:
## glm(formula = HeartDisease ~ Gender + HeartRate + Vessel, family = binomial,
## data = dataRLB)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.079707 1.116123 3.655 0.000257 ***
## Gender 1.520229 0.333810 4.554 5.26e-06 ***
## HeartRate -0.040174 0.007406 -5.425 5.81e-08 ***
## Vessel 1.172716 0.192693 6.086 1.16e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 412.73 on 298 degrees of freedom
## Residual deviance: 283.79 on 295 degrees of freedom
## AIC: 291.79
##
## Number of Fisher Scoring iterations: 5
confint(RLB, level = 0.95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 1.94495221 6.33577375
## Gender 0.88468750 2.19829584
## HeartRate -0.05532964 -0.02620644
## Vessel 0.81156423 1.56909195
#Wald Test (manual)
wald_Gender <- RLB$coefficients[2]/summary(RLB)$coefficient["Gender", "Std. Error"]
wald_Gender
## Gender
## 4.554173
p_value_Gender <- 1-pchisq(wald_Gender^2, df=1)
p_value_Gender
## Gender
## 5.259196e-06
wald_HR <- RLB$coefficients[3]/summary(RLB)$coefficient["HeartRate", "Std. Error"]
wald_HR
## HeartRate
## -5.424675
p_value_HR <- 1-pchisq(wald_HR^2, df=1)
p_value_HR
## HeartRate
## 5.806009e-08
wald_Vessel <- RLB$coefficients[4]/summary(RLB)$coefficient["Vessel", "Std. Error"]
wald_Vessel
## Vessel
## 6.085926
p_value_Vessel <- 1-pchisq(wald_Vessel^2, df=1)
p_value_Vessel
## Vessel
## 1.158198e-09
#Extract Coefficients
intercept <- round(RLB$coefficients[1], 4)
intercept
## (Intercept)
## 4.0797
coefficient_Gender <- round(RLB$coefficients[2], 4)
coefficient_Gender
## Gender
## 1.5202
coefficient_HeartRate <- round(RLB$coefficients[3], 4)
coefficient_HeartRate
## HeartRate
## -0.0402
coefficient_Vessel <- round(RLB$coefficients[4], 4)
coefficient_Vessel
## Vessel
## 1.1727
#Menghitung Nilai OR
odds_ratioGender <- exp(coefficient_Gender)
odds_ratioHeartRate <- exp(coefficient_HeartRate)
odds_ratioVessel <- exp(coefficient_Vessel)
#Print OR
print(paste("Odds Ratio Gender:", round(odds_ratioGender, 4)))
## [1] "Odds Ratio Gender: 4.5731"
print(paste("Odds Ratio HeartRate:", round(odds_ratioHeartRate, 4)))
## [1] "Odds Ratio HeartRate: 0.9606"
print(paste("Odds Ratio Vessel:", round(odds_ratioVessel, 4)))
## [1] "Odds Ratio Vessel: 3.2307"
#Intepretasi OR
cat("\ Tabel 3. Nilai Odds Ratio \n")
## Tabel 3. Nilai Odds Ratio
round(exp(cbind(OR = coef(RLB), confint(RLB))), 4)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 59.1281 6.9933 564.4059
## Gender 4.5733 2.4222 9.0096
## HeartRate 0.9606 0.9462 0.9741
## Vessel 3.2308 2.2514 4.8023