#Membuat Tabel Kontingensi antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease)
#Import data
Data_HeartDisease <- read.csv("~/Documents/S1 Aida/Semester 2 2025/STIK4362 Analisis Data Kategorik/Tugas 1/Heart Disease.csv", header = TRUE, sep = ",")
head(Data_HeartDisease)
##   Age Gender        ChestPain BloodPressure Cholesterol BloodSugar
## 1  63      M   typical angina           145         233       >120
## 2  67      M       asymptotic           160         286      <=120
## 3  67      M       asymptotic           120         229      <=120
## 4  37      M non-anginal pain           130         250      <=120
## 5  41      F  atypical angina           130         204      <=120
## 6  56      M  atypical angina           120         236      <=120
##   Electrocardiographic HeartRate Exang  ST slope Vessel       Thalassemia
## 1          hyperthropy       150    no 2.3  down      0      fixed defect
## 2          hyperthropy       108   yes 1.5  flat      3            normal
## 3          hyperthropy       129   yes 2.6  flat      2 reversible defect
## 4               normal       187    no 3.5  down      0            normal
## 5          hyperthropy       172    no 1.4    up      0            normal
## 6               normal       178    no 0.8    up      0            normal
##   HeartDisease
## 1           no
## 2          yes
## 3          yes
## 4           no
## 5           no
## 6           no
#Jumlah Data
nrow(Data_HeartDisease)
## [1] 299
#Mengecheck Struktur Data
str(Data_HeartDisease)
## 'data.frame':    299 obs. of  14 variables:
##  $ Age                 : int  63 67 67 37 41 56 62 57 63 53 ...
##  $ Gender              : chr  "M" "M" "M" "M" ...
##  $ ChestPain           : chr  "typical angina" "asymptotic" "asymptotic" "non-anginal pain" ...
##  $ BloodPressure       : int  145 160 120 130 130 120 140 120 130 140 ...
##  $ Cholesterol         : int  233 286 229 250 204 236 268 354 254 203 ...
##  $ BloodSugar          : chr  ">120" "<=120" "<=120" "<=120" ...
##  $ Electrocardiographic: chr  "hyperthropy" "hyperthropy" "hyperthropy" "normal" ...
##  $ HeartRate           : int  150 108 129 187 172 178 160 163 147 155 ...
##  $ Exang               : chr  "no" "yes" "yes" "no" ...
##  $ ST                  : num  2.3 1.5 2.6 3.5 1.4 0.8 3.6 0.6 1.4 3.1 ...
##  $ slope               : chr  "down" "flat" "flat" "down" ...
##  $ Vessel              : int  0 3 2 0 0 0 2 0 1 0 ...
##  $ Thalassemia         : chr  "fixed defect" "normal" "reversible defect" "normal" ...
##  $ HeartDisease        : chr  "no" "yes" "yes" "no" ...
summary(Data_HeartDisease)
##       Age           Gender           ChestPain         BloodPressure  
##  Min.   :29.00   Length:299         Length:299         Min.   : 94.0  
##  1st Qu.:48.00   Class :character   Class :character   1st Qu.:120.0  
##  Median :56.00   Mode  :character   Mode  :character   Median :130.0  
##  Mean   :54.53                                         Mean   :131.7  
##  3rd Qu.:61.00                                         3rd Qu.:140.0  
##  Max.   :77.00                                         Max.   :200.0  
##   Cholesterol     BloodSugar        Electrocardiographic   HeartRate    
##  Min.   :126.0   Length:299         Length:299           Min.   : 71.0  
##  1st Qu.:211.0   Class :character   Class :character     1st Qu.:133.0  
##  Median :242.0   Mode  :character   Mode  :character     Median :153.0  
##  Mean   :247.1                                           Mean   :149.5  
##  3rd Qu.:275.5                                           3rd Qu.:165.5  
##  Max.   :564.0                                           Max.   :202.0  
##     Exang                 ST           slope               Vessel      
##  Length:299         Min.   :0.000   Length:299         Min.   :0.0000  
##  Class :character   1st Qu.:0.000   Class :character   1st Qu.:0.0000  
##  Mode  :character   Median :0.800   Mode  :character   Median :0.0000  
##                     Mean   :1.052                      Mean   :0.6722  
##                     3rd Qu.:1.600                      3rd Qu.:1.0000  
##                     Max.   :6.200                      Max.   :3.0000  
##  Thalassemia        HeartDisease      
##  Length:299         Length:299        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
#Mengubah variabel kategori menjadi factor
#HeartDisease <- na.omit(trimws(Data_HeartDisease$HeartDisease))
#HeartDisease <- as.factor(HeartDisease)
HeartDisease <- factor(Data_HeartDisease$HeartDisease)
#Gender <- na.omit(trimws(Data_HeartDisease$Gender))
#Gender <- as.factor(Gender)
Gender <- factor(Data_HeartDisease$Gender)

#Tabel Kontingensi 2x2 antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease)
contingency_table <- table (Gender, HeartDisease)

cat("\n Tabel 1. Tabel Kontingensi 2x2 antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease) \n")
## 
##  Tabel 1. Tabel Kontingensi 2x2 antara jenis kelamin (Gender) dan status penyakit jantung (HeartDisease)
print(addmargins(contingency_table))
##       HeartDisease
## Gender  no yes Sum
##    F    72  25  97
##    M    89 113 202
##    Sum 161 138 299
#menghitung nilai Chi-square
cat("\n Uji Chi-Square Hubungan antara jenis kelamin dan status penyakit jantung pada taraf nyata 5% \n")
## 
##  Uji Chi-Square Hubungan antara jenis kelamin dan status penyakit jantung pada taraf nyata 5%
chi <- chisq.test(contingency_table, correct = FALSE)
print (chi)
## 
##  Pearson's Chi-squared test
## 
## data:  contingency_table
## X-squared = 23.997, df = 1, p-value = 9.646e-07
#Frekuensi Harapan
cat("\n Tabel 2. Frekuensi Harapan \n")
## 
##  Tabel 2. Frekuensi Harapan
print(chi$expected)
##       HeartDisease
## Gender        no      yes
##      F  52.23077 44.76923
##      M 108.76923 93.23077
#Menghitung nilai OR
OR <-((contingency_table[1,1]*contingency_table[2,2])/(contingency_table[1,2]*contingency_table[2,1]))
#Menampilkan hasil
cat("Nilai Odds Ratio", OR, "\n")
## Nilai Odds Ratio 3.656629
#Menghitung nilai OR dengan Package
library(epitools) 
## Warning: package 'epitools' was built under R version 4.4.1
OR_Hasil <- oddsratio(contingency_table,method = "wald")
cat("Nilai Odds Ratio", OR_Hasil$measure[2,1], "\n")
## Nilai Odds Ratio 3.656629
#Model Regresi Logistik Biner
Vessel <- Data_HeartDisease$Vessel
HeartRate <- Data_HeartDisease$HeartRate
HeartDisease <- ifelse(Data_HeartDisease$HeartDisease == "yes", 1, 0)
Gender <- ifelse(Data_HeartDisease$Gender == "M", 1, 0)
dataRLB <- data.frame(HeartDisease, Gender, Vessel, HeartRate)
RLB <- glm(HeartDisease ~ Gender + HeartRate + Vessel, data = dataRLB, family = binomial)

#Summarize the model
summary (RLB)
## 
## Call:
## glm(formula = HeartDisease ~ Gender + HeartRate + Vessel, family = binomial, 
##     data = dataRLB)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.079707   1.116123   3.655 0.000257 ***
## Gender       1.520229   0.333810   4.554 5.26e-06 ***
## HeartRate   -0.040174   0.007406  -5.425 5.81e-08 ***
## Vessel       1.172716   0.192693   6.086 1.16e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 412.73  on 298  degrees of freedom
## Residual deviance: 283.79  on 295  degrees of freedom
## AIC: 291.79
## 
## Number of Fisher Scoring iterations: 5
#Likelihood Ratio Test using anova function
anova(RLB, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: HeartDisease
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        298     412.73              
## Gender     1   24.848       297     387.88 6.205e-07 ***
## HeartRate  1   55.721       296     332.16 8.354e-14 ***
## Vessel     1   48.371       295     283.79 3.527e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit the null model (tanpa variabel bebas)
null_model <- glm(HeartDisease ~ 1, data = dataRLB, family = binomial)

# Fit the full model (dengan variabel bebas)
full_model <- RLB

# Calculate log-likelihood values
loglik_null <- logLik(null_model)
loglik_null
## 'log Lik.' -206.3655 (df=1)
loglik_full <- logLik(full_model)
loglik_full
## 'log Lik.' -141.8957 (df=4)
# Perform likelihood ratio test 
G <- -2*(loglik_null-loglik_full)
G
## 'log Lik.' 128.9396 (df=1)
# Perform likelihood ratio test using a package
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
likelihood_ratio_test <- lrtest(null_model, full_model)
likelihood_ratio_test
## Likelihood ratio test
## 
## Model 1: HeartDisease ~ 1
## Model 2: HeartDisease ~ Gender + HeartRate + Vessel
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   1 -206.37                         
## 2   4 -141.90  3 128.94  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Interpretation of likelihood ratio test results
if (likelihood_ratio_test$"Pr(>Chisq)"[2] < 0.05) 
{
  cat("Reject the null hypothesis. The full model is significantly better than 
        the null model.\n")
} else {
  cat("Fail to reject the null hypothesis. The null model is sufficient.\n")
} 
## Reject the null hypothesis. The full model is significantly better than 
##         the null model.
#Wald Test 
summary (RLB) #lihat Z Value dan P(|Z|)
## 
## Call:
## glm(formula = HeartDisease ~ Gender + HeartRate + Vessel, family = binomial, 
##     data = dataRLB)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.079707   1.116123   3.655 0.000257 ***
## Gender       1.520229   0.333810   4.554 5.26e-06 ***
## HeartRate   -0.040174   0.007406  -5.425 5.81e-08 ***
## Vessel       1.172716   0.192693   6.086 1.16e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 412.73  on 298  degrees of freedom
## Residual deviance: 283.79  on 295  degrees of freedom
## AIC: 291.79
## 
## Number of Fisher Scoring iterations: 5
confint(RLB, level = 0.95)
## Waiting for profiling to be done...
##                   2.5 %      97.5 %
## (Intercept)  1.94495221  6.33577375
## Gender       0.88468750  2.19829584
## HeartRate   -0.05532964 -0.02620644
## Vessel       0.81156423  1.56909195
#Wald Test (manual)
wald_Gender <- RLB$coefficients[2]/summary(RLB)$coefficient["Gender", "Std. Error"]
wald_Gender
##   Gender 
## 4.554173
p_value_Gender <- 1-pchisq(wald_Gender^2, df=1)
p_value_Gender
##       Gender 
## 5.259196e-06
wald_HR <- RLB$coefficients[3]/summary(RLB)$coefficient["HeartRate", "Std. Error"]
wald_HR
## HeartRate 
## -5.424675
p_value_HR <- 1-pchisq(wald_HR^2, df=1)
p_value_HR
##    HeartRate 
## 5.806009e-08
wald_Vessel <- RLB$coefficients[4]/summary(RLB)$coefficient["Vessel", "Std. Error"]
wald_Vessel
##   Vessel 
## 6.085926
p_value_Vessel <- 1-pchisq(wald_Vessel^2, df=1)
p_value_Vessel
##       Vessel 
## 1.158198e-09
#Extract Coefficients
intercept <- round(RLB$coefficients[1], 4)
intercept
## (Intercept) 
##      4.0797
coefficient_Gender <- round(RLB$coefficients[2], 4)
coefficient_Gender
## Gender 
## 1.5202
coefficient_HeartRate <- round(RLB$coefficients[3], 4)
coefficient_HeartRate
## HeartRate 
##   -0.0402
coefficient_Vessel <- round(RLB$coefficients[4], 4)
coefficient_Vessel
## Vessel 
## 1.1727
#Menghitung Nilai OR
odds_ratioGender <- exp(coefficient_Gender)
odds_ratioHeartRate <- exp(coefficient_HeartRate)
odds_ratioVessel <- exp(coefficient_Vessel)

#Print OR
print(paste("Odds Ratio Gender:", round(odds_ratioGender, 4)))
## [1] "Odds Ratio Gender: 4.5731"
print(paste("Odds Ratio HeartRate:", round(odds_ratioHeartRate, 4)))
## [1] "Odds Ratio HeartRate: 0.9606"
print(paste("Odds Ratio Vessel:", round(odds_ratioVessel, 4)))
## [1] "Odds Ratio Vessel: 3.2307"
#Intepretasi OR
cat("\ Tabel 3. Nilai Odds Ratio \n")
##  Tabel 3. Nilai Odds Ratio
round(exp(cbind(OR = coef(RLB), confint(RLB))), 4)
## Waiting for profiling to be done...
##                  OR  2.5 %   97.5 %
## (Intercept) 59.1281 6.9933 564.4059
## Gender       4.5733 2.4222   9.0096
## HeartRate    0.9606 0.9462   0.9741
## Vessel       3.2308 2.2514   4.8023