1. Kütüphanelerin Yüklenmesi

library(readr)
library(tidyverse)
library(tidyr)
library(dendextend)
library(knitr)
library(gridExtra)
library(ggplot2)
library(VIM)
library(corrplot)
library(car)
library(ResourceSelection)
library(glmulti)
library(tree)
library(randomForest)
library(ISLR)
library(class)
library(pROC)
library(gtools)
library(tidyverse)
library(GGally)
library(superml)
library(caret)
library(Boruta)
library("stringr")
library("tidyr")
library("readr")
library("here")
library("skimr")
library("janitor")
library("lubridate")
library(gridExtra)
library(ggplot2)
library(VIM)
library(corrplot)
library(car)
library(ResourceSelection)
library(glmulti)
library(tree)
library(randomForest)
library(ISLR)
library(class)
library(pROC)
library(gtools)
library(tidyverse)
library("scales")
library("ggcorrplot")
library("ggrepel")
library("forcats")
library("corrgram")
library(tidymodels)
library(baguette)
library(discrim)
library(bonsai)
library(ResourceSelection)
library(kableExtra)
library(broom)
library(dplyr)
library(caret)
library(tidyr)
library(corrplot)
library("Hmisc")
library(psych)
library(factoextra)
library("DescTools")
library(ResourceSelection)
library(haven)
library(effectsize)
library(rstatix)
library(ggpubr)
library(biotools)
library(PerformanceAnalytics)
library(heplots)
library(gplots)
# Read the cleaned heart-disease data set into a data frame.
# NOTE(review): the path is absolute and machine-specific; it has to be
# adjusted before the script is run on another computer.
clean_df <- read.csv(file = "/home/ilke/Downloads/clean_heart.csv")

2. Lojistik Regresyon

# Fit a binomial (logistic) regression of HeartDisease on the eleven
# predictors listed in the formula, then print the coefficient summary.
model_heart <- glm(
  formula = HeartDisease ~ Age + Sex + ChestPainType + RestingBP +
    Cholesterol + FastingBS + RestingECG + MaxHR + ExerciseAngina +
    Oldpeak + ST_Slope,
  family = "binomial",
  data = clean_df
)
summary(model_heart)
## 
## Call:
## glm(formula = HeartDisease ~ Age + Sex + ChestPainType + RestingBP + 
##     Cholesterol + FastingBS + RestingECG + MaxHR + ExerciseAngina + 
##     Oldpeak + ST_Slope, family = "binomial", data = clean_df)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -4.924678   1.869783  -2.634  0.00844 ** 
## Age               0.030741   0.015217   2.020  0.04337 *  
## SexM              1.825752   0.324087   5.634 1.77e-08 ***
## ChestPainTypeATA -1.823876   0.369458  -4.937 7.95e-07 ***
## ChestPainTypeNAP -1.534945   0.307773  -4.987 6.12e-07 ***
## ChestPainTypeTA  -1.518673   0.491168  -3.092  0.00199 ** 
## RestingBP         0.011386   0.008523   1.336  0.18159    
## Cholesterol       0.003868   0.002524   1.533  0.12536    
## FastingBS         0.284537   0.342271   0.831  0.40579    
## RestingECGNormal -0.188434   0.289890  -0.650  0.51568    
## RestingECGST     -0.096025   0.404308  -0.238  0.81227    
## MaxHR            -0.002272   0.006032  -0.377  0.70647    
## ExerciseAnginaY   0.825277   0.277203   2.977  0.00291 ** 
## Oldpeak           0.378333   0.153670   2.462  0.01382 *  
## ST_SlopeFlat      0.969593   0.561267   1.728  0.08408 .  
## ST_SlopeUp       -1.480541   0.603553  -2.453  0.01417 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 969.32  on 701  degrees of freedom
## Residual deviance: 456.91  on 686  degrees of freedom
## AIC: 488.91
## 
## Number of Fisher Scoring iterations: 6
# Exponentiate the fitted coefficients so they read as odds ratios.
exp(coef(model_heart))
##      (Intercept)              Age             SexM ChestPainTypeATA 
##      0.007265067      1.031218721      6.207461865      0.161399026 
## ChestPainTypeNAP  ChestPainTypeTA        RestingBP      Cholesterol 
##      0.215467576      0.219002248      1.011451502      1.003875393 
##        FastingBS RestingECGNormal     RestingECGST            MaxHR 
##      1.329146854      0.828255401      0.908441570      0.997730875 
##  ExerciseAnginaY          Oldpeak     ST_SlopeFlat       ST_SlopeUp 
##      2.282513158      1.459848785      2.636871313      0.227514654
# Likelihood-ratio (model chi-square) test: compares the fitted model with
# the intercept-only model through the drop in deviance.
model_heart$deviance
## [1] 456.9134
model_heart$null.deviance
## [1] 969.3233
# Test statistic: null deviance minus residual deviance.
kikare <- model_heart$null.deviance - model_heart$deviance
kikare
## [1] 512.4099
model_heart$df.null
## [1] 701
model_heart$df.residual
## [1] 686
# Degrees of freedom of the test: difference between the null and residual
# degrees of freedom, i.e. the number of coefficients besides the intercept.
# NOTE(review): `df` shares its name with stats::df(); the name is kept in
# case code outside this view relies on it.
df <- model_heart$df.null - model_heart$df.residual
df
## [1] 15
# p-value of the chi-square statistic (p < 0.05 means the added predictors
# contribute significantly to the model).
# The upper tail is requested directly with lower.tail = FALSE: computing
# 1 - pchisq(...) subtracts a number indistinguishable from 1 and therefore
# rounds the result to exactly 0 for a statistic this large.
kikare.p <- pchisq(kikare, df, lower.tail = FALSE)
kikare.p
# The printed value is now a very small positive number instead of the
# rounded "[1] 0" shown by the earlier 1 - pchisq() form; re-knit to refresh.
# Hosmer-Lemeshow goodness-of-fit test. A p-value above 0.05 gives no
# evidence of lack of fit, i.e. the model is compatible with the data.

library(ResourceSelection)
hoslem.test(x = model_heart$y, y = fitted(model_heart))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  model_heart$y, fitted(model_heart)
## X-squared = 8.4045, df = 8, p-value = 0.395

Nagelkerke R² değerine (0,692) göre, araştırmanın bağımsız değişkenleri kişinin kalp hastalığına sahip olup olmama durumundaki varyansın yaklaşık yüzde 69’unu açıklamaktadır.

# Pseudo R-squared values of the fitted model: Cox & Snell and Nagelkerke.

library(DescTools)
PseudoR2(x = model_heart, which = c("CoxSnell", "Nagelkerke"))
##   CoxSnell Nagelkerke 
##  0.5180566  0.6920125
# Odds ratios (exponentiated coefficients) followed by their 95% confidence
# limits on the same odds-ratio scale.
exp(model_heart$coefficients)
##      (Intercept)              Age             SexM ChestPainTypeATA 
##      0.007265067      1.031218721      6.207461865      0.161399026 
## ChestPainTypeNAP  ChestPainTypeTA        RestingBP      Cholesterol 
##      0.215467576      0.219002248      1.011451502      1.003875393 
##        FastingBS RestingECGNormal     RestingECGST            MaxHR 
##      1.329146854      0.828255401      0.908441570      0.997730875 
##  ExerciseAnginaY          Oldpeak     ST_SlopeFlat       ST_SlopeUp 
##      2.282513158      1.459848785      2.636871313      0.227514654
# confint.default() builds the interval from the coefficient standard errors
# (Wald-type); exp() moves both limits to the odds-ratio scale.
exp(confint.default(object = model_heart, level = 0.95))
##                         2.5 %     97.5 %
## (Intercept)      0.0001860706  0.2836623
## Age              1.0009165256  1.0624383
## SexM             3.2889034844 11.7159360
## ChestPainTypeATA 0.0782379434  0.3329541
## ChestPainTypeNAP 0.1178704553  0.3938754
## ChestPainTypeTA  0.0836306446  0.5734977
## RestingBP        0.9946948231  1.0284905
## Cholesterol      0.9989221816  1.0088532
## FastingBS        0.6795653527  2.5996490
## RestingECGNormal 0.4692554978  1.4619051
## RestingECGST     0.4112912130  2.0065250
## MaxHR            0.9860046055  1.0095966
## ExerciseAnginaY  1.3257380387  3.9297856
## Oldpeak          1.0801993822  1.9729307
## ST_SlopeFlat     0.8776828838  7.9220986
## ST_SlopeUp       0.0697050213  0.7425996
# In-sample predicted probabilities from the fitted model.
heart_pred <- fitted(object = model_heart)
# Label a case "1" when its probability exceeds 0.5, otherwise "0".
typefac <- ifelse(test = heart_pred > 0.5, yes = "1", no = "0")
# Cross-tabulate observed HeartDisease (rows) against the labels (columns).
t_tab <- table(clean_df$HeartDisease, typefac)
print(t_tab)
##    typefac
##       0   1
##   0 326  51
##   1  42 283
# Overall share of correctly classified cases: diagonal count over total.
local({
  n_correct <- sum(diag(t_tab))
  n_correct / sum(t_tab)
})
## [1] 0.8675214
# Confusion-matrix cells, read from t_tab instead of being typed in by hand
# so the metrics below always follow the current model and data.
# t_tab has the observed HeartDisease value in rows and the predicted label
# in columns, both labelled "0" and "1".
# as.numeric() keeps the counts as doubles (as the former literals were), so
# later products of these counts cannot overflow integer arithmetic.
TN <- as.numeric(t_tab["0", "0"])
FP <- as.numeric(t_tab["0", "1"])
FN <- as.numeric(t_tab["1", "0"])
TP <- as.numeric(t_tab["1", "1"])

# Accuracy: share of all cases that were classified correctly
accuracy <- (TN + TP) / (TN + FP + FN + TP)

# Precision: share of predicted positives that are truly positive
precision <- TP / (TP + FP)

# Recall (sensitivity): share of true positives that were detected
recall <- TP / (TP + FN)

# F1 score: harmonic mean of precision and recall
f1_score <- 2 * (precision * recall) / (precision + recall)

# Print the results
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.8675214
cat("Precision:", precision, "\n")
## Precision: 0.8473054
cat("Recall:", recall, "\n")
## Recall: 0.8707692
cat("F1 Score:", f1_score, "\n")
## F1 Score: 0.8588771
# Pair every observed outcome with its predicted label.
datatib <- tibble("target" = clean_df$HeartDisease,"prediction" = typefac)
datatib
## # A tibble: 702 × 2
##    target prediction
##     <int> <chr>     
##  1      0 0         
##  2      1 0         
##  3      0 0         
##  4      1 1         
##  5      0 0         
##  6      0 0         
##  7      0 0         
##  8      0 0         
##  9      1 1         
## 10      0 0         
## # ℹ 692 more rows
# Cross-tabulate target against prediction (the confusion matrix).
basic_table <- table(datatib)
basic_table
##       prediction
## target   0   1
##      0 326  51
##      1  42 283
# Long format with one row per (target, prediction) cell and its count n.
# tibble::as_tibble() replaces the deprecated broom tidy() method for tables
# and therefore no longer raises the deprecation warning.
cfm <- tibble::as_tibble(basic_table)
cfm
## # A tibble: 4 × 3
##   target prediction     n
##   <chr>  <chr>      <int>
## 1 0      0            326
## 2 1      0             42
## 3 0      1             51
## 4 1      1            283