library(readr)
library(tidyverse)
library(tidyr)
library(dendextend)
library(knitr)
library(gridExtra)
library(ggplot2)
library(VIM)
library(corrplot)
library(car)
library(ResourceSelection)
library(glmulti)
library(tree)
library(randomForest)
library(ISLR)
library(class)
library(pROC)
library(gtools)
library(tidyverse)
library(GGally)
library(superml)
library(caret)
library(Boruta)
library("stringr")
library("tidyr")
library("readr")
library("here")
library("skimr")
library("janitor")
library("lubridate")
library(gridExtra)
library(ggplot2)
library(VIM)
library(corrplot)
library(car)
library(ResourceSelection)
library(glmulti)
library(tree)
library(randomForest)
library(ISLR)
library(class)
library(pROC)
library(gtools)
library(tidyverse)
library("scales")
library("ggcorrplot")
library("ggrepel")
library("forcats")
library("corrgram")
library(tidymodels)
library(baguette)
library(discrim)
library(bonsai)
library(ResourceSelection)
library(kableExtra)
library(broom)
library(dplyr)
library(caret)
library(tidyr)
library(corrplot)
library("Hmisc")
library(psych)
library(factoextra)
library("DescTools")
library(ResourceSelection)
library(haven)
library(effectsize)
library(rstatix)
library(ggpubr)
library(biotools)
library(PerformanceAnalytics)
library(heplots)
library(gplots)
# Read the heart data CSV into a data frame.
clean_df <- read.csv(file = "/home/ilke/Downloads/clean_heart.csv")

# Binary logistic regression of HeartDisease on the eleven predictors below.
model_heart <- glm(
  HeartDisease ~ Age + Sex + ChestPainType + RestingBP + Cholesterol +
    FastingBS + RestingECG + MaxHR + ExerciseAngina + Oldpeak + ST_Slope,
  family = "binomial",
  data = clean_df
)

# Print the coefficient table, deviances and AIC of the fit.
summary(model_heart)
##
## Call:
## glm(formula = HeartDisease ~ Age + Sex + ChestPainType + RestingBP +
## Cholesterol + FastingBS + RestingECG + MaxHR + ExerciseAngina +
## Oldpeak + ST_Slope, family = "binomial", data = clean_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.924678 1.869783 -2.634 0.00844 **
## Age 0.030741 0.015217 2.020 0.04337 *
## SexM 1.825752 0.324087 5.634 1.77e-08 ***
## ChestPainTypeATA -1.823876 0.369458 -4.937 7.95e-07 ***
## ChestPainTypeNAP -1.534945 0.307773 -4.987 6.12e-07 ***
## ChestPainTypeTA -1.518673 0.491168 -3.092 0.00199 **
## RestingBP 0.011386 0.008523 1.336 0.18159
## Cholesterol 0.003868 0.002524 1.533 0.12536
## FastingBS 0.284537 0.342271 0.831 0.40579
## RestingECGNormal -0.188434 0.289890 -0.650 0.51568
## RestingECGST -0.096025 0.404308 -0.238 0.81227
## MaxHR -0.002272 0.006032 -0.377 0.70647
## ExerciseAnginaY 0.825277 0.277203 2.977 0.00291 **
## Oldpeak 0.378333 0.153670 2.462 0.01382 *
## ST_SlopeFlat 0.969593 0.561267 1.728 0.08408 .
## ST_SlopeUp -1.480541 0.603553 -2.453 0.01417 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 969.32 on 701 degrees of freedom
## Residual deviance: 456.91 on 686 degrees of freedom
## AIC: 488.91
##
## Number of Fisher Scoring iterations: 6
# Exponentiate the fitted coefficients to put them on the odds-ratio scale.
exp(coef(model_heart))
## (Intercept) Age SexM ChestPainTypeATA
## 0.007265067 1.031218721 6.207461865 0.161399026
## ChestPainTypeNAP ChestPainTypeTA RestingBP Cholesterol
## 0.215467576 0.219002248 1.011451502 1.003875393
## FastingBS RestingECGNormal RestingECGST MaxHR
## 1.329146854 0.828255401 0.908441570 0.997730875
## ExerciseAnginaY Oldpeak ST_SlopeFlat ST_SlopeUp
## 2.282513158 1.459848785 2.636871313 0.227514654
# Likelihood-ratio (omnibus) test: compare the fitted model with the
# intercept-only model through the drop in deviance.
model_heart$deviance
## [1] 456.9134
model_heart$null.deviance
## [1] 969.3233
# Chi-square statistic = null deviance - residual deviance.
kikare <- model_heart$null.deviance - model_heart$deviance
kikare
## [1] 512.4099
model_heart$df.null
## [1] 701
model_heart$df.residual
## [1] 686
# Degrees of freedom of the test = difference between the two models' df.
df <- model_heart$df.null - model_heart$df.residual
df
## [1] 15
# p-value of the chi-square statistic (p < 0.05 means the added predictors
# contribute significantly to the model).
# The upper tail is requested directly: for a statistic this large,
# `1 - pchisq(kikare, df)` cancels in floating point and returns exactly 0.
kikare.p <- pchisq(kikare, df, lower.tail = FALSE)
kikare.p
## (a very small positive number; the former `1 - pchisq()` form printed 0)
# Hosmer-Lemeshow goodness-of-fit test (p > 0.05 means there is no evidence of
# lack of fit, i.e. the model is consistent with the data).
library(ResourceSelection)
hoslem.test(x = model_heart$y, y = fitted(model_heart), g = 10)
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: model_heart$y, fitted(model_heart)
## X-squared = 8.4045, df = 8, p-value = 0.395
# Interpretation: the independent variables in the study explain about 69% of
# the variance in whether a person has heart disease (Nagelkerke R^2 = 0.692).
# Pseudo R^2 values of the model (Cox & Snell and Nagelkerke).
library(DescTools)
PseudoR2(x = model_heart, which = c("CoxSnell", "Nagelkerke"))
## CoxSnell Nagelkerke
## 0.5180566 0.6920125
# Exponentiated model coefficients (odds ratios) and their confidence intervals.
model_heart %>%
  coef() %>%
  exp()
## (Intercept) Age SexM ChestPainTypeATA
## 0.007265067 1.031218721 6.207461865 0.161399026
## ChestPainTypeNAP ChestPainTypeTA RestingBP Cholesterol
## 0.215467576 0.219002248 1.011451502 1.003875393
## FastingBS RestingECGNormal RestingECGST MaxHR
## 1.329146854 0.828255401 0.908441570 0.997730875
## ExerciseAnginaY Oldpeak ST_SlopeFlat ST_SlopeUp
## 2.282513158 1.459848785 2.636871313 0.227514654
# 95% intervals from confint.default(), exponentiated to the odds-ratio scale.
model_heart %>%
  confint.default(level = 0.95) %>%
  exp()
## 2.5 % 97.5 %
## (Intercept) 0.0001860706 0.2836623
## Age 1.0009165256 1.0624383
## SexM 3.2889034844 11.7159360
## ChestPainTypeATA 0.0782379434 0.3329541
## ChestPainTypeNAP 0.1178704553 0.3938754
## ChestPainTypeTA 0.0836306446 0.5734977
## RestingBP 0.9946948231 1.0284905
## Cholesterol 0.9989221816 1.0088532
## FastingBS 0.6795653527 2.5996490
## RestingECGNormal 0.4692554978 1.4619051
## RestingECGST 0.4112912130 2.0065250
## MaxHR 0.9860046055 1.0095966
## ExerciseAnginaY 1.3257380387 3.9297856
## Oldpeak 1.0801993822 1.9729307
## ST_SlopeFlat 0.8776828838 7.9220986
## ST_SlopeUp 0.0697050213 0.7425996
# Fitted values of the logistic model for the training rows.
heart_pred <- fitted(model_heart)
# Classify with a 0.5 cut-off: "1" above the threshold, "0" otherwise.
typefac <- ifelse(heart_pred > 0.5, yes = "1", no = "0")
# Cross-tabulate observed HeartDisease (rows) against predicted class (columns).
t_tab <- table(clean_df$HeartDisease, typefac)
t_tab
## typefac
## 0 1
## 0 326 51
## 1 42 283
# Overall proportion of correctly classified observations.
sum(diag(t_tab)) / sum(t_tab)
## [1] 0.8675214
# Confusion matrix cells, read from t_tab (rows = observed HeartDisease,
# columns = predicted class) rather than typed in by hand, so the metrics
# stay correct if the data or the model changes.
TN <- t_tab["0", "0"]
FP <- t_tab["0", "1"]
FN <- t_tab["1", "0"]
TP <- t_tab["1", "1"]
# Accuracy: share of all observations that were classified correctly.
accuracy <- (TN + TP) / (TN + FP + FN + TP)
# Precision: share of predicted positives that are true positives.
precision <- TP / (TP + FP)
# Recall (sensitivity): share of actual positives that were detected.
recall <- TP / (TP + FN)
# F1 score: harmonic mean of precision and recall.
f1_score <- 2 * (precision * recall) / (precision + recall)
# Print the results
cat("Accuracy:", accuracy, "\n")
## Accuracy: 0.8675214
cat("Precision:", precision, "\n")
## Precision: 0.8473054
cat("Recall:", recall, "\n")
## Recall: 0.8707692
cat("F1 Score:", f1_score, "\n")
## F1 Score: 0.8588771
# Pair each observed outcome with its predicted class.
datatib <- tibble(target = clean_df$HeartDisease, prediction = typefac)
datatib
## # A tibble: 702 × 2
## target prediction
## <int> <chr>
## 1 0 0
## 2 1 0
## 3 0 0
## 4 1 1
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 1 1
## 10 0 0
## # ℹ 692 more rows
# Cross-tabulate target against prediction.
basic_table <- table(datatib)
basic_table
## prediction
## target 0 1
## 0 326 51
## 1 42 283
# Long-format confusion matrix, one row per (target, prediction) cell.
# as_tibble() replaces the deprecated tidy() method for tables, which emitted
# a deprecation warning pointing to tibble::as_tibble().
cfm <- as_tibble(basic_table)
cfm
## # A tibble: 4 × 3
## target prediction n
## <chr> <chr> <int>
## 1 0 0 326
## 2 1 0 42
## 3 0 1 51
## 4 1 1 283