Di sini kita akan menggunakan model regresi logistik dan K-Nearest Neighbor (K-NN) untuk memprediksi adanya penyakit jantung pada pasien (variabel target) berdasarkan 13 atribut, yaitu:
age: in years
sex: male or female
chest pain type (4 values)
resting blood pressure: blood pressure measured at rest (in mmHg)
serum cholesterol (chol): in mg/dl
fasting blood sugar > 120 mg/dl
resting electrocardiographic results (values 0,1,2)
maximum heart rate achieved
exercise induced angina
oldpeak = ST depression induced by exercise relative to rest
the slope of the peak exercise ST segment
number of major vessels (0-3) colored by fluoroscopy
thal: 0 = No Thalassemia; 1 = Normal Thalassemia; 2 = Fixed Defect Thalassemia; 3 = Reversible Defect Thalassemia
## ï..age sex cp trestbps chol fbs restecg thalach
## 0 0 0 0 0 0 0 0
## exang oldpeak slope ca thal target
## 0 0 0 0 0 0
# Recode the integer-coded categorical columns into descriptive labels and
# convert every character column to a factor.
# `ï..age` is the name the age column carries in the imported data
# (presumably a byte-order-mark artefact of the CSV import -- TODO confirm);
# it is renamed to `age`.
# Multi-level columns are mapped code by code with case_when(): a value outside
# the listed codes becomes NA instead of silently falling into the last label,
# which is what the previous nested ifelse() chains did.
heart <- heart %>%
  rename("age" = "ï..age") %>%
  mutate(
    sex = if_else(sex == 1, "Male", "Female"),
    fbs = if_else(fbs == 1, "> 120 mg/dl", "< 120 mg/dl"),
    exang = if_else(
      exang == 1,
      "Exercise Induced Angina",
      "No Exercise Induced Angina"
    ),
    cp = case_when(
      cp == 0 ~ "Chest Pain Type 0",
      cp == 1 ~ "Chest Pain Type 1",
      cp == 2 ~ "Chest Pain Type 2",
      cp == 3 ~ "Chest Pain Type 3"
    ),
    restecg = case_when(
      restecg == 0 ~ "Normal",
      restecg == 1 ~ "Abnormality",
      restecg == 2 ~ "Probable or Definite"
    ),
    thal = case_when(
      thal == 0 ~ "No Thalassemia",
      thal == 1 ~ "Normal Thalassemia",
      thal == 2 ~ "Fixed Defect Thalassemia",
      thal == 3 ~ "Reversible Defect Thalassemia"
    ),
    target = if_else(target == 0, "Healthy", "Heart Disease"),
    slope = case_when(
      slope == 0 ~ "Peak Excercise ST Slope 0",
      slope == 1 ~ "Peak Excercise ST Slope 1",
      slope == 2 ~ "Peak Excercise ST Slope 2"
    )
  ) %>%
  # as.factor() orders levels alphabetically, so the first label in
  # alphabetical order becomes the reference level in the models below.
  mutate(across(where(is.character), as.factor))
head(heart)

# Alternative encoding via factor(), kept for reference (disabled):
# heart <- heart %>%
# mutate_if(is.integer, as.factor) %>%
# mutate(sex = factor(sex, levels = c(0,1), labels = c("Female", "Male")),
# fbs =factor(fbs, levels = c(0,1), labels = c("False", "True")),
# exang = factor(exang, levels = c(0,1), labels = c("No", "Yes")),
# target = factor(target, levels = c(0,1),
# labels = c("Health", "Not Health")))
# str(heart)

# Age density per diagnosis. Rows with a missing target or age are excluded
# before plotting.
# The assignment now sits on its own line: fused behind the `# str(heart)`
# comment it was never executed. Fill is mapped once, in the top-level aes(),
# so the legend is titled "target".
ta <- ggplot(
  heart[(!is.na(heart$target) & !is.na(heart$age)), ],
  aes(x = age, fill = target)
) +
  geom_density(alpha = 0.5) +
  labs(title = "target density and Age") +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  theme_grey()
ta

# Number of patients per diagnosis, one panel per sex.
# The plot is stored as `sex` (the name ggplotly() is called with below); the
# fused `tasex` left that object undefined.
sex <- ggplot(data = heart, mapping = aes(x = target)) +
  geom_bar(mapping = aes(fill = sex)) +
  theme_grey() +
  ggtitle("Distribution of Patient by gender") +
  # The x axis shows `target`, so it is labelled accordingly.
  xlab("Diagnosis") +
  ylab("Number of patient") +
  # Facet by the column through a formula rather than by the raw vector.
  facet_wrap(~sex)
ggplotly(sex)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Resting blood pressure per diagnosis: density-scaled histogram with a kernel
# density overlay, one facet row per target level.
trestbps <- ggplot(heart, aes(x = trestbps, fill = target)) +
  geom_histogram(aes(y = ..density..), color = "grey17") +
  geom_density(alpha = .2, fill = "black") +
  # `scales` is spelled out; `scale =` only worked via partial matching.
  facet_wrap(~target, ncol = 1, scales = "fixed") +
  xlab("Resting Blood Pressure (mmHg)") +
  ylab("Density/Count") +
  ggtitle("Resting Blood Pressure")
ggplotly(trestbps)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Serum cholesterol per diagnosis: density-scaled histogram with a kernel
# density overlay, one facet row per target level.
chol <- ggplot(heart, aes(x = chol, fill = target)) +
  geom_histogram(aes(y = ..density..), color = "grey17") +
  geom_density(alpha = .2, fill = "black") +
  # `scales` is spelled out; `scale =` only worked via partial matching.
  facet_wrap(~target, ncol = 1, scales = "fixed") +
  xlab("Cholesterol") +
  ylab("Density/Count") +
  ggtitle("Serum Cholestoral (mg/dl)")
ggplotly(chol)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Maximum heart rate achieved per diagnosis: density-scaled histogram with a
# kernel density overlay, one facet row per target level.
thalach <- ggplot(heart, aes(x = thalach, fill = target)) +
  geom_histogram(aes(y = ..density..), color = "grey17") +
  geom_density(alpha = .2, fill = "black") +
  # `scales` is spelled out; `scale =` only worked via partial matching.
  facet_wrap(~target, ncol = 1, scales = "fixed") +
  xlab("Maximum Heart Rate Achieved") +
  ylab("Density/Count") +
  ggtitle("Maximum Heart Rate Achieved")
ggplotly(thalach)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# ST depression (oldpeak) per diagnosis: density-scaled histogram with a kernel
# density overlay, one facet row per target level. The fill legend is relabelled
# in the order of the target factor levels.
stdep <- ggplot(heart, aes(x = oldpeak, fill = target)) +
  geom_histogram(aes(y = ..density..), color = "grey17") +
  geom_density(alpha = .2, fill = "yellow") +
  # `scales` is spelled out; `scale =` only worked via partial matching.
  facet_wrap(~target, ncol = 1, scales = "fixed") +
  xlab("Oldpeak") +
  ylab("Density/Count") +
  ggtitle("ST depression induced by exercise relative to rest") +
  scale_fill_discrete(
    name = "Heart Disease",
    labels = c("Absence", "Presence")
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(stdep)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Age against oldpeak, coloured by sex and sized by `ca`, one facet row per
# target level.
# The former scale_fill_discrete() call was removed: no fill aesthetic is
# mapped in this plot, so it had no effect on the output.
ggplot(heart, aes(x = age, y = oldpeak, color = sex, size = ca)) +
  geom_point(alpha = 0.7) +
  # `scales` is spelled out; `scale =` only worked via partial matching.
  facet_wrap(~target, ncol = 1, scales = "fixed") +
  xlab("Age") +
  ylab("Oldpeak") +
  theme(plot.title = element_text(hjust = 0.5))
##
## Healthy Heart Disease
## 0.4554455 0.5445545
# Data Splitting
# Draw 75% of the row indices at random (fixed seed for reproducibility);
# the drawn rows form the training set, the remaining rows the test set.
set.seed(303)
id <- sample.int(nrow(heart), size = nrow(heart) * 0.75)
heart_train <- heart[id, ]
heart_test <- heart[-id, ]
# inspect correlation between predictors
# Correlation matrix of the numeric predictors.
# The previous `heart[, -120]` removed nothing (there is no column 120), so the
# factor columns were passed along and ggcorr() discarded them with a warning.
# Only the numeric columns are handed over now.
ggcorr(
  Filter(is.numeric, heart),
  hjust = 1,
  layout.exp = 2,
  label = TRUE,
  label_size = 5
)
##
## Call:
## glm(formula = target ~ ., family = "binomial", data = heart_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7992 -0.3429 0.1546 0.4645 2.8191
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.299211 3.312564 0.694 0.487627
## age -0.007418 0.028227 -0.263 0.792714
## sexMale -1.022120 0.578166 -1.768 0.077083 .
## cpChest Pain Type 1 0.710026 0.653115 1.087 0.276976
## cpChest Pain Type 2 1.764556 0.543602 3.246 0.001170 **
## cpChest Pain Type 3 1.935198 0.760741 2.544 0.010964 *
## trestbps -0.019759 0.013441 -1.470 0.141540
## chol -0.005541 0.005435 -1.020 0.307925
## fbs> 120 mg/dl 0.587834 0.679932 0.865 0.387287
## restecgNormal -0.241410 0.456491 -0.529 0.596917
## restecgProbable or Definite -0.142914 2.294098 -0.062 0.950327
## thalach 0.019879 0.012566 1.582 0.113650
## exangNo Exercise Induced Angina 1.108604 0.493709 2.245 0.024739 *
## oldpeak -0.375135 0.261791 -1.433 0.151870
## slopePeak Excercise ST Slope 1 -0.438271 1.030294 -0.425 0.670556
## slopePeak Excercise ST Slope 2 0.555018 1.111701 0.499 0.617602
## ca -0.724630 0.262506 -2.760 0.005772 **
## thalNo Thalassemia -1.880193 2.093017 -0.898 0.369017
## thalNormal Thalassemia -1.183986 0.961793 -1.231 0.218316
## thalReversible Defect Thalassemia -1.619954 0.492001 -3.293 0.000993 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 313.41 on 226 degrees of freedom
## Residual deviance: 148.39 on 207 degrees of freedom
## AIC: 188.39
##
## Number of Fisher Scoring iterations: 6
## Start: AIC=188.39
## target ~ age + sex + cp + trestbps + chol + fbs + restecg + thalach +
## exang + oldpeak + slope + ca + thal
##
## Df Deviance AIC
## - restecg 2 148.68 184.68
## - age 1 148.46 186.46
## - fbs 1 149.16 187.16
## - chol 1 149.44 187.44
## - slope 2 152.12 188.12
## <none> 148.40 188.40
## - oldpeak 1 150.54 188.54
## - trestbps 1 150.62 188.62
## - thalach 1 151.01 189.01
## - sex 1 151.63 189.63
## - exang 1 153.46 191.46
## - thal 3 160.24 194.24
## - ca 1 156.26 194.26
## - cp 3 162.75 196.75
##
## Step: AIC=184.67
## target ~ age + sex + cp + trestbps + chol + fbs + thalach + exang +
## oldpeak + slope + ca + thal
##
## Df Deviance AIC
## - age 1 148.80 182.80
## - fbs 1 149.48 183.48
## - chol 1 149.97 183.97
## <none> 148.68 184.68
## - oldpeak 1 150.79 184.79
## - slope 2 152.88 184.88
## - trestbps 1 150.98 184.98
## - thalach 1 151.30 185.30
## - sex 1 152.22 186.22
## - exang 1 153.73 187.73
## - thal 3 160.25 190.25
## - ca 1 156.54 190.54
## - cp 3 162.85 192.85
##
## Step: AIC=182.8
## target ~ sex + cp + trestbps + chol + fbs + thalach + exang +
## oldpeak + slope + ca + thal
##
## Df Deviance AIC
## - fbs 1 149.59 181.59
## - chol 1 150.28 182.28
## <none> 148.80 182.80
## - oldpeak 1 150.84 182.84
## - slope 2 152.97 182.97
## - trestbps 1 151.60 183.60
## - sex 1 152.31 184.31
## - thalach 1 152.44 184.44
## - exang 1 153.91 185.91
## - thal 3 160.40 188.40
## - ca 1 157.76 189.76
## - cp 3 163.07 191.07
##
## Step: AIC=181.59
## target ~ sex + cp + trestbps + chol + thalach + exang + oldpeak +
## slope + ca + thal
##
## Df Deviance AIC
## - chol 1 150.89 180.89
## <none> 149.59 181.59
## - slope 2 153.66 181.66
## - oldpeak 1 151.75 181.75
## - trestbps 1 151.99 181.99
## - sex 1 152.88 182.88
## - thalach 1 153.18 183.18
## - exang 1 154.27 184.27
## - thal 3 161.20 187.20
## - ca 1 157.84 187.84
## - cp 3 165.46 191.46
##
## Step: AIC=180.89
## target ~ sex + cp + trestbps + thalach + exang + oldpeak + slope +
## ca + thal
##
## Df Deviance AIC
## <none> 150.89 180.89
## - slope 2 155.31 181.31
## - trestbps 1 153.41 181.41
## - oldpeak 1 153.43 181.43
## - sex 1 153.49 181.49
## - thalach 1 154.41 182.41
## - exang 1 155.24 183.24
## - thal 3 162.41 186.41
## - ca 1 159.56 187.56
## - cp 3 167.37 191.37
# Logistic regression on the predictors retained by the stepwise search
# (the final formula of the step trace above), fitted on the training set.
model.b <- glm(
  formula = target ~ sex + cp + trestbps + thalach + exang + oldpeak + slope +
    ca + thal,
  family = "binomial",
  data = heart_train
)
summary(model.b)
##
## Call:
## glm(formula = target ~ sex + cp + trestbps + thalach + exang +
## oldpeak + slope + ca + thal, family = "binomial", data = heart_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7972 -0.3713 0.1809 0.4796 2.7643
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.21948 2.58654 0.085 0.932376
## sexMale -0.87424 0.54828 -1.595 0.110823
## cpChest Pain Type 1 0.76182 0.64011 1.190 0.233996
## cpChest Pain Type 2 1.88346 0.53225 3.539 0.000402 ***
## cpChest Pain Type 3 1.97437 0.74872 2.637 0.008364 **
## trestbps -0.01965 0.01264 -1.555 0.120051
## thalach 0.02101 0.01151 1.825 0.067987 .
## exangNo Exercise Induced Angina 1.00303 0.47989 2.090 0.036607 *
## oldpeak -0.39790 0.25501 -1.560 0.118689
## slopePeak Excercise ST Slope 1 -0.47540 1.00992 -0.471 0.637831
## slopePeak Excercise ST Slope 2 0.58853 1.08607 0.542 0.587893
## ca -0.70002 0.24603 -2.845 0.004438 **
## thalNo Thalassemia -1.41522 2.38294 -0.594 0.552582
## thalNormal Thalassemia -0.96782 0.92548 -1.046 0.295676
## thalReversible Defect Thalassemia -1.57991 0.47940 -3.296 0.000982 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 313.41 on 226 degrees of freedom
## Residual deviance: 150.89 on 212 degrees of freedom
## AIC: 180.89
##
## Number of Fisher Scoring iterations: 6
From the model, model.b, we can interpret several things:
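The odds of a male patient being diagnosed with heart disease are about 58% lower than those of a female patient (odds ratio exp(-0.874) ≈ 0.42), holding the other predictors fixed.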
A patient with no exercise induced angina (unstable angina) has about 2.7 times the odds of heart disease compared with a patient with exercise induced angina (odds ratio exp(1.003) ≈ 2.73).
Each one-unit increase in oldpeak lowers the odds of being diagnosed with heart disease by about 33% (odds ratio exp(-0.398) ≈ 0.67).
Preparation: we add the predicted probabilities from the model to our data, heart_train and heart_test.
# Store the predicted probabilities of model.b next to the observations.
heart_test$pred.Target <- predict(model.b, heart_test, type = "response")
heart_train$pred.Target <- predict(model.b, heart_train, type = "response")

# Classification metrics at a single probability cutoff.
#
# Arguments:
#   cutoff    threshold; observations with prob >= cutoff get `postarget`
#   prob      numeric vector of predicted probabilities
#   ref       true classes, handed to caret::confusionMatrix() as reference
#   postarget label of the positive class
#   negtarget label of the negative class
#
# Returns a 1 x 4 numeric matrix with columns "recall", "accuracy",
# "precicion" and "specificity" (names left unchanged for existing callers).
performa <- function(cutoff, prob, ref, postarget, negtarget) {
  # Fix the level set up front so both classes exist as levels even when every
  # probability lies on the same side of the cutoff.
  class_levels <- if (is.factor(ref)) levels(ref) else c(negtarget, postarget)
  predicted <- factor(
    ifelse(prob >= cutoff, postarget, negtarget),
    levels = class_levels
  )
  conf <- caret::confusionMatrix(predicted, ref, positive = postarget)

  # Pick the statistics by name instead of by position.
  metrics <- c(
    conf$byClass[["Sensitivity"]],
    conf$overall[["Accuracy"]],
    conf$byClass[["Pos Pred Value"]],
    conf$byClass[["Specificity"]]
  )
  matrix(
    metrics,
    nrow = 1,
    dimnames = list(NULL, c("recall", "accuracy", "precicion", "specificity"))
  )
}
# Evaluate the four metrics over a grid of candidate cutoffs; row i of `result`
# holds recall, accuracy, precision and specificity for co[i].
# NOTE(review): the cutoff is tuned on heart_test, the same data that is later
# used to report test performance -- consider tuning on heart_train instead.
co <- seq(0.01, 0.80, length.out = 100)
# Size the result from the grid itself rather than repeating the literal 100.
result <- matrix(0, nrow = length(co), ncol = 4)
for (i in seq_along(co)) {
  result[i, ] <- performa(
    cutoff = co[i],
    prob = heart_test$pred.Target,
    ref = heart_test$target,
    postarget = "Heart Disease",
    negtarget = "Healthy"
  )
}
# Interactive line chart of every metric against the cutoff.
# The metric columns are stacked with pivot_longer(), selecting everything
# except `Cutoff` by name (gather() is superseded and picked columns 1:4 by
# position).
ggplotly(
  tibble(
    "Recall" = result[, 1],
    "Accuracy" = result[, 2],
    "Precision" = result[, 3],
    "Specificity" = result[, 4],
    "Cutoff" = co
  ) %>%
    pivot_longer(cols = -Cutoff, names_to = "performa", values_to = "value") %>%
    ggplot(aes(x = Cutoff, y = value, col = performa)) +
    geom_line(lwd = 1.5) +
    scale_color_manual(values = c("darkred", "darkgreen", "orange", "blue")) +
    scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
    scale_x_continuous(breaks = seq(0, 1, 0.1)) +
    labs(title = "Tradeoff model perfomance") +
    theme_minimal() +
    theme(
      legend.position = "top",
      panel.grid.minor.y = element_blank(),
      panel.grid.minor.x = element_blank()
    )
)
Before doing evaluation, we need to inspect the good cutoff or threshold to maximize the accuracy, recall and precision value. Here from the graph, 0.63 is the most balance value.
## Confusion Matrix and Statistics
##
## Reference
## Prediction Healthy Heart Disease
## Healthy 96 16
## Heart Disease 9 106
##
## Accuracy : 0.8899
## 95% CI : (0.8417, 0.9274)
## No Information Rate : 0.5374
## P-Value [Acc > NIR] : <0.0000000000000002
##
## Kappa : 0.7795
##
## Mcnemar's Test P-Value : 0.2301
##
## Sensitivity : 0.8689
## Specificity : 0.9143
## Pos Pred Value : 0.9217
## Neg Pred Value : 0.8571
## Prevalence : 0.5374
## Detection Rate : 0.4670
## Detection Prevalence : 0.5066
## Balanced Accuracy : 0.8916
##
## 'Positive' Class : Heart Disease
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction Healthy Heart Disease
## Healthy 27 6
## Heart Disease 6 37
##
## Accuracy : 0.8421
## 95% CI : (0.7404, 0.9157)
## No Information Rate : 0.5658
## P-Value [Acc > NIR] : 0.0000002689
##
## Kappa : 0.6786
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.8605
## Specificity : 0.8182
## Pos Pred Value : 0.8605
## Neg Pred Value : 0.8182
## Prevalence : 0.5658
## Detection Rate : 0.4868
## Detection Prevalence : 0.5658
## Balanced Accuracy : 0.8393
##
## 'Positive' Class : Heart Disease
##
The training data give an accuracy of 88.99%, while the test data give an accuracy of 84.21%. Because the two accuracies are close, we can assume the model fits well (no sign of overfitting).
# Variable Transforming
# Expand every factor column of `heart` into 0/1 indicator columns: build the
# encoder, apply it to the same data, and store the result as a data frame.
dv <- dummyVars(" ~.", data = heart)
heart2 <- dv %>%
  predict(newdata = heart) %>%
  data.frame()
str(heart2)
## 'data.frame': 303 obs. of 28 variables:
## $ age : num 63 37 41 56 57 57 56 44 52 57 ...
## $ sex.Female : num 0 0 1 0 1 0 1 0 0 0 ...
## $ sex.Male : num 1 1 0 1 0 1 0 1 1 1 ...
## $ cp.Chest.Pain.Type.0 : num 0 0 0 0 1 1 0 0 0 0 ...
## $ cp.Chest.Pain.Type.1 : num 0 0 1 1 0 0 1 1 0 0 ...
## $ cp.Chest.Pain.Type.2 : num 0 1 0 0 0 0 0 0 1 1 ...
## $ cp.Chest.Pain.Type.3 : num 1 0 0 0 0 0 0 0 0 0 ...
## $ trestbps : num 145 130 130 120 120 140 140 120 172 150 ...
## $ chol : num 233 250 204 236 354 192 294 263 199 168 ...
## $ fbs...120.mg.dl : num 0 1 1 1 1 1 1 1 0 1 ...
## $ fbs...120.mg.dl.1 : num 1 0 0 0 0 0 0 0 1 0 ...
## $ restecg.Abnormality : num 0 1 0 1 1 1 0 1 1 1 ...
## $ restecg.Normal : num 1 0 1 0 0 0 1 0 0 0 ...
## $ restecg.Probable.or.Definite : num 0 0 0 0 0 0 0 0 0 0 ...
## $ thalach : num 150 187 172 178 163 148 153 173 162 174 ...
## $ exang.Exercise.Induced.Angina : num 0 0 0 0 1 0 0 0 0 0 ...
## $ exang.No.Exercise.Induced.Angina : num 1 1 1 1 0 1 1 1 1 1 ...
## $ oldpeak : num 2.3 3.5 1.4 0.8 0.6 0.4 1.3 0 0.5 1.6 ...
## $ slope.Peak.Excercise.ST.Slope.0 : num 1 1 0 0 0 0 0 0 0 0 ...
## $ slope.Peak.Excercise.ST.Slope.1 : num 0 0 0 0 0 1 1 0 0 0 ...
## $ slope.Peak.Excercise.ST.Slope.2 : num 0 0 1 1 1 0 0 1 1 1 ...
## $ ca : num 0 0 0 0 0 0 0 0 0 0 ...
## $ thal.Fixed.Defect.Thalassemia : num 0 1 1 1 1 0 1 0 0 1 ...
## $ thal.No.Thalassemia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ thal.Normal.Thalassemia : num 1 0 0 0 0 1 0 0 0 0 ...
## $ thal.Reversible.Defect.Thalassemia: num 0 0 0 0 0 0 0 1 1 0 ...
## $ target.Healthy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ target.Heart.Disease : num 1 1 1 1 1 1 1 1 1 1 ...
We need to transform all the variables into the numeric class, because KNN only processes numeric variables.
# Remove one indicator column from each two-level variable (target, sex, fbs,
# exang); its value is implied by the indicator that is kept.
heart2 <- heart2[, setdiff(
  names(heart2),
  c(
    "target.Healthy",
    "sex.Female",
    "fbs...120.mg.dl.1",
    "exang.Exercise.Induced.Angina"
  )
)]
names(heart2)
## [1] "age" "sex.Male"
## [3] "cp.Chest.Pain.Type.0" "cp.Chest.Pain.Type.1"
## [5] "cp.Chest.Pain.Type.2" "cp.Chest.Pain.Type.3"
## [7] "trestbps" "chol"
## [9] "fbs...120.mg.dl" "restecg.Abnormality"
## [11] "restecg.Normal" "restecg.Probable.or.Definite"
## [13] "thalach" "exang.No.Exercise.Induced.Angina"
## [15] "oldpeak" "slope.Peak.Excercise.ST.Slope.0"
## [17] "slope.Peak.Excercise.ST.Slope.1" "slope.Peak.Excercise.ST.Slope.2"
## [19] "ca" "thal.Fixed.Defect.Thalassemia"
## [21] "thal.No.Thalassemia" "thal.Normal.Thalassemia"
## [23] "thal.Reversible.Defect.Thalassemia" "target.Heart.Disease"
# Data Splitting
# Reuse the row indices `id` of the earlier split.
# The label is selected by name: `heart2` has 24 columns with
# `target.Heart.Disease` last, so the positional `1:22` / `23` used
# `thal.Reversible.Defect.Thalassemia` as the label and never touched the
# actual target.
heart2_train <- heart2[id, names(heart2) != "target.Heart.Disease"]
heart2_test <- heart2[-id, names(heart2) != "target.Heart.Disease"]
heart2_train_label <- as.factor(heart2[id, "target.Heart.Disease"])
heart2_test_label <- as.factor(heart2[-id, "target.Heart.Disease"])
Then, we need to scale the data as the scale from each variables are different.
## [1] 15.06652
# Data Train Evaluation
# Confusion matrix of the KNN predictions on the training set, with class "1"
# as the positive class; stored first, then printed.
knn.train <- confusionMatrix(
  data = pred.knn.train,
  reference = heart2_train_label,
  positive = "1"
)
print(knn.train)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 129 8
## 1 11 79
##
## Accuracy : 0.9163
## 95% CI : (0.8724, 0.9489)
## No Information Rate : 0.6167
## P-Value [Acc > NIR] : <0.0000000000000002
##
## Kappa : 0.8241
##
## Mcnemar's Test P-Value : 0.6464
##
## Sensitivity : 0.9080
## Specificity : 0.9214
## Pos Pred Value : 0.8778
## Neg Pred Value : 0.9416
## Prevalence : 0.3833
## Detection Rate : 0.3480
## Detection Prevalence : 0.3965
## Balanced Accuracy : 0.9147
##
## 'Positive' Class : 1
##
# Data Test Evaluation
# Confusion matrix of the KNN predictions on the test set, with class "1" as
# the positive class; stored first, then printed.
knn.test <- confusionMatrix(
  data = pred.knn.test,
  reference = heart2_test_label,
  positive = "1"
)
print(knn.test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 44 5
## 1 2 25
##
## Accuracy : 0.9079
## 95% CI : (0.8194, 0.9622)
## No Information Rate : 0.6053
## P-Value [Acc > NIR] : 0.000000003453
##
## Kappa : 0.8038
##
## Mcnemar's Test P-Value : 0.4497
##
## Sensitivity : 0.8333
## Specificity : 0.9565
## Pos Pred Value : 0.9259
## Neg Pred Value : 0.8980
## Prevalence : 0.3947
## Detection Rate : 0.3289
## Detection Prevalence : 0.3553
## Balanced Accuracy : 0.8949
##
## 'Positive' Class : 1
##
The training data give an accuracy of 91.63%, while the test data give an accuracy of 90.79%. Because the two accuracies are close, we can assume the model fits well (no sign of overfitting).
# One-row summary of the logistic model on the test set, taken from the
# confusion-matrix object `logtest`. Statistics are looked up by name, and
# tibble() replaces the deprecated data_frame().
eval_logit <- tibble(
  Accuracy = logtest$overall[["Accuracy"]],
  Recall = logtest$byClass[["Sensitivity"]],
  Precision = logtest$byClass[["Pos Pred Value"]]
)
print(eval_logit)
## # A tibble: 1 x 3
## Accuracy Recall Precision
## <dbl> <dbl> <dbl>
## 1 0.842 0.860 0.860
# One-row summary of the KNN model on the test set, taken from the
# confusion-matrix object `knn.test`. Statistics are looked up by name, and
# tibble() replaces the deprecated data_frame().
eval_knn <- tibble(
  Accuracy = knn.test$overall[["Accuracy"]],
  Recall = knn.test$byClass[["Sensitivity"]],
  Precision = knn.test$byClass[["Pos Pred Value"]]
)
print(eval_knn)
## # A tibble: 1 x 3
## Accuracy Recall Precision
## <dbl> <dbl> <dbl>
## 1 0.908 0.833 0.926
Logistic regression gives an accuracy of 84.21%, while the KNN model gives an accuracy of 90.79%. Logistic regression, however, is better at identifying actual heart-disease patients, with a recall of 86.05% compared with 83.33% for the KNN model.