The dataset contain images of Animals, Fruits and Vegetables.The task is to classify and cluster these images based on visual features.The dataset contain main 3 feature.
suppressPackageStartupMessages({
suppressWarnings({
library(magick)
library(ggplot2)
library(imager)
library(dplyr)
library(caret)
library(hopkins)
library(clustertend)
library(cluster)
library(factoextra)
library(dbscan)
library(neuralnet)
library(tidyverse)
library(GGally)
})
})
Each image is resized into 28*28 pixels and convert into gray scale for uniformality. And having 10 % favorite image (my favorite animal is Dog).Favorite animal is labeled as 1 and others as 0.Contain a total of 325 image and contain 786 (784 is pixel value , favorite animal, category (animal, fruits, vegetables)).
animal_path <- "D:/alliance/3rd sem/machine learning_2/DATASET/img_DS_1/img_DS/Animal"
fruit_path <- "D:/alliance/3rd sem/machine learning_2/DATASET/img_DS_1/img_DS/Fruits"
vegitable_path <- "D:/alliance/3rd sem/machine learning_2/DATASET/img_DS_1/img_DS/vegetable"
img_path <- list(animal_path, fruit_path, vegitable_path)
img_path
## [[1]]
## [1] "D:/alliance/3rd sem/machine learning_2/DATASET/img_DS_1/img_DS/Animal"
##
## [[2]]
## [1] "D:/alliance/3rd sem/machine learning_2/DATASET/img_DS_1/img_DS/Fruits"
##
## [[3]]
## [1] "D:/alliance/3rd sem/machine learning_2/DATASET/img_DS_1/img_DS/vegetable"
df <- data.frame(matrix = numeric(), favorite = integer(), category = character(), stringsAsFactors = FALSE)
Each image is processed as follows,
for (paths in img_path){
image_file <- list.files(paths, recursive = TRUE, full.names = TRUE)
for (image in image_file){
img <- image_read(image)
img <- image_resize(img, "28x28!")
img_gray <- image_convert(img, colorspace="gray")
img_data <- image_data(img_gray)
img_matrix <- as.numeric(as.matrix(img_data))
if(grepl("dog \\([0-9]+\\)",image)){
label <- 1
}else{
label <- 0
}
category <- basename(paths)
df <- rbind(df, c(img_matrix, favorite = label, category = category))
}
}
print(dim(df))
## [1] 325 786
str(df)
## 'data.frame': 325 obs. of 786 variables:
## $ X.123. : chr "123" "103" "109" "145" ...
## $ X.131. : chr "131" "133" "107" "160" ...
## $ X.135. : chr "135" "136" "105" "185" ...
## $ X.137. : chr "137" "126" "107" "207" ...
## $ X.139. : chr "139" "127" "121" "201" ...
## $ X.146. : chr "146" "154" "119" "202" ...
## $ X.143. : chr "143" "152" "106" "197" ...
## $ X.139..1 : chr "139" "142" "114" "197" ...
## $ X.142. : chr "142" "155" "131" "163" ...
## $ X.136. : chr "136" "169" "131" "147" ...
## $ X.139..2 : chr "139" "169" "129" "171" ...
## $ X.147. : chr "147" "166" "116" "180" ...
## $ X.145. : chr "145" "181" "111" "168" ...
## $ X.167. : chr "167" "193" "116" "168" ...
## $ X.170. : chr "170" "175" "116" "166" ...
## $ X.167..1 : chr "167" "164" "114" "149" ...
## $ X.173. : chr "173" "179" "112" "147" ...
## $ X.169. : chr "169" "181" "109" "157" ...
## $ X.174. : chr "174" "185" "120" "167" ...
## $ X.167..2 : chr "167" "177" "133" "161" ...
## $ X.165. : chr "165" "169" "117" "159" ...
## $ X.170..1 : chr "170" "161" "113" "162" ...
## $ X.187. : chr "187" "151" "114" "163" ...
## $ X.184. : chr "184" "158" "121" "177" ...
## $ X.169..1 : chr "169" "140" "130" "180" ...
## $ X.158. : chr "158" "120" "117" "177" ...
## $ X.156. : chr "156" "139" "100" "171" ...
## $ X.155. : chr "155" "137" "97" "173" ...
## $ X.107. : chr "107" "117" "115" "164" ...
## $ X.121. : chr "121" "128" "109" "177" ...
## $ X.137..1 : chr "137" "125" "110" "202" ...
## $ X.144. : chr "144" "122" "118" "218" ...
## $ X.159. : chr "159" "148" "136" "207" ...
## $ X.151. : chr "151" "177" "121" "212" ...
## $ X.139..3 : chr "139" "152" "108" "229" ...
## $ X.138. : chr "138" "139" "124" "237" ...
## $ X.134. : chr "134" "146" "128" "165" ...
## $ X.128. : chr "128" "158" "139" "157" ...
## $ X.135..1 : chr "135" "148" "133" "176" ...
## $ X.152. : chr "152" "137" "121" "176" ...
## $ X.150. : chr "150" "164" "108" "175" ...
## $ X.169..2 : chr "169" "180" "103" "177" ...
## $ X.175. : chr "175" "188" "112" "179" ...
## $ X.179. : chr "179" "167" "117" "160" ...
## $ X.184..1 : chr "184" "171" "119" "135" ...
## $ X.177. : chr "177" "184" "113" "122" ...
## $ X.184..2 : chr "184" "189" "121" "135" ...
## $ X.187..1 : chr "187" "174" "136" "167" ...
## $ X.183. : chr "183" "157" "116" "182" ...
## $ X.187..2 : chr "187" "163" "113" "197" ...
## $ X.192. : chr "192" "155" "119" "209" ...
## $ X.188. : chr "188" "156" "130" "229" ...
## $ X.174..1 : chr "174" "149" "145" "228" ...
## $ X.169..3 : chr "169" "129" "139" "226" ...
## $ X.168. : chr "168" "139" "120" "230" ...
## $ X.167..3 : chr "167" "137" "103" "230" ...
## $ X.113. : chr "113" "127" "109" "161" ...
## $ X.123..1 : chr "123" "121" "107" "171" ...
## $ X.133. : chr "133" "136" "105" "186" ...
## $ X.145..1 : chr "145" "131" "122" "178" ...
## $ X.168..1 : chr "168" "150" "134" "169" ...
## $ X.153. : chr "153" "165" "114" "142" ...
## $ X.134..1 : chr "134" "165" "118" "170" ...
## $ X.133..1 : chr "133" "157" "121" "205" ...
## $ X.129. : chr "129" "157" "106" "169" ...
## $ X.123..2 : chr "123" "151" "117" "176" ...
## $ X.141. : chr "141" "137" "114" "179" ...
## $ X.153..1 : chr "153" "134" "104" "187" ...
## $ X.153..2 : chr "153" "156" "117" "188" ...
## $ X.168..2 : chr "168" "163" "123" "192" ...
## $ X.181. : chr "181" "184" "117" "191" ...
## $ X.184..3 : chr "184" "179" "116" "162" ...
## $ X.184..4 : chr "184" "173" "106" "121" ...
## $ X.179..1 : chr "179" "185" "113" "92" ...
## $ X.187..3 : chr "187" "173" "112" "128" ...
## $ X.185. : chr "185" "180" "133" "182" ...
## $ X.183..1 : chr "183" "151" "143" "166" ...
## $ X.199. : chr "199" "127" "120" "219" ...
## $ X.199..1 : chr "199" "124" "118" "221" ...
## $ X.194. : chr "194" "114" "131" "229" ...
## $ X.189. : chr "189" "130" "114" "228" ...
## $ X.181..1 : chr "181" "133" "117" "223" ...
## $ X.178. : chr "178" "127" "120" "221" ...
## $ X.172. : chr "172" "127" "108" "219" ...
## $ X.136..1 : chr "136" "138" "101" "170" ...
## $ X.129..1 : chr "129" "146" "107" "167" ...
## $ X.136..2 : chr "136" "138" "108" "156" ...
## $ X.153..3 : chr "153" "148" "119" "145" ...
## $ X.165..1 : chr "165" "141" "120" "164" ...
## $ X.162. : chr "162" "149" "109" "148" ...
## $ X.149. : chr "149" "162" "119" "98" ...
## $ X.156..1 : chr "156" "148" "128" "135" ...
## $ X.150..1 : chr "150" "161" "126" "183" ...
## $ X.143..1 : chr "143" "142" "125" "174" ...
## $ X.165..2 : chr "165" "143" "117" "152" ...
## $ X.167..4 : chr "167" "151" "130" "183" ...
## $ X.166. : chr "166" "145" "141" "190" ...
## $ X.175..1 : chr "175" "163" "134" "187" ...
## $ X.177..1 : chr "177" "185" "125" "176" ...
## [list output truncated]
colnames(df) <- c(paste0("pixel", 1:784), "favorite", "category")
df$favorite <- as.factor(df$favorite)
df$category <- as.factor(df$category)
df$category <- as.numeric(df$category)
set.seed(45)
train_index <- createDataPartition(df$favorite, p = .7, list=FALSE)
train_data <- df[train_index, ]
test_data <- df[-train_index, ]
#model <- glm(favorite~., data = train_data, family = binomial)
#summary(model)
because of higher dimention it is hard to train so we use pca to reduce the dimentionality.
train_data[, !names(train_data) %in% c("favorite", "category")] <- lapply(train_data[, !names(train_data) %in% c("favorite", "category")], as.numeric)
test_data[, !names(train_data) %in% c("favorite", "category")] <- lapply(test_data[, !names(train_data) %in% c("favorite", "category")], as.numeric)
scale_train <- scale(train_data[, !names(train_data) %in% c("favorite", "category")])
scale_test <- scale(test_data[, !names(train_data) %in% c("favorite", "category")])
pca_tr <- prcomp(scale_train, center = FALSE, scale = FALSE)
#sample pca result
pca_tr$x[1:5,1:15]
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 2 -5.49499882 4.2137970 -9.0999637 1.4246030 -0.6610506 0.1498807 -1.090098
## 3 0.06259991 9.9669329 1.4200319 0.8183340 -1.1492677 -0.2524310 2.250656
## 4 2.38621311 0.4521827 -0.8269685 -0.1265222 0.6714670 0.2489085 1.266212
## 5 -5.00227507 4.5019779 -8.7360584 1.4291610 -0.7803301 0.1734669 -1.034691
## 6 -18.35184733 -0.6523587 0.5689522 7.3296221 1.4681680 0.2182316 2.805686
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## 2 0.2384838 -0.8628841 1.4436313 -0.10345712 0.1013756 -1.1539528 0.3266641
## 3 -1.6541231 -1.2715212 0.3707211 -3.44942689 1.1149501 -1.1122315 0.3916325
## 4 -0.1715415 -2.8568604 0.6348533 -0.29784121 0.5338896 0.4580468 0.3767122
## 5 0.2914949 -0.8001614 1.3778328 -0.09016435 0.1078102 -1.0785787 0.3151480
## 6 3.8281287 1.0478799 -0.6700217 -0.57399371 1.3592313 -1.5420958 -2.4105858
## PC15
## 2 -0.5782168
## 3 1.7371740
## 4 -1.6365730
## 5 -0.5438888
## 6 -0.8557334
train_df <- data.frame(pca_tr$x[,1:50], favorite = train_data$favorite)
train_df$category <- train_data$category
test_df <- as.data.frame(predict(pca_tr, newdata = scale_test)[, 1:50])
test_df$favorite <- test_data$favorite
test_df$category <- test_data$category
dim(train_df)
## [1] 228 52
dim(test_df)
## [1] 97 52
model <- glm(favorite~., data = train_df, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model)
##
## Call:
## glm(formula = favorite ~ ., family = binomial, data = train_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 29.9717 78213.7762 0.000 1.000
## PC1 1.5955 1465.0678 0.001 0.999
## PC2 0.9147 5057.6389 0.000 1.000
## PC3 -0.8162 5660.6583 0.000 1.000
## PC4 2.5604 5282.4072 0.000 1.000
## PC5 0.3466 4432.0143 0.000 1.000
## PC6 -3.0644 6635.5827 0.000 1.000
## PC7 -0.1666 6753.3479 0.000 1.000
## PC8 0.0980 10161.6468 0.000 1.000
## PC9 -0.8527 6419.8291 0.000 1.000
## PC10 -3.6420 14422.5592 0.000 1.000
## PC11 2.8790 9159.9165 0.000 1.000
## PC12 -3.0246 15921.2475 0.000 1.000
## PC13 7.9035 10347.2079 0.001 0.999
## PC14 2.6336 17480.0666 0.000 1.000
## PC15 3.8821 11173.5586 0.000 1.000
## PC16 1.4842 17104.9828 0.000 1.000
## PC17 -2.3405 14062.5917 0.000 1.000
## PC18 1.6280 10938.2085 0.000 1.000
## PC19 9.0613 18515.1289 0.000 1.000
## PC20 -11.4049 10355.9018 -0.001 0.999
## PC21 -4.0665 12791.2083 0.000 1.000
## PC22 -0.1198 21282.9384 0.000 1.000
## PC23 -7.2489 12685.9469 -0.001 1.000
## PC24 5.4674 16291.3743 0.000 1.000
## PC25 0.6129 19780.4054 0.000 1.000
## PC26 -1.2440 12825.5521 0.000 1.000
## PC27 5.1228 12043.7652 0.000 1.000
## PC28 -2.7165 16298.6711 0.000 1.000
## PC29 -8.5042 11553.3708 -0.001 0.999
## PC30 -7.5231 19323.7645 0.000 1.000
## PC31 1.7186 14905.3893 0.000 1.000
## PC32 12.1632 14930.3652 0.001 0.999
## PC33 4.7950 35677.5566 0.000 1.000
## PC34 12.6892 16694.5793 0.001 0.999
## PC35 5.8766 23895.1123 0.000 1.000
## PC36 -6.7885 17594.4142 0.000 1.000
## PC37 7.7477 20334.8434 0.000 1.000
## PC38 5.9656 14029.4113 0.000 1.000
## PC39 -1.2887 35022.3576 0.000 1.000
## PC40 -3.6231 21427.8016 0.000 1.000
## PC41 -4.2559 32695.1357 0.000 1.000
## PC42 5.8827 25508.3381 0.000 1.000
## PC43 -0.2859 18743.3619 0.000 1.000
## PC44 11.7459 24603.6469 0.000 1.000
## PC45 3.0038 22086.9724 0.000 1.000
## PC46 -4.5530 27117.9812 0.000 1.000
## PC47 1.9184 20499.6500 0.000 1.000
## PC48 14.1599 16513.4860 0.001 0.999
## PC49 -5.6806 25612.5983 0.000 1.000
## PC50 6.0938 15349.6183 0.000 1.000
## category -67.0967 45259.3985 -0.001 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1.5344e+02 on 227 degrees of freedom
## Residual deviance: 1.8989e-08 on 176 degrees of freedom
## AIC: 104
##
## Number of Fisher Scoring iterations: 25
y_predict <- predict(model, newdata = test_df, type = "response")
predict_label <- ifelse(y_predict > 0.5, 1 , 0)
table(predict = predict_label, actual = test_df$favorite)
## actual
## predict 0 1
## 0 87 10
accuracy <- mean(predict_label == test_data$favorite)
accuracy
## [1] 0.8969072
data.frame(predict_label, test_df$favorite)
## predict_label test_df.favorite
## 1 0 0
## 7 0 0
## 8 0 0
## 15 0 0
## 16 0 0
## 17 0 0
## 20 0 0
## 21 0 0
## 23 0 0
## 24 0 0
## 25 0 0
## 26 0 0
## 29 0 0
## 32 0 0
## 33 0 1
## 34 0 1
## 37 0 1
## 40 0 1
## 42 0 1
## 48 0 1
## 53 0 1
## 59 0 1
## 60 0 1
## 66 0 1
## 67 0 0
## 70 0 0
## 72 0 0
## 80 0 0
## 82 0 0
## 86 0 0
## 87 0 0
## 103 0 0
## 105 0 0
## 108 0 0
## 110 0 0
## 111 0 0
## 116 0 0
## 120 0 0
## 125 0 0
## 126 0 0
## 132 0 0
## 134 0 0
## 137 0 0
## 138 0 0
## 139 0 0
## 141 0 0
## 142 0 0
## 143 0 0
## 144 0 0
## 145 0 0
## 149 0 0
## 150 0 0
## 151 0 0
## 152 0 0
## 157 0 0
## 158 0 0
## 165 0 0
## 167 0 0
## 175 0 0
## 177 0 0
## 178 0 0
## 184 0 0
## 186 0 0
## 190 0 0
## 192 0 0
## 193 0 0
## 196 0 0
## 198 0 0
## 199 0 0
## 200 0 0
## 201 0 0
## 208 0 0
## 209 0 0
## 217 0 0
## 219 0 0
## 220 0 0
## 221 0 0
## 222 0 0
## 223 0 0
## 228 0 0
## 233 0 0
## 236 0 0
## 237 0 0
## 243 0 0
## 246 0 0
## 249 0 0
## 254 0 0
## 256 0 0
## 263 0 0
## 269 0 0
## 270 0 0
## 280 0 0
## 281 0 0
## 290 0 0
## 291 0 0
## 295 0 0
## 325 0 0
Since the Favorite animal constitues only 10% of the dataset, this introduce significant class inbalance. This imbalance means that the model may endup biased towards predicting the non favorite class.
df[, !names(df) %in% c("favorite", "category")] <- lapply(df[, !names(df) %in% c("favorite", "category")], as.numeric)
scaled_df <-scale(df[, !names(df) %in% c("favorite", "category")])
dim(scaled_df)
## [1] 325 784
#clusterable ???
set.seed(123)
hopkinsts <- hopkins(scaled_df, n=nrow(scaled_df)-1)
## Warning: Package `clustertend` is deprecated. Use package `hopkins` instead.
print(hopkinsts)
## $H
## [1] 0.3868863
The Hopkins statistic is used to assess the cluster tendency of a dataset, i.e., whether the data contains meaningful clusters or if the data points are randomly distributed. The value of the Hopkins statistic ranges from 0 to 1:
A value close to 1 indicates that the dataset is highly clustered and not random. A value close to 0.5 suggests that the data is uniformly distributed or random and does not exhibit cluster structure. A value close to 0 would indicate anti-clustering or regular spacing between points.
Hopkins value is less that .5 which indicate this dataset is not good for clustering.
Anyway we will try doing clustering.
km <- kmeans(scaled_df, centers = 3, nstart = 25)
fviz_cluster(km,data = scaled_df, geom = "point")
km$cluster
## [1] 2 2 2 2 2 1 1 2 2 1 1 2 2 1 2 2 1 1 1 2 1 1 1 1 1 2 1 2 2 1 1 2 2 1 2 2 1
## [38] 1 3 2 2 2 2 2 3 2 3 2 2 2 1 3 2 2 2 2 3 2 1 1 2 2 2 2 2 2 2 1 2 2 2 1 1 1
## [75] 2 2 1 2 2 1 1 1 1 1 2 2 1 1 2 1 2 2 2 2 1 2 1 1 2 1 2 1 1 1 2 1 1 1 2 2 1
## [112] 2 2 2 1 2 2 1 1 1 2 2 1 1 2 1 2 2 2 1 2 1 1 2 1 1 2 1 1 1 2 2 3 2 2 2 3 1
## [149] 3 1 1 1 1 3 1 1 1 1 1 2 2 1 2 1 2 3 2 3 2 2 1 2 2 3 3 2 3 3 3 3 3 1 2 3 1
## [186] 1 3 1 2 1 2 2 3 1 2 3 3 1 3 1 2 2 1 2 1 3 3 3 2 3 3 3 2 3 2 2 3 3 1 1 1 3
## [223] 3 3 3 2 3 3 3 1 1 3 3 1 2 1 1 1 1 2 1 2 2 2 2 3 3 3 2 1 1 3 2 3 3 1 2 3 2
## [260] 3 2 3 1 3 2 2 1 3 3 3 3 2 3 3 3 3 3 2 2 3 3 3 2 3 3 2 2 1 3 1 3 3 1 1 1 2
## [297] 3 3 3 1 3 3 2 1 3 2 2 3 3 2 2 1 3 2 3 3 1 1 3 1 1 3 3 3 2
df$category <- as.factor(df$category)
df$category <- as.numeric(df$category)
df$category
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [260] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [297] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
table(km$cluster, df$category)
##
## 1 2 3
## 1 73 19 20
## 2 83 20 26
## 3 9 33 42
pca_tr <- prcomp(scaled_df, center = FALSE, scale = FALSE)
scaled_df_ <- data.frame(pca_tr$x[,1:50])
hopkinsts <- hopkins(scaled_df_, n=nrow(scaled_df_)-1)
## Warning: Package `clustertend` is deprecated. Use package `hopkins` instead.
print(hopkinsts)
## $H
## [1] 0.3293306
km <- kmeans(scaled_df_, centers = 3, nstart = 25)
fviz_cluster(km,data = scaled_df_, geom = "point")
table(km$cluster, df$category)
##
## 1 2 3
## 1 9 33 42
## 2 73 19 20
## 3 83 20 26
we tried to Cluster using category (animals, fruits and vegetable), but the result suggest that clusters are not seperated according to categories, and shows some degree of overlaping across the categories. cluster 1 has a majority of 3rd item. but still contain 9 animals, 33 fruits and 42 vegetables together. cluster 2 has a majority of animals.
hc1 <- hclust(dist(scaled_df), method = "ward.D2")
plot(hc1)
clusters <- cutree(hc1, k = 3)
table(clusters, df$category)
##
## clusters 1 2 3
## 1 38 24 28
## 2 124 29 32
## 3 3 19 28
cluster 1 almost equally contain all category cluster 2 has majority of category animals. cluster 3 has majority of 3rd category vegetables
kNNdistplot(x = scaled_df, k = 5)
abline(h = 28, col="black")
knndistplot shows that the eps(radius = 30) if we reduce it we will get a lot of outliers. no proper cluster.
dbs <- dbscan(scaled_df, eps = 30, minPts = 3)
table(dbs$cluster, df$category)
##
## 1 2 3
## 0 1 0 3
## 1 164 72 85
fviz_cluster(dbs, data=scaled_df,geom = "point")
we used dbscan to cluster, cluster 0 represents the noise points. Mainly 1 cluster is there which contain almost all the image.
Both the logistic Regression and ANN model with different configuration achived accuracy of 89.69%.
netlayer <- c(10,20,5)
net.network <-neuralnet(favorite ~ . , train_df, hidden = netlayer, threshold = 0.01)
nn.predict <- predict(net.network, test_df[, !names(test_df) %in% 'favorite'])
predicted_class <- ifelse(nn.predict[,1] >.5, 1, 0)
confusion_matrix <- table(predicted = predicted_class, actual = test_df$favorite)
accuracy <-sum(diag(confusion_matrix)) / sum(confusion_matrix)
accuracy
## [1] 0.8969072