library(class)
library(ggplot2)
library(ggrepel)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(FNN)
## Warning: package 'FNN' was built under R version 4.4.3
##
## Attaching package: 'FNN'
## The following objects are masked from 'package:class':
##
## knn, knn.cv
Seorang pemilik toko ingin mengelompokkan pelanggan berdasarkan usia dan pendapatan mereka agar strategi pemasarannya lebih tepat sasaran. Dalam analisis ini, pelanggan dikelompokkan ke dalam dua kategori, yaitu kategori A dan kategori B.
Selanjutnya akan ditentukan kategori seorang pelanggan baru yang berusia 36 tahun dengan pendapatan 41 juta menggunakan metode K-Nearest Neighbors (KNN) dengan k = 5.
# Training set of 20 customers: two numeric predictors (Age, Income) and the
# known category label (Class, a factor with levels "A" and "B").
data <- data.frame(
  Age = c(
    29, 51, 33, 24, 40, 45, 37, 48, 52, 31,
    27, 44, 36, 39, 50, 41, 46, 25, 28, 30
  ),
  Income = c(
    35, 43, 29, 25, 41, 38, 32, 40, 45, 27,
    26, 39, 34, 37, 46, 40, 39, 26, 27, 31
  ),
  Class = factor(c(
    "A", "B", "A", "A", "B", "B", "A", "B", "B", "A",
    "A", "B", "A", "A", "B", "B", "B", "A", "A", "A"
  ))
)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.3
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Render the training data as a styled table that does not stretch to the
# full page width.
kable_styling(kable(data), full_width = FALSE)
Age | Income | Class |
---|---|---|
29 | 35 | A |
51 | 43 | B |
33 | 29 | A |
24 | 25 | A |
40 | 41 | B |
45 | 38 | B |
37 | 32 | A |
48 | 40 | B |
52 | 45 | B |
31 | 27 | A |
27 | 26 | A |
44 | 39 | B |
36 | 34 | A |
39 | 37 | A |
50 | 46 | B |
41 | 40 | B |
46 | 39 | B |
25 | 26 | A |
28 | 27 | A |
30 | 31 | A |
# Standardize the two predictors (z-scores) so that Age and Income contribute
# on a comparable scale to the distances used by KNN.
feature_cols <- c("Age", "Income")
scaled_features <- scale(data[, feature_cols])
scaled_data <- as.data.frame(scaled_features)
scaled_data$Class <- data$Class

# New customer to classify. It is standardized with exactly the centers and
# scales that scale() computed for the training data (stored as attributes on
# its result), instead of recomputing them separately.
new_data <- data.frame(Age = 36, Income = 41)
scaled_new_data <- as.data.frame(scale(
  new_data[, feature_cols],
  center = attr(scaled_features, "scaled:center"),
  scale = attr(scaled_features, "scaled:scale")
))

# Number of nearest neighbours that vote on the class.
k_value <- 5

# Both 'class' and 'FNN' export knn(); FNN is attached later and masks the
# 'class' version, so the namespace is spelled out to make the choice
# independent of library() order.
predicted_class <- FNN::knn(
  train = scaled_data[, feature_cols],
  test = scaled_new_data,
  cl = scaled_data$Class,
  k = k_value
)
predicted_class <- as.character(predicted_class)
cat("Predicted Class for New Data:", predicted_class, "\n")
## Predicted Class for New Data: B
# Look up the k nearest training points to the new customer in the
# standardized feature space; nn.index holds their row positions.
neighbors_indices <- get.knnx(
  data = scaled_data[, 1:2],
  query = scaled_new_data,
  k = k_value
)[["nn.index"]]

# Pull the matching rows from the unscaled data so they can be drawn on the
# original Age/Income axes.
neighbors <- data[as.vector(neighbors_indices), ]
# Scatter plot of the training customers coloured by class, with the new
# customer marked by a red cross and an arrow from each of its nearest
# neighbours pointing to it.
plot <- ggplot(data, aes(x = Age, y = Income, color = Class)) +
  geom_point(size = 3) +
  # annotate() draws the single new point exactly once. A geom_point() layer
  # with length-1 aesthetics would inherit the 20-row plot data, overplot the
  # marker once per row and raise the "All aesthetics have length 1" warning.
  annotate(
    "point",
    x = new_data$Age,
    y = new_data$Income,
    colour = "red",
    size = 5,
    shape = 4
  ) +
  # One segment per neighbour row; the end point is the new customer.
  geom_segment(
    data = neighbors,
    aes(x = Age, y = Income, xend = new_data$Age, yend = new_data$Income),
    arrow = arrow(length = unit(0.2, "cm")),
    color = "blue"
  ) +
  ggtitle("KNN Classification with Nearest Neighbors") +
  theme_minimal()
print(plot)
## Warning in geom_point(aes(x = new_data$Age, y = new_data$Income), color = "red", : All aesthetics have length 1, but the data has 20 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
Berdasarkan hasil klasifikasi dengan metode K-Nearest Neighbors (KNN) dengan k = 5, pelanggan baru dengan usia 36 tahun dan pendapatan 41 juta diklasifikasikan ke dalam kategori B, karena mayoritas dari lima tetangga terdekatnya (dihitung setelah usia dan pendapatan distandardisasi) termasuk kategori B. Hal ini menunjukkan bahwa pelanggan ini memiliki karakteristik yang lebih mirip dengan kategori B, sehingga kemungkinan besar memiliki kecenderungan sesuai dengan kelompok tersebut.