library(class)
## Warning: package 'class' was built under R version 4.4.3
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.4.3
# Read the customer-satisfaction table from disk, then show per-column
# summary statistics (auto-printed when run at top level).
data <- read.csv(
  file = "C:/R/dataset_kepuasan_buatan.csv"
)
summary(object = data)
## Umur Jenis_Kelamin Lama_Pelanggan Frekuensi_Beli
## Min. :18.00 Length:200 Min. : 1.00 Min. : 1.00
## 1st Qu.:30.00 Class :character 1st Qu.: 3.00 1st Qu.: 5.00
## Median :40.00 Mode :character Median : 5.00 Median :11.00
## Mean :40.01 Mean : 5.41 Mean :10.47
## 3rd Qu.:50.00 3rd Qu.: 8.00 3rd Qu.:16.00
## Max. :60.00 Max. :10.00 Max. :20.00
## Rating_Pelayanan Rating_Produk Kategori_Produk Kepuasan
## Min. :1.000 Min. :1.00 Length:200 Length:200
## 1st Qu.:2.000 1st Qu.:2.00 Class :character Class :character
## Median :3.000 Median :3.00 Mode :character Mode :character
## Mean :3.085 Mean :2.91
## 3rd Qu.:4.000 3rd Qu.:4.00
## Max. :5.000 Max. :5.00
# Count missing cells across the whole table (auto-printed at top level).
sum(is.na(data))
## [1] 0
# Keep only the four predictor columns that feed the classifier.
data_selected <- data[c(
  "Rating_Pelayanan",
  "Lama_Pelanggan",
  "Frekuensi_Beli",
  "Rating_Produk"
)]
#' Min-max rescale a vector onto the [0, 1] interval.
#'
#' @param x Numeric vector to rescale.
#' @param na.rm If `TRUE` (default), missing values are ignored when the
#'   minimum and maximum are computed and stay `NA` in the result. If
#'   `FALSE`, any `NA` in `x` makes the whole result `NA`.
#' @return A double vector of the same length as `x`. A constant vector maps
#'   to all zeros instead of `NaN`; empty or all-`NA` input yields `NA`s of
#'   the same length without emitting warnings.
normalize <- function(x, na.rm = TRUE) {
  if (length(x) == 0L || all(is.na(x))) {
    # Nothing to scale; skip range() so it does not warn on empty input.
    return(rep(NA_real_, length(x)))
  }

  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]

  # A zero span means every value is identical; dividing by 1 instead maps
  # them all to 0 rather than producing 0 / 0 = NaN.
  if (!is.na(span) && span == 0) {
    span <- 1
  }

  (x - bounds[1]) / span
}
# Rescale every selected predictor with normalize() so that all features
# contribute on the same scale to the distance computation in knn().
# NOTE(review): the scaling bounds are taken from the full dataset (train and
# test rows together), i.e. before the split below - confirm this is intended.
data_norm <- as.data.frame(lapply(data_selected, normalize))

# Encode the target with an explicit level order. Values outside these three
# levels would silently become NA, so fail early with a clear message.
data_labels <- factor(data$Kepuasan, levels = c("Puas", "Netral", "Tidak Puas"))
if (anyNA(data_labels)) {
  stop(
    "Kepuasan contains missing values or labels other than ",
    "'Puas', 'Netral', 'Tidak Puas'.",
    call. = FALSE
  )
}

# Reproducible 80/20 split. seq_len() stays correct for a zero-row table and
# floor() makes the truncation of a fractional sample size explicit.
set.seed(123)
train_index <- sample(seq_len(nrow(data)), floor(0.8 * nrow(data)))

train_data <- data_norm[train_index, ]
test_data <- data_norm[-train_index, ]
train_labels <- data_labels[train_index]
test_labels <- data_labels[-train_index]
# Classify the held-out rows by majority vote among their 9 nearest training
# neighbours, then tabulate predictions against the true labels.
predicted_labels <- class::knn(
  train = train_data,
  test = test_data,
  cl = train_labels,
  k = 9
)
conf_matrix <- caret::confusionMatrix(
  data = predicted_labels,
  reference = test_labels
)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Puas Netral Tidak Puas
## Puas 14 2 0
## Netral 1 11 2
## Tidak Puas 0 0 10
##
## Overall Statistics
##
## Accuracy : 0.875
## 95% CI : (0.732, 0.9581)
## No Information Rate : 0.375
## P-Value [Acc > NIR] : 8.429e-11
##
## Kappa : 0.811
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Puas Class: Netral Class: Tidak Puas
## Sensitivity 0.9333 0.8462 0.8333
## Specificity 0.9200 0.8889 1.0000
## Pos Pred Value 0.8750 0.7857 1.0000
## Neg Pred Value 0.9583 0.9231 0.9333
## Prevalence 0.3750 0.3250 0.3000
## Detection Rate 0.3500 0.2750 0.2500
## Detection Prevalence 0.4000 0.3500 0.2500
## Balanced Accuracy 0.9267 0.8675 0.9167
Kasus Pelanggan: Hikmah Risfi Faizza. Nama: Hikmah Risfi Faizza; Umur: 21 tahun; Jenis Kelamin: Wanita; Lama Menjadi Pelanggan: 3 tahun; Frekuensi Pembelian: 10 kali; Rating Pelayanan: 3 dari 5; Rating Produk: 4 dari 5.
# Raw answers for the new customer, listed in the same column order as the
# training features.
hikmah_raw <- c(
  Rating_Pelayanan = 3,
  Lama_Pelanggan = 3,
  Frekuensi_Beli = 10,
  Rating_Produk = 4
)

# Min-max scale each answer with the minimum and maximum of the matching
# column in the full dataset, producing a one-row data frame.
hikmah <- as.data.frame(Map(
  function(value, column) {
    (value - min(column)) / (max(column) - min(column))
  },
  hikmah_raw,
  data[names(hikmah_raw)]
))

# Predict the satisfaction class from the 9 nearest training neighbours.
hikmah_pred <- class::knn(
  train = train_data,
  test = hikmah,
  cl = train_labels,
  k = 9
)
cat("Prediksi Kepuasan Hikmah Risfi Faizza:", as.character(hikmah_pred), "\n")
## Prediksi Kepuasan Hikmah Risfi Faizza: Puas
# Visualise where Hikmah sits within the data
# Fit a PCA on the scaled predictors and keep the first two component scores
# together with each customer's satisfaction label.
pca <- prcomp(data_norm)
pca_data <- data.frame(
  pca$x[, 1:2],
  Kepuasan = data_labels
)

# Project the new customer into the same component space.
hikmah_pca <- predict(pca, newdata = hikmah)
hikmah_point <- data.frame(
  PC1 = hikmah_pca[1],
  PC2 = hikmah_pca[2],
  Kepuasan = "Hikmah"
)

# Scatter of all customers coloured by label, with the new customer drawn as
# a large black triangle and a text label just above it.
ggplot(data = pca_data, mapping = aes(x = PC1, y = PC2, color = Kepuasan)) +
  geom_point(alpha = 0.6) +
  geom_point(
    data = hikmah_point,
    mapping = aes(x = PC1, y = PC2),
    color = "black",
    size = 4,
    shape = 17
  ) +
  annotate(
    "text",
    x = hikmah_point$PC1,
    y = hikmah_point$PC2 + 0.03,
    label = "Hikmah",
    color = "black",
    size = 4
  ) +
  labs(
    title = "Visualisasi PCA dari Kepuasan Pelanggan",
    subtitle = "Titik hitam adalah prediksi untuk Hikmah Risfi Faizza"
  ) +
  theme_minimal()

# Variance explained by each principal component (auto-printed at top level).
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 0.3774 0.3486 0.3116 0.3071
## Proportion of Variance 0.3127 0.2669 0.2133 0.2071
## Cumulative Proportion 0.3127 0.5796 0.7929 1.0000