1. Load Package

library(class)     
## Warning: package 'class' was built under R version 4.4.3
library(caret)     
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.4.3

2. Dataset kepuasan pelanggan

# Read the customer-satisfaction CSV and print a per-column summary.
csv_path <- "C:/R/dataset_kepuasan_buatan.csv"
data <- read.csv(file = csv_path)
summary(data)
##       Umur       Jenis_Kelamin      Lama_Pelanggan  Frekuensi_Beli 
##  Min.   :18.00   Length:200         Min.   : 1.00   Min.   : 1.00  
##  1st Qu.:30.00   Class :character   1st Qu.: 3.00   1st Qu.: 5.00  
##  Median :40.00   Mode  :character   Median : 5.00   Median :11.00  
##  Mean   :40.01                      Mean   : 5.41   Mean   :10.47  
##  3rd Qu.:50.00                      3rd Qu.: 8.00   3rd Qu.:16.00  
##  Max.   :60.00                      Max.   :10.00   Max.   :20.00  
##  Rating_Pelayanan Rating_Produk  Kategori_Produk      Kepuasan        
##  Min.   :1.000    Min.   :1.00   Length:200         Length:200        
##  1st Qu.:2.000    1st Qu.:2.00   Class :character   Class :character  
##  Median :3.000    Median :3.00   Mode  :character   Mode  :character  
##  Mean   :3.085    Mean   :2.91                                        
##  3rd Qu.:4.000    3rd Qu.:4.00                                        
##  Max.   :5.000    Max.   :5.00
# Count the missing values across every cell of the data frame.
sum(is.na(data))
## [1] 0

3. Ambil kolom yang diperlukan

# Keep only the four numeric columns that are used as k-NN features.
feature_columns <- c(
  "Rating_Pelayanan",
  "Lama_Pelanggan",
  "Frekuensi_Beli",
  "Rating_Produk"
)
data_selected <- data[feature_columns]

4. Normalisasi Data

# Min-max scale a vector onto the [0, 1] interval.
#
# Args:
#   x: A numeric vector (may contain NA).
#
# Returns:
#   A double vector of the same length as `x`. NA entries stay NA, a
#   constant vector maps to 0, and an empty or all-NA vector yields a
#   result of the same length filled with NA.
normalize <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    return(rep(NA_real_, length(x)))
  }

  lower <- min(observed)
  span <- max(observed) - lower

  # A constant column would otherwise produce 0 / 0 = NaN; dividing by 1
  # keeps such a column at 0 instead.
  if (isTRUE(span == 0)) {
    span <- 1
  }

  (x - lower) / span
}
# Scale every feature column, then rebuild a data frame from the results.
# NOTE(review): the scaling range comes from all rows of data_selected, so it
# includes any rows that are later held out for testing.
scaled_columns <- lapply(data_selected, normalize)
data_norm <- as.data.frame(scaled_columns)

# Class labels as a factor with a fixed level order; any value outside these
# three levels becomes NA.
kepuasan_levels <- c("Puas", "Netral", "Tidak Puas")
data_labels <- factor(data$Kepuasan, levels = kepuasan_levels)

5. Split Data Train/Test

# Randomly assign 80% of the rows to training and the rest to testing.
# The fixed seed makes the split reproducible.
set.seed(123)
n_rows <- nrow(data)

# seq_len() stays empty for a zero-row frame, whereas 1:0 would yield c(1, 0).
# floor() makes the truncation of a fractional sample size explicit.
train_index <- sample(seq_len(n_rows), floor(0.8 * n_rows))

# Features and labels are indexed with the same row positions so they stay
# aligned.
train_data <- data_norm[train_index, ]
test_data <- data_norm[-train_index, ]
train_labels <- data_labels[train_index]
test_labels <- data_labels[-train_index]

6. Latih Model K-NN dan Evaluasi

# Classify the held-out rows by majority vote among the 9 nearest training rows.
predicted_labels <- knn(train_data, test_data, cl = train_labels, k = 9)

# Cross-tabulate predictions against the true test labels and print the
# resulting statistics.
conf_matrix <- confusionMatrix(
  data = predicted_labels,
  reference = test_labels
)
print(conf_matrix)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Puas Netral Tidak Puas
##   Puas         14      2          0
##   Netral        1     11          2
##   Tidak Puas    0      0         10
## 
## Overall Statistics
##                                          
##                Accuracy : 0.875          
##                  95% CI : (0.732, 0.9581)
##     No Information Rate : 0.375          
##     P-Value [Acc > NIR] : 8.429e-11      
##                                          
##                   Kappa : 0.811          
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: Puas Class: Netral Class: Tidak Puas
## Sensitivity               0.9333        0.8462            0.8333
## Specificity               0.9200        0.8889            1.0000
## Pos Pred Value            0.8750        0.7857            1.0000
## Neg Pred Value            0.9583        0.9231            0.9333
## Prevalence                0.3750        0.3250            0.3000
## Detection Rate            0.3500        0.2750            0.2500
## Detection Prevalence      0.4000        0.3500            0.2500
## Balanced Accuracy         0.9267        0.8675            0.9167

7. Prediksi Kasus Hikmah Risfi Faizza

Kasus Pelanggan: Hikmah Risfi Faizza

- Nama: Hikmah Risfi Faizza
- Umur: 21 tahun
- Jenis Kelamin: Wanita
- Lama Menjadi Pelanggan: 3 tahun
- Frekuensi Pembelian: 10 kali
- Rating Pelayanan: 3 dari 5
- Rating Produk: 4 dari 5

Data Hikmah Risfi Faizza

# Min-max scale a single raw value using the observed range of a data column.
scale_to_data_range <- function(value, column) {
  (value - min(column)) / (max(column) - min(column))
}

# One-row feature frame for the new customer, scaled with the ranges of the
# corresponding columns in `data`.
hikmah <- data.frame(
  Rating_Pelayanan = scale_to_data_range(3, data$Rating_Pelayanan),
  Lama_Pelanggan = scale_to_data_range(3, data$Lama_Pelanggan),
  Frekuensi_Beli = scale_to_data_range(10, data$Frekuensi_Beli),
  Rating_Produk = scale_to_data_range(4, data$Rating_Produk)
)

Prediksi

# Classify the single new customer against the training rows (k = 9).
hikmah_pred <- knn(train_data, hikmah, cl = train_labels, k = 9)

# Print the predicted satisfaction class as plain text.
hikmah_label <- as.character(hikmah_pred)
cat("Prediksi Kepuasan Hikmah Risfi Faizza:", hikmah_label, "\n")
## Prediksi Kepuasan Hikmah Risfi Faizza: Puas

8. Visualisasi Posisi Hikmah dalam Data

# Principal component analysis of the scaled features; the first two
# component scores are kept for a 2-D scatter plot.
pca <- prcomp(data_norm)
pca_scores <- pca$x[, 1:2]
pca_data <- data.frame(pca_scores, Kepuasan = data_labels)

# Project the new customer into the same component space and take the first
# two scores.
hikmah_pca <- predict(pca, newdata = hikmah)
hikmah_point <- data.frame(
  PC1 = hikmah_pca[[1]],
  PC2 = hikmah_pca[[2]],
  Kepuasan = "Hikmah"
)

# Base scatter of all customers, coloured by satisfaction class.
pca_plot <- ggplot(pca_data, aes(x = PC1, y = PC2, color = Kepuasan)) +
  geom_point(alpha = 0.6)

# Overlay the new customer as a large black triangle with a text label
# placed slightly above it.
pca_plot <- pca_plot +
  geom_point(
    data = hikmah_point,
    aes(x = PC1, y = PC2),
    color = "black",
    size = 4,
    shape = 17
  ) +
  annotate(
    "text",
    x = hikmah_point$PC1,
    y = hikmah_point$PC2 + 0.03,
    label = "Hikmah",
    color = "black",
    size = 4
  )

# Titles and theme, then print the finished plot.
pca_plot <- pca_plot +
  labs(
    title = "Visualisasi PCA dari Kepuasan Pelanggan",
    subtitle = "Titik hitam adalah prediksi untuk Hikmah Risfi Faizza"
  ) +
  theme_minimal()

pca_plot

# Report each principal component's standard deviation and its share of the
# total variance.
summary(pca)
## Importance of components:
##                           PC1    PC2    PC3    PC4
## Standard deviation     0.3774 0.3486 0.3116 0.3071
## Proportion of Variance 0.3127 0.2669 0.2133 0.2071
## Cumulative Proportion  0.3127 0.5796 0.7929 1.0000