# Dataset: Credit Card Data (Intermediate)
# Dataset link: https://www.kaggle.com/datasets/samira1992/credit-card-data-intermediate-dataset
# Variables:
#   X1  = BALANCE
#   X2  = BALANCE_FREQUENCY
#   X3  = PURCHASES
#   X4  = ONEOFF_PURCHASES
#   X5  = INSTALLMENTS_PURCHASES
#   X6  = CASH_ADVANCE
#   X7  = PURCHASES_FREQUENCY
#   X8  = ONEOFF_PURCHASES_FREQUENCY
#   X9  = PURCHASES_INSTALLMENTS_FREQUENCY
#   X10 = CASH_ADVANCE_FREQUENCY
#   X11 = CASH_ADVANCE_TRX
#   X12 = PURCHASES_TRX
#   X13 = CREDIT_LIMIT
#   X14 = PAYMENTS
#   X15 = MINIMUM_PAYMENTS
#   X16 = PRC_FULL_PAYMENT
#   X17 = TENURE
library("tidyverse")
library("flexclust")
library("dbscan")
library("meanShiftR")
library("e1071")
library("cluster")
library("fpc")
library("mclust")
# Read the customer credit-card table from its CSV export (comma separated,
# first row holds the column names) and print a per-column summary showing
# ranges, quartiles and NA counts.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running elsewhere.
data <- read.csv(
  file = "D:/BIODATA KAFKA/Sem 4/Customer_Data.csv",
  sep = ",",
  header = TRUE
)
summary(data)
## CUST_ID BALANCE BALANCE_FREQUENCY PURCHASES
## Length:8950 Min. : 0.0 Min. :0.0000 Min. : 0.00
## Class :character 1st Qu.: 128.3 1st Qu.:0.8889 1st Qu.: 39.63
## Mode :character Median : 873.4 Median :1.0000 Median : 361.28
## Mean : 1564.5 Mean :0.8773 Mean : 1003.21
## 3rd Qu.: 2054.1 3rd Qu.:1.0000 3rd Qu.: 1110.13
## Max. :19043.1 Max. :1.0000 Max. :49039.57
##
## ONEOFF_PURCHASES INSTALLMENTS_PURCHASES CASH_ADVANCE PURCHASES_FREQUENCY
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. :0.00000
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.:0.08333
## Median : 38.0 Median : 89.0 Median : 0.0 Median :0.50000
## Mean : 592.4 Mean : 411.1 Mean : 978.9 Mean :0.49035
## 3rd Qu.: 577.4 3rd Qu.: 468.6 3rd Qu.: 1113.8 3rd Qu.:0.91667
## Max. :40761.2 Max. :22500.0 Max. :47137.2 Max. :1.00000
##
## ONEOFF_PURCHASES_FREQUENCY PURCHASES_INSTALLMENTS_FREQUENCY
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.08333 Median :0.1667
## Mean :0.20246 Mean :0.3644
## 3rd Qu.:0.30000 3rd Qu.:0.7500
## Max. :1.00000 Max. :1.0000
##
## CASH_ADVANCE_FREQUENCY CASH_ADVANCE_TRX PURCHASES_TRX CREDIT_LIMIT
## Min. :0.0000 Min. : 0.000 Min. : 0.00 Min. : 50
## 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 1600
## Median :0.0000 Median : 0.000 Median : 7.00 Median : 3000
## Mean :0.1351 Mean : 3.249 Mean : 14.71 Mean : 4494
## 3rd Qu.:0.2222 3rd Qu.: 4.000 3rd Qu.: 17.00 3rd Qu.: 6500
## Max. :1.5000 Max. :123.000 Max. :358.00 Max. :30000
## NA's :1
## PAYMENTS MINIMUM_PAYMENTS PRC_FULL_PAYMENT TENURE
## Min. : 0.0 Min. : 0.019 Min. :0.0000 Min. : 6.00
## 1st Qu.: 383.3 1st Qu.: 169.124 1st Qu.:0.0000 1st Qu.:12.00
## Median : 856.9 Median : 312.344 Median :0.0000 Median :12.00
## Mean : 1733.1 Mean : 864.207 Mean :0.1537 Mean :11.52
## 3rd Qu.: 1901.1 3rd Qu.: 825.485 3rd Qu.:0.1429 3rd Qu.:12.00
## Max. :50721.5 Max. :76406.208 Max. :1.0000 Max. :12.00
## NA's :313
# Build the clustering input in three steps:
#   1. drop the first column (CUST_ID, a character identifier according to
#      the summary printed above) so only numeric features remain;
#   2. discard every row that still contains a missing value;
#   3. standardise each column (centre, then divide by its standard
#      deviation) so no feature dominates the Euclidean distances.
data_num <- data[, -1]
data_clean <- na.omit(object = data_num)
data_scaled <- scale(x = data_clean, center = TRUE, scale = TRUE)
#' Average silhouette width of a k-means partition.
#'
#' Fits k-means with 25 random starts and returns the mean silhouette width
#' of the resulting partition.
#'
#' @param k Number of clusters to fit.
#' @param x Numeric matrix to cluster. Defaults to the global `data_scaled`,
#'   which is what the original one-argument version always used.
#' @param d Pairwise distances between the rows of `x` (a `dist` object).
#'   Defaults to `dist(x)`; pass a precomputed object when calling the
#'   function repeatedly, because the distances do not depend on `k`.
#' @return A single numeric value: the mean silhouette width.
avg_sil <- function(k, x = data_scaled, d = dist(x)) {
  km <- kmeans(x, centers = k, nstart = 25)
  ss <- silhouette(km$cluster, d)
  mean(ss[, "sil_width"])
}

k_values <- 2:10

# k-means uses random starting centres; fix the seed so the sweep (and the
# k it selects) is reproducible from run to run.
set.seed(123)

# Compute the distance matrix once and share it across all k. It is kept in
# a local scope so the large `dist` object is released after the sweep.
# vapply() guarantees one numeric value per k (sapply() is not type-stable).
sil_values <- local({
  d_scaled <- dist(data_scaled)
  vapply(k_values, avg_sil, numeric(1), d = d_scaled)
})
# Draw the average silhouette width against the number of clusters as a
# connected line with filled points. The axis labels are Indonesian:
# "Jumlah Cluster (k)" = number of clusters, "Nilai Silhouette" =
# silhouette value.
plot(
  x = k_values,
  y = sil_values,
  main = "Silhouette Method",
  xlab = "Jumlah Cluster (k)",
  ylab = "Nilai Silhouette",
  pch = 19,
  type = "b"
)

# Select the k with the largest average silhouette width and print it.
best_k <- k_values[which.max(sil_values)]
best_k
## [1] 3
# Fit the five clustering models that are compared below.
#
# The centroid-based methods use `best_k` (the silhouette-optimal k selected
# above) instead of repeating the literal 3, so they stay in sync if the
# selection changes.
# NOTE(review): the console outputs recorded further down in this file came
# from an unseeded, single-start k-means run and may differ slightly after
# re-running.

# Several of the methods below start from random initial centres; fix the
# seed so the fitted models are reproducible.
set.seed(123)

# k-means: use the same number of random starts as the k-selection step so
# the final model is not a worse local optimum than the one that was scored.
km_res <- kmeans(data_scaled, centers = best_k, nstart = 25)

# k-medians via flexclust's k-centroids framework.
kmed_res <- flexclust::kcca(
  data_scaled,
  k = best_k,
  family = flexclust::kccaFamily("kmedians")
)

# DBSCAN: both `dbscan` and `fpc` export a function called dbscan(). With the
# attach order used in this file the unqualified call resolves to the fpc
# version (whose argument is spelled `MinPts`), so name it explicitly.
db_res <- fpc::dbscan(data_scaled, eps = 0.7, MinPts = 5)

# Mean shift with the package defaults.
ms_res <- meanShiftR::meanShift(data_scaled)

# Fuzzy c-means with fuzzifier m = 2.
fcm_res <- e1071::cmeans(data_scaled, centers = best_k, m = 2)
# Compare the cluster assignments of the five methods side by side.
#
# plot() on a numeric matrix draws only its first two columns, so every
# panel shows BALANCE against BALANCE_FREQUENCY (standardised), coloured by
# cluster membership.
#
# The former sixth panel coloured the points by `iris$Species`. That is an
# unrelated 150-row dataset whose labels were silently recycled across the
# credit-card rows, and this data has no ground-truth classes to show, so
# the panel has been removed. The 2 x 3 grid therefore leaves one cell empty.

# Remember the previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 3), mar = c(4, 4, 2, 1))

plot(data_scaled, col = km_res$cluster, main = "K-means")
plot(data_scaled, col = clusters(kmed_res), main = "K-medians")
# DBSCAN labels noise as cluster 0; add 1 so noise gets colour 1 instead of
# the invisible colour 0.
plot(data_scaled, col = db_res$cluster + 1L, main = "DBSCAN (0 = Noise)")
plot(data_scaled, col = ms_res$assignment, main = "Mean Shift")
plot(data_scaled, col = fcm_res$cluster, main = "Fuzzy C-means")

# Restore the graphics parameters that were in effect before this section.
par(old_par)
# Internal validation of the k-means solution.
#
# Both the silhouette computation and cluster.stats() need the full pairwise
# distance matrix. It is large, so build it once and reuse it instead of
# calling dist() twice.
dist_eval <- dist(data_scaled)

# Mean silhouette width (column 3 of the silhouette matrix is `sil_width`).
mean(silhouette(km_res$cluster, dist_eval)[, 3])
## [1] 0.1798837

# Cluster validation statistics from fpc.
stats <- cluster.stats(dist_eval, km_res$cluster)

# The distance matrix is no longer needed; release the memory.
rm(dist_eval)

paste("Dunn Index:", stats$dunn)
## [1] "Dunn Index: 0.00311811597437616"
paste("Within-cluster SS:", stats$within.cluster.ss)
## [1] "Within-cluster SS: 108672.584766356"