# Client data set entered inline: one row per client (30 rows), with an
# integer identifier followed by four numeric attributes. Each vector below
# lists the values for clients 1..30 in order.
data <- data.frame(
  Client_ID = seq_len(30),
  Annual_Income = c(
    60, 85, 45, 70, 30, 95, 40, 75, 50, 90,
    55, 80, 40, 65, 85, 50, 75, 30, 90, 40,
    60, 70, 50, 80, 45, 65, 90, 35, 75, 55
  ),
  Spending_Score = c(
    50, 20, 75, 30, 90, 15, 85, 40, 65, 25,
    55, 35, 70, 45, 25, 60, 30, 85, 20, 75,
    65, 50, 80, 35, 70, 55, 30, 85, 25, 60
  ),
  Account_Balance = c(
    25, 40, 15, 35, 10, 50, 12, 30, 20, 45,
    20, 35, 10, 25, 50, 15, 40, 20, 45, 10,
    30, 25, 20, 40, 15, 25, 50, 10, 35, 20
  ),
  Number_of_Transactions = c(
    120, 100, 140, 110, 160, 80, 150, 115, 130, 90,
    130, 120, 140, 105, 95, 135, 100, 155, 85, 145,
    125, 110, 140, 120, 105, 95, 150, 115, 140, 130
  )
)
# Echo the full table so the raw input is visible in the rendered output.
data
## Client_ID Annual_Income Spending_Score Account_Balance
## 1 1 60 50 25
## 2 2 85 20 40
## 3 3 45 75 15
## 4 4 70 30 35
## 5 5 30 90 10
## 6 6 95 15 50
## 7 7 40 85 12
## 8 8 75 40 30
## 9 9 50 65 20
## 10 10 90 25 45
## 11 11 55 55 20
## 12 12 80 35 35
## 13 13 40 70 10
## 14 14 65 45 25
## 15 15 85 25 50
## 16 16 50 60 15
## 17 17 75 30 40
## 18 18 30 85 20
## 19 19 90 20 45
## 20 20 40 75 10
## 21 21 60 65 30
## 22 22 70 50 25
## 23 23 50 80 20
## 24 24 80 35 40
## 25 25 45 70 15
## 26 26 65 55 25
## 27 27 90 30 50
## 28 28 35 85 10
## 29 29 75 25 35
## 30 30 55 60 20
## Number_of_Transactions
## 1 120
## 2 100
## 3 140
## 4 110
## 5 160
## 6 80
## 7 150
## 8 115
## 9 130
## 10 90
## 11 130
## 12 120
## 13 140
## 14 105
## 15 95
## 16 135
## 17 100
## 18 155
## 19 85
## 20 145
## 21 125
## 22 110
## 23 140
## 24 120
## 25 105
## 26 95
## 27 150
## 28 115
## 29 140
## 30 130
# Keep only the four numeric features that go into the clustering.
# The columns are picked by name instead of dropping column 1 by position:
# `data` later receives an extra `Cluster` column, and a positional drop would
# let those labels (or a reordered identifier) slip into the feature matrix if
# this step is re-run in the same session. `drop = FALSE` guarantees the result
# stays a data frame.
data_clean <- data[
  ,
  c(
    "Annual_Income",
    "Spending_Score",
    "Account_Balance",
    "Number_of_Transactions"
  ),
  drop = FALSE
]
# Echo the feature table for inspection.
data_clean
## Annual_Income Spending_Score Account_Balance Number_of_Transactions
## 1 60 50 25 120
## 2 85 20 40 100
## 3 45 75 15 140
## 4 70 30 35 110
## 5 30 90 10 160
## 6 95 15 50 80
## 7 40 85 12 150
## 8 75 40 30 115
## 9 50 65 20 130
## 10 90 25 45 90
## 11 55 55 20 130
## 12 80 35 35 120
## 13 40 70 10 140
## 14 65 45 25 105
## 15 85 25 50 95
## 16 50 60 15 135
## 17 75 30 40 100
## 18 30 85 20 155
## 19 90 20 45 85
## 20 40 75 10 145
## 21 60 65 30 125
## 22 70 50 25 110
## 23 50 80 20 140
## 24 80 35 40 120
## 25 45 70 15 105
## 26 65 55 25 95
## 27 90 30 50 150
## 28 35 85 10 115
## 29 75 25 35 140
## 30 55 60 20 130
# Standardise each feature column: subtract the column mean and divide by the
# column standard deviation (the `scale()` defaults, written out explicitly).
# The result is a numeric matrix that carries the centring and scaling values
# as the attributes "scaled:center" and "scaled:scale".
data_scaled <- scale(data_clean, center = TRUE, scale = TRUE)
# Echo the standardised matrix together with its attributes.
data_scaled
## Annual_Income Spending_Score Account_Balance Number_of_Transactions
## [1,] -0.127569 -0.07205241 -0.1823923 -0.05341574
## [2,] 1.148121 -1.36899586 0.9575595 -0.96911422
## [3,] -0.892983 1.00873379 -0.9423602 0.86228273
## [4,] 0.382707 -0.93668138 0.5775756 -0.51126498
## [5,] -1.658397 1.65720551 -1.3223441 1.77798121
## [6,] 1.658397 -1.58515310 1.7175274 -1.88481270
## [7,] -1.148121 1.44104827 -1.1703505 1.32013197
## [8,] 0.637845 -0.50436690 0.1975916 -0.28234036
## [9,] -0.637845 0.57641931 -0.5623762 0.40443349
## [10,] 1.403259 -1.15283862 1.3375435 -1.42696346
## [11,] -0.382707 0.14410483 -0.5623762 0.40443349
## [12,] 0.892983 -0.72052414 0.5775756 -0.05341574
## [13,] -1.148121 0.79257655 -1.3223441 0.86228273
## [14,] 0.127569 -0.28820965 -0.1823923 -0.74018960
## [15,] 1.148121 -1.15283862 1.7175274 -1.19803884
## [16,] -0.637845 0.36026207 -0.9423602 0.63335811
## [17,] 0.637845 -0.93668138 0.9575595 -0.96911422
## [18,] -1.658397 1.44104827 -0.5623762 1.54905659
## [19,] 1.403259 -1.36899586 1.3375435 -1.65588808
## [20,] -1.148121 1.00873379 -1.3223441 1.09120735
## [21,] -0.127569 0.57641931 0.1975916 0.17550888
## [22,] 0.382707 -0.07205241 -0.1823923 -0.51126498
## [23,] -0.637845 1.22489103 -0.5623762 0.86228273
## [24,] 0.892983 -0.72052414 0.9575595 -0.05341574
## [25,] -0.892983 0.79257655 -0.9423602 -0.74018960
## [26,] 0.127569 0.14410483 -0.1823923 -1.19803884
## [27,] 1.403259 -0.93668138 1.7175274 1.32013197
## [28,] -1.403259 1.44104827 -1.3223441 -0.28234036
## [29,] 0.637845 -1.15283862 0.5775756 0.86228273
## [30,] -0.382707 0.36026207 -0.5623762 0.40443349
## attr(,"scaled:center")
## Annual_Income Spending_Score Account_Balance
## 62.50000 51.66667 27.40000
## Number_of_Transactions
## 121.16667
## attr(,"scaled:scale")
## Annual_Income Spending_Score Account_Balance
## 19.59724 23.13131 13.15845
## Number_of_Transactions
## 21.84125
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Elbow plot: within-cluster sum of squares for a range of candidate k.
# kmeans() picks its starting centres at random, so the RNG is seeded *before*
# the plot; otherwise the curve changes from run to run. `nstart = 25` is
# forwarded to kmeans() so every candidate k is fitted with the same number of
# restarts as the final model below.
set.seed(123)
fviz_nbclust(data_scaled, kmeans, method = "wss", nstart = 25)

# Final model with 3 clusters. The seed is reset immediately before the fit so
# the reported clustering does not depend on how many random draws the elbow
# plot consumed.
set.seed(123)
kmeans_result <- kmeans(data_scaled, centers = 3, nstart = 25)
# Show cluster sizes, centres (in standardised units) and sums of squares.
print(kmeans_result)
## K-means clustering with 3 clusters of sizes 11, 6, 13
##
## Cluster means:
## Annual_Income Spending_Score Account_Balance Number_of_Transactions
## 1 0.4754845 -0.4257643 0.3703116 -0.0950384
## 2 1.2331670 -1.2609172 1.3375435 -1.3506553
## 3 -0.9714870 0.9422239 -0.9306684 0.7037965
##
## Clustering vector:
## [1] 1 2 3 1 3 2 3 1 3 2 3 1 3 1 2 3 2 3 2 3 1 1 3 1 3 1 1 3 1 3
##
## Within cluster sum of squares by cluster:
## [1] 13.659955 2.140843 12.257414
## (between_SS / total_SS = 75.8 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Visualise the fitted clusters using the standardised feature matrix.
fviz_cluster(kmeans_result, data = data_scaled)

# Attach each client's cluster label to the original (unscaled) table.
# Rows of `data_scaled` are in the same order as rows of `data`, so the label
# vector lines up row by row.
data[["Cluster"]] <- kmeans_result[["cluster"]]
# Print the table with the new Cluster column.
print(data)
## Client_ID Annual_Income Spending_Score Account_Balance
## 1 1 60 50 25
## 2 2 85 20 40
## 3 3 45 75 15
## 4 4 70 30 35
## 5 5 30 90 10
## 6 6 95 15 50
## 7 7 40 85 12
## 8 8 75 40 30
## 9 9 50 65 20
## 10 10 90 25 45
## 11 11 55 55 20
## 12 12 80 35 35
## 13 13 40 70 10
## 14 14 65 45 25
## 15 15 85 25 50
## 16 16 50 60 15
## 17 17 75 30 40
## 18 18 30 85 20
## 19 19 90 20 45
## 20 20 40 75 10
## 21 21 60 65 30
## 22 22 70 50 25
## 23 23 50 80 20
## 24 24 80 35 40
## 25 25 45 70 15
## 26 26 65 55 25
## 27 27 90 30 50
## 28 28 35 85 10
## 29 29 75 25 35
## 30 30 55 60 20
## Number_of_Transactions Cluster
## 1 120 1
## 2 100 2
## 3 140 3
## 4 110 1
## 5 160 3
## 6 80 2
## 7 150 3
## 8 115 1
## 9 130 3
## 10 90 2
## 11 130 3
## 12 120 1
## 13 140 3
## 14 105 1
## 15 95 2
## 16 135 3
## 17 100 2
## 18 155 3
## 19 85 2
## 20 145 3
## 21 125 1
## 22 110 1
## 23 140 3
## 24 120 1
## 25 105 3
## 26 95 1
## 27 150 1
## 28 115 3
## 29 140 1
## 30 130 3
# Cluster profile: mean of every feature, in original units, per cluster.
# The result has one row per cluster label and one column per feature.
aggregate(
  x = data[c(
    "Annual_Income",
    "Spending_Score",
    "Account_Balance",
    "Number_of_Transactions"
  )],
  by = list(Cluster = data[["Cluster"]]),
  FUN = mean
)
## Cluster Annual_Income Spending_Score Account_Balance Number_of_Transactions
## 1 1 71.81818 41.81818 32.27273 119.09091
## 2 2 86.66667 22.50000 45.00000 91.66667
## 3 3 43.46154 73.46154 15.15385 136.53846