# Unsupervised Machine Learning in R: K-Means


# message=FALSE

library(factoextra)

# eval=TRUE, echo=TRUE

# Read the customer data: a comma-separated file whose first row holds the
# column names.
ABC <- read.table(file = "AbcBank.csv", header = TRUE, sep = ",")

# Keep columns 2 through 5 (Age, Income, CCAvg, Mortgage according to the
# str() output recorded below) as the numeric inputs for clustering.
ABC_num <- ABC[, 2:5]
str(ABC_num)
## 'data.frame':    5000 obs. of  4 variables:
##  $ Age     : int  45 35 43 61 52 63 34 44 45 45 ...
##  $ Income  : int  202 203 201 188 180 178 180 154 200 194 ...
##  $ CCAvg   : num  10 10 10 9.3 9 9 8.9 8.8 8.8 8.8 ...
##  $ Mortgage: int  0 0 0 0 297 0 0 0 0 428 ...
head(ABC_num)
##   Age Income CCAvg Mortgage
## 1  45    202  10.0        0
## 2  35    203  10.0        0
## 3  43    201  10.0        0
## 4  61    188   9.3        0
## 5  52    180   9.0      297
## 6  63    178   9.0        0
# Standardize each column (center on its mean, divide by its standard
# deviation) so that all variables are on a comparable scale; scale() returns
# a matrix, so convert the result back to a data frame.
ABC_scaled <- as.data.frame(scale(ABC_num, center = TRUE, scale = TRUE))
head(ABC_scaled)
##           Age   Income    CCAvg   Mortgage
## 1 -0.02952064 2.785475 4.613064 -0.5554684
## 2 -0.90188002 2.807198 4.613064 -0.5554684
## 3 -0.20399252 2.763752 4.613064 -0.5554684
## 4  1.36625436 2.481350 4.212528 -0.5554684
## 5  0.58113092 2.307565 4.040870  2.3644893
## 6  1.54072623 2.264118 4.040870 -0.5554684

# eval=TRUE, echo=TRUE

# Fix the RNG seed before clustering: kmeans() chooses its starting centers at
# random, so without a seed the cluster numbering (and possibly the partition
# itself) can change from run to run.
# NOTE(review): any output recorded in this file was produced without a fixed
# seed, so cluster labels may be permuted relative to it after this change.
set.seed(123)

# Partition the standardized data into 3 clusters. nstart = 25 runs the
# algorithm from 25 random sets of starting centers and keeps the best fit.
k3 <- kmeans(ABC_scaled, centers = 3, nstart = 25)

# eval=TRUE, echo=TRUE

# Plot the 3-cluster solution: observations are drawn as points only (no text
# labels), each cluster is outlined with a convex hull, and the first two plot
# dimensions are shown on a classic ggplot2 theme.
fviz_cluster(
  object = k3,
  data = ABC_scaled,
  geom = "point",
  label = "none",
  axes = c(1, 2),
  ellipse.type = "convex",
  ggtheme = theme_classic()
)


# eval=TRUE, echo=TRUE

# Show the cluster centers; k3 was fitted on the standardized data, so the
# values are in standardized units rather than the original measurement units.
k3[["centers"]]
##           Age     Income      CCAvg   Mortgage
## 1 -0.89279937 -0.3047082 -0.3238836 -0.1233689
## 2  0.87269134 -0.3861332 -0.3707163 -0.1321248
## 3 -0.07556447  1.5435569  1.5495668  0.5693976
# Attach each observation's cluster label (converted to numeric) to the
# original data frame, then count how many observations fall in each cluster.
ABC[["Cluster"]] <- as.numeric(k3[["cluster"]])
table(ABC[["Cluster"]])
## 
##    1    2    3 
## 1979 2104  917