# Load the customer records from the course CSV into the data frame `df`.
# NOTE(review): this is an absolute, machine-specific Windows path; the script
# only runs where this exact directory exists.
df <- read.csv(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 4 - Clustering\\Section 25 - Hierarchical Clustering\\Hierarchical_Clustering\\Mall_Customers.csv"
)
Warning messages:
1: In file(con, "rb") :
cannot open file 'C:/Users/wilsonpc/AppData/Local/RStudio-Desktop/notebooks/FA79C0FA-section25-hierarchical/1/s/cua8e6fjxe79j/temp': Permission denied
2: In file(con, "rb") :
cannot open file 'C:/Users/wilsonpc/AppData/Local/RStudio-Desktop/notebooks/FA79C0FA-section25-hierarchical/1/s/cua8e6fjxe79j/temp': Permission denied
# Preview the first six rows of the loaded data (six is head()'s default).
head(df, n = 6L)
Prepare the data by taking only the annual income and spending score.
# Keep only columns 4 and 5 of `df` as the clustering features. The notebook
# text identifies these as annual income and spending score; the selection is
# positional, so it depends on the CSV's column order.
x <- df[, 4:5]
# Display the selected feature columns.
x
Use the dendrogram to find the optimal number of clusters.
# Build the agglomerative clustering tree from pairwise Euclidean distances
# between the rows of `x`, then draw it so the number of clusters can be read
# off the plot.
# NOTE(review): per the hclust() documentation, "ward.D" on unsquared
# distances does not implement Ward's (1963) criterion, whereas "ward.D2"
# does. Confirm which linkage is intended before changing it.
dendrogram <- hclust(
  d = dist(x = x, method = "euclidean"),
  method = "ward.D"
)
plot(
  dendrogram,
  main = "Dendrogram",
  xlab = "Customer",
  ylab = "Euclidean Distance"
)
So there should be 5 clusters. (or 6?)
# Fit hierarchical clustering to the mall dataset: Euclidean distances between
# the rows of `x`, merged with hclust()'s "ward.D" linkage.
# NOTE(review): per the hclust() documentation, "ward.D" on unsquared
# distances does not implement Ward's (1963) criterion, whereas "ward.D2"
# does. Confirm which linkage is intended before changing it.
hc <- hclust(dist(x, method = "euclidean"), method = "ward.D")
# Cut the tree into 6 clusters. The earlier comment said 5, but the call has
# always used 6; change `k` here if 5 clusters are wanted instead.
y_hc <- cutree(hc, k = 6)
# Display the cluster label (1..6) assigned to each row of `x`.
y_hc
[1] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 3 1 3 3 3 3 3 3 3 3
[54] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 3 4 3 4 4 3 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
[107] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 4 5 4 5 6 5 6 5 4 5 6 5 6 5 6 5 6 5 4 5 6 5 4 5 6 5 6 5 6 5 6 5 6 5 6
[160] 5 4 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5
Visualizing the clusters
# Draw the cluster assignments in `y_hc` for the observations in `x` using
# clusplot() from the `cluster` package.
library(cluster)
# NOTE(review): the clusplot() documentation says points are placed using
# principal components, so the axes may not be the raw income/score columns
# that the axis labels below suggest. Confirm before relying on the labels.
clusplot(
  x,
  y_hc,
  lines = 0,         # do not draw distance lines between cluster ellipses
  shade = TRUE,      # shade the ellipses
  color = TRUE,      # colour the ellipses
  labels = 2,        # label all points and ellipses
  plotchar = FALSE,  # use the same plotting symbol for every cluster
  span = TRUE,       # draw each cluster as a spanning ellipse
  main = "Clusters of clients",  # title typo fixed (was "cleints")
  xlab = "Annual Income",
  ylab = "Spending Score"
)