Section 25 Hierarchical Clustering

# Load the mall-customers CSV into a data frame.
# NOTE(review): this is an absolute, machine-specific Windows path; the chunk
# will only run where this exact G:\ location exists.
df <- read.csv(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 4 - Clustering\\Section 25 - Hierarchical Clustering\\Hierarchical_Clustering\\Mall_Customers.csv"
)
Warning messages:
1: In file(con, "rb") :
  cannot open file 'C:/Users/wilsonpc/AppData/Local/RStudio-Desktop/notebooks/FA79C0FA-section25-hierarchical/1/s/cua8e6fjxe79j/temp': Permission denied
2: In file(con, "rb") :
  cannot open file 'C:/Users/wilsonpc/AppData/Local/RStudio-Desktop/notebooks/FA79C0FA-section25-hierarchical/1/s/cua8e6fjxe79j/temp': Permission denied
# Preview the first six rows of the loaded data.
head(df, n = 6L)

Prepare the data by taking only the annual income and spending score.

# Keep only the 4th and 5th columns of df as the clustering features,
# then print them.
x <- df[, c(4L, 5L)]
x

Use the dendrogram to find the optimal number of clusters

# Build the hierarchical clustering tree from pairwise Euclidean distances
# between the rows of x (linkage: "ward.D"), then draw it as a dendrogram.
dendrogram <- hclust(
  dist(x, method = "euclidean"),
  method = "ward.D"
)
plot(
  dendrogram,
  main = "Dendrogram",
  xlab = "Customer",
  ylab = "Euclidean Distance"
)

So there should be 5 clusters (or possibly 6; the code below cuts the tree into 6).

# Fit hierarchical clustering to the mall dataset.
# NOTE(review): per the stats::hclust documentation, "ward.D" applied to
# unsquared distances does not implement Ward's (1963) criterion, while
# "ward.D2" does. The method is left as "ward.D" here so results stay
# unchanged; confirm which one is intended.
hc <- hclust(dist(x, method = "euclidean"), method = "ward.D")
# Cut the tree into 6 clusters (the dendrogram suggested 5 or 6).
# y_hc holds one integer cluster label per row of x.
y_hc <- cutree(hc, k = 6)
y_hc
  [1] 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 3 1 3 3 3 3 3 3 3 3
 [54] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 3 4 3 4 4 3 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
[107] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 4 5 4 5 6 5 6 5 4 5 6 5 6 5 6 5 6 5 4 5 6 5 4 5 6 5 6 5 6 5 6 5 6 5 6
[160] 5 4 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5 6 5

Visualizing the clusters

# Draw a 2-D cluster plot of x, grouped by the labels in y_hc.
library(cluster)
# NOTE(review): per the cluster::clusplot documentation the points are drawn
# on the first two principal components of x, so the axis labels below are
# only approximate descriptions of the axes; confirm this is acceptable.
clusplot(
  x,
  y_hc,
  lines = 0,         # no distance lines between ellipses
  shade = TRUE,      # shade ellipses in relation to their density
  color = TRUE,      # colour ellipses with respect to their density
  labels = 2,        # label all points and ellipses
  plotchar = FALSE,  # same plotting symbol for every cluster
  span = TRUE,       # each ellipse is the smallest one spanning its cluster
  main = "Clusters of clients",
  xlab = "Annual Income",
  ylab = "Spending Score"
)

LS0tDQp0aXRsZTogIk1MIFVzaW5nIFIgU2VjdGlvbiAiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCiMgU2VjdGlvbiAyNSBIaWVyYXJjaGljYWwgQ2x1c3RlcmluZw0KDQpgYGB7cn0NCmRmIDwtICByZWFkLmNzdigiRzpcXFJTdHVkaW9cXHVkZW15XFxtbFxcTWFjaGluZSBMZWFybmluZyBBWlxcUGFydCA0IC0gQ2x1c3RlcmluZ1xcU2VjdGlvbiAyNSAtIEhpZXJhcmNoaWNhbCBDbHVzdGVyaW5nXFxIaWVyYXJjaGljYWxfQ2x1c3RlcmluZ1xcTWFsbF9DdXN0b21lcnMuY3N2IikNCmhlYWQoZGYpDQpgYGANCg0KUHJlcGFyZSB0aGUgRGF0YSBieSB0YWtpbmcgb255IHRoZSBhbm51YWwgaW5jb21lIGFuZCBzcGVuZGluZyBzY29yZS4NCg0KYGBge3J9DQp4ID0gZGZbLDQ6NV0NCngNCmBgYA0KDQpVc2UgdGhlIGRlbmRvZ3JhbSB0byBmaW5kIHRoZSBvcHRpbWFsIG51bWJlciBvZiBjbHVzdGVycw0KDQpgYGB7cn0NCmRlbmRyb2dyYW0gPSBoY2x1c3QoZGlzdCh4LCBtZXRob2QgPSJldWNsaWRlYW4iKSwgbWV0aG9kID0gIndhcmQuRCIpDQpwbG90KGRlbmRyb2dyYW0sDQogICAgIG1haW4gPSBwYXN0ZSgiRGVuZHJvZ3JhbSIpLA0KICAgICB4bGFiID0gIkN1c3RvbWVyIiwNCiAgICAgeWxhYiA9ICJFdWNsaWRlYW4gRGlzdGFuY2UiKQ0KDQpgYGANCg0KU28gdGhlcmUgc2hvdWxkIGJlIDUgY2x1c3RlcnMuIChvciA2PykNCg0KYGBge3J9DQojIGZpdHRpbmcgaGllcmFyY2hpY2FsIGNsdXN0ZXJpbmcgdG8gdGhlIG1hbGwgZGF0YXNldA0KaGMgPSBoY2x1c3QoZGlzdCh4LCBtZXRob2QgPSAiZXVjbGlkZWFuIiksIG1ldGhvZCA9ICJ3YXJkLkQiKQ0KIyB3ZSB1c2UgNSBiZWxvdyBmb3IgdGhlIDUgY2x1c3RlcnMNCnlfaGMgPSBjdXRyZWUoaGMsNikNCnlfaGMNCg0KYGBgDQoNClZpc3VhbGl6aW5nIHRoZSBjbHVzdGVycw0KDQpgYGB7cn0NCmxpYnJhcnkoY2x1c3RlcikNCmNsdXNwbG90KHgsIHlfaGMsIA0KICAgICAgICAgbGluZXMgPSAwICwgDQogICAgICAgICBzaGFkZSA9IFRSVUUsDQogICAgICAgICBjb2xvciA9IFRSVUUsDQogICAgICAgICBsYWJlbHMgPSAyLCANCiAgICAgICAgIHBsb3RjaGFyID0gRkFMU0UsDQogICAgICAgICBzcGFuID0gVFJVRSwNCiAgICAgICAgIG1haW4gPSBwYXN0ZSgiQ2x1c3RlcnMgb2YgY2xlaW50cyIpLA0KICAgICAgICAgeGxhYj0iQW5udWFsIEluY29tZSIsDQogICAgICAgICB5bGFiPSJTcGVuZGluZyBTY29yZSIpDQpgYGANCg0K