# Load the mall-customer data set from disk.
data <- read.csv(
  file = "C:\\Users\\tariqm\\Documents\\R\\Datasets\\Mall_Customers.csv",
  sep = ","
)
# Drop the first two columns and centre/scale the remaining ones
# (scale() defaults), giving the matrix used by the clustering steps.
ndata <- scale(data[, -(1:2)])
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Diagnostics for choosing the number of k-means clusters on the scaled data.
# kmeans() starts from randomly chosen centres, so the RNG is seeded first to
# make both plots reproducible from run to run.
set.seed(123)
# Elbow plot: total within-cluster sum of squares for each candidate k.
fviz_nbclust(ndata, kmeans, method = "wss")

# Average silhouette width for each candidate k.
fviz_nbclust(ndata, kmeans, method = "silhouette")

library(cluster)
## Warning: package 'cluster' was built under R version 4.0.5
# Gap statistic for k = 1..10, with 25 random k-means starts per fit and
# B = 50 Monte Carlo reference samples.
# Computed on the scaled matrix (ndata) so it evaluates the same data that
# the other k-selection plots and the final k-means fit use; on the raw
# columns the variable with the largest scale would dominate the distances.
# Seeded because both kmeans() and the reference sampling are random.
set.seed(123)
stat_gap <- clusGap(ndata, FUN = kmeans, nstart = 25, K.max = 10, B = 50)
fviz_gap_stat(stat_gap)

# Pairwise Euclidean distances (dist()'s default metric) between the scaled
# observations; reused by the hierarchical clustering and silhouette steps.
distance <- dist(ndata, method = "euclidean")
# Heat map of the distance matrix: green = close, white = mid, red = far.
fviz_dist(
  distance,
  gradient = list(low = "green", mid = "white", high = "red")
)

# Hierarchical clustering with complete linkage (hclust()'s default method,
# spelled out here for clarity).
data.hclust1 <- hclust(distance, method = "complete")
# The original call used `labels = data$state`, but no `state` column appears
# anywhere in this data set (presumably a leftover from another script), so
# the argument evaluated to NULL and the row names were drawn instead.
# Label the leaves by row name explicitly.
plot(data.hclust1, hang = -1, labels = rownames(data))
# Outline the four-cluster cut on the dendrogram.
rect.hclust(data.hclust1, k = 4, border = "green")

# Hierarchical clustering with average linkage, for comparison with the
# complete-linkage tree.
data.hclust2 <- hclust(distance, method = "average")
# The original call used `labels = data$state`, but no `state` column appears
# anywhere in this data set (presumably a leftover from another script), so
# the argument evaluated to NULL and the row names were drawn instead.
# Label the leaves by row name explicitly.
plot(data.hclust2, hang = -1, labels = rownames(data))
# Outline the four-cluster cut on the dendrogram.
rect.hclust(data.hclust2, k = 4, border = "green")

# Cut the complete-linkage tree into four groups and profile them.
member1 <- cutree(data.hclust1, k = 4)
# Cluster sizes.
table(member1)
## member1
## 1 2 3 4
## 69 57 39 35
# Per-cluster means on the standardised scale.
aggregate(ndata, by = list(member1), FUN = mean)
## Group.1 Age Annual.Income..k.. Spending.Score..1.100.
## 1 1 -0.8285941 -0.6768544 0.2391932
## 2 2 1.1799932 -0.5042586 -0.3528675
## 3 3 -0.4408110 0.9891010 1.2364001
## 4 4 0.2030004 1.0534500 -1.2745855
# Per-cluster means in the original units.
aggregate(data[, -(1:2)], by = list(member1), FUN = mean)
## Group.1 Age Annual.Income..k.. Spending.Score..1.100.
## 1 1 27.27536 42.78261 56.37681
## 2 2 55.33333 47.31579 41.08772
## 3 3 32.69231 86.53846 82.12821
## 4 4 41.68571 88.22857 17.28571
# Silhouette plot for the four-group cut, using the same distance matrix.
plot(silhouette(member1, distance))

# Cut the average-linkage tree into four groups and profile them.
member2 <- cutree(data.hclust2, k = 4)
# Cluster sizes.
table(member2)
## member2
## 1 2 3 4
## 74 52 39 35
# Per-cluster means on the standardised scale.
aggregate(ndata, by = list(member2), FUN = mean)
## Group.1 Age Annual.Income..k.. Spending.Score..1.100.
## 1 1 0.8504348 -0.5481816 -0.3719634
## 2 2 -1.0162607 -0.6707740 0.4599265
## 3 3 -0.4408110 0.9891010 1.2364001
## 4 4 0.2030004 1.0534500 -1.2745855
# Per-cluster means in the original units.
aggregate(data[, -(1:2)], by = list(member2), FUN = mean)
## Group.1 Age Annual.Income..k.. Spending.Score..1.100.
## 1 1 50.72973 46.16216 40.59459
## 2 2 24.65385 42.94231 62.07692
## 3 3 32.69231 86.53846 82.12821
## 4 4 41.68571 88.22857 17.28571
# Silhouette plot for the four-group cut, using the same distance matrix.
plot(silhouette(member2, distance))

# Final k-means partition (k = 4) of the scaled data.
# Seeded for reproducibility; nstart = 25 keeps the best of 25 random starts
# (the same setting the gap-statistic call uses) so the result does not hinge
# on a single initialisation.
set.seed(123)
kc <- kmeans(ndata, centers = 4, nstart = 25)
# Draw the partition on the matrix that was actually clustered. The raw
# `data` frame also holds the two columns excluded from clustering, which
# would otherwise be fed into the plot's projection.
clusplot(ndata, kc$cluster, color = TRUE, shade = TRUE, labels = 2, lines = 0)

# Reuse the fitted object instead of refitting an unseeded kmeans(), so both
# plots show the same partition and cluster numbering.
fviz_cluster(kc, data = ndata, axes = c(1, 2))

library(psych)
## Warning: package 'psych' was built under R version 4.0.5
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Scatter-plot matrix of the three numeric variables.
# The fill colours are indexed by Gender (the grouping also used for the
# biplot). Without an index the three colours were simply recycled by row
# order and carried no meaning.
pairs.panels(
  data[, -c(1, 2)],
  gap = 0,
  bg = c("red", "yellow", "blue")[as.factor(data$Gender)],
  pch = 21
)

# Principal component analysis of the numeric columns (columns 1 and 2
# excluded), centring and scaling each variable before rotation.
pc <- prcomp(data[, -(1:2)], center = TRUE, scale. = TRUE)
# Components stored in the fitted object.
attributes(pc)
## $names
## [1] "sdev" "rotation" "center" "scale" "x"
##
## $class
## [1] "prcomp"
# Means used for centring.
pc$center
## Age Annual.Income..k.. Spending.Score..1.100.
## 38.85 60.56 50.20
# Standard deviations used for scaling.
pc$scale
## Age Annual.Income..k.. Spending.Score..1.100.
## 13.96901 26.26472 25.82352
# Component standard deviations and loadings.
print(pc)
## Standard deviations (1, .., p=3):
## [1] 1.1523823 0.9996256 0.8202217
##
## Rotation (n x k) = (3 x 3):
## PC1 PC2 PC3
## Age 0.70638235 -0.03014116 0.707188441
## Annual.Income..k.. -0.04802398 -0.99883160 0.005397916
## Spending.Score..1.100. -0.70619946 0.03777499 0.707004506
# Variance explained by each component.
summary(pc)
## Importance of components:
## PC1 PC2 PC3
## Standard deviation 1.1524 0.9996 0.8202
## Proportion of Variance 0.4427 0.3331 0.2243
## Cumulative Proportion 0.4427 0.7758 1.0000
# Scatter-plot matrix of the principal-component scores.
# The fill colours are indexed by Gender (the grouping also used for the
# biplot). Without an index the three colours were simply recycled by row
# order and carried no meaning.
pairs.panels(
  pc$x,
  gap = 0,
  bg = c("red", "yellow", "blue")[as.factor(data$Gender)],
  pch = 21
)

library(devtools)
## Warning: package 'devtools' was built under R version 4.0.5
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.0.5
library(ggbiplot)
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 4.0.5
## Loading required package: scales
## Warning: package 'scales' was built under R version 4.0.5
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
## Loading required package: grid
# PCA biplot with observations grouped by Gender; each group gets an ellipse
# (ellipse.prob = 0.68) and a correlation circle is drawn. The legend has no
# title and sits horizontally above the plot.
g <- ggbiplot(
  pc,
  obs.scale = 1,
  var.scale = 1,
  groups = data$Gender,
  ellipse = TRUE,
  circle = TRUE,
  ellipse.prob = 0.68
) +
  scale_color_discrete(name = "") +
  theme(legend.direction = "horizontal", legend.position = "top")
print(g)
