# Load the utilities data set from its CSV file.
data <- read.csv(
  file = "C:\\Users\\tariqm\\Documents\\R\\Datasets\\utilities.csv"
)
# Standardise every column except the first (presumably the Company
# identifier -- confirm against the CSV): centre to mean 0 and scale to unit
# standard deviation so all variables are on a comparable scale.
ndata <- scale(data[-1], center = TRUE, scale = TRUE)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Pairwise Euclidean distances between the standardised observations.
distance <- dist(ndata, method = "euclidean")
# Heat map of the distance matrix.
fviz_dist(
  distance,
  gradient = list(low = "sky blue", mid = "white", high = "red")
)

# Agglomerative clustering on the distance matrix using complete linkage
# (the hclust default).
data.hclust <- hclust(distance, method = "complete")

# Dendrogram with the default leaf labels.
plot(data.hclust)

# Dendrogram labelled by company, with all leaves drawn down to the baseline
# and the three-cluster cut outlined.
plot(
  data.hclust,
  labels = data$Company,
  main = "default from Hclust",
  hang = -1
)
rect.hclust(data.hclust, k = 3, border = "green")

# Cluster membership for the three-cluster cut, and the cluster sizes.
member <- cutree(data.hclust, k = 3)
table(member)
## member
## 1 2 3
## 14 5 3
# Per-cluster means of the standardised variables.
aggregate(ndata, by = list(member), FUN = mean)
## Group.1 Fixed_charge RoR Cost Load D.Demand Sales
## 1 1 0.3068832 0.4326015 -0.31481203 -0.3743722 -0.2605107 -0.1575387
## 2 2 -0.4991075 -0.7113763 0.07812761 1.3365904 0.1343994 -0.6728046
## 3 3 -0.6002757 -0.8331800 1.33891013 -0.4805802 0.9917178 1.8565214
## Nuclear Fuel_Cost
## 1 0.3692252 -0.2389329
## 2 -0.6050529 1.2484717
## 3 -0.7146294 -0.9657660
# Per-cluster means on the original measurement scale (first column dropped).
aggregate(data[-1], by = list(member), FUN = mean)
## Group.1 Fixed_charge RoR Cost Load D.Demand Sales Nuclear
## 1 1 1.170714 11.707143 155.2143 55.30714 2.428571 8354.786 18.20
## 2 2 1.022000 9.140000 171.4000 62.94000 3.660000 6525.600 1.84
## 3 3 1.003333 8.866667 223.3333 54.83333 6.333333 15504.667 0.00
## Fuel_Cost
## 1 0.9698571
## 2 1.7970000
## 3 0.5656667
library(cluster)
## Warning: package 'cluster' was built under R version 4.0.5
# Silhouette plot for the three-cluster hierarchical solution.
plot(silhouette(cutree(data.hclust, k = 3), distance))

# Diagnostics for choosing k. fviz_nbclust() runs kmeans() for each candidate
# k, and kmeans() picks random starting centres, so the RNG is seeded first;
# without this the two plots can differ from run to run.
set.seed(123)
fviz_nbclust(ndata, kmeans, method = "wss")

fviz_nbclust(ndata, kmeans, method = "silhouette")

# Re-seed immediately before the final fit so `kc` is the same as when the
# seed was set only at this point.
set.seed(123)
kc <- kmeans(ndata, centers = 5)
kc
## K-means clustering with 5 clusters of sizes 6, 7, 3, 1, 5
##
## Cluster means:
## Fixed_charge RoR Cost Load D.Demand Sales
## 1 -0.61834147 -0.6252226 0.2019400 1.1482980 0.05636417 -0.7402978
## 2 0.50431607 0.7795509 -0.9858961 -0.3375463 -0.48957692 0.3518600
## 3 -0.60027572 -0.8331800 1.3389101 -0.4805802 0.99171778 1.8565214
## 4 2.03732429 -0.8628882 0.5782326 -1.2950193 -0.71864311 -1.5814284
## 5 -0.01133215 0.3313815 0.2189339 -0.3580408 0.16646865 -0.4018738
## Nuclear Fuel_Cost
## 1 -0.3722028 1.1759426
## 2 -0.5232108 -0.4105368
## 3 -0.7146294 -0.9657660
## 4 0.2143888 1.6926380
## 5 1.5650384 -0.5954476
##
## Clustering vector:
## [1] 2 1 2 5 4 2 1 3 2 5 3 1 5 2 1 3 1 2 2 5 1 5
##
## Within cluster sum of squares by cluster:
## [1] 21.187976 26.507769 9.533522 0.000000 10.177094
## (between_SS / total_SS = 59.9 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the fitted k-means clusters in two dimensions.
# Pass the standardised matrix the model was fitted on: the raw `data` frame
# still carries its first (Company) column, which is not a clustering
# variable and was not part of the fit.
cluster::clusplot(
  ndata,
  kc$cluster,
  color = TRUE,
  shade = TRUE,
  labels = 2,
  lines = 0
)

# Visualise the already-fitted model `kc` rather than refitting: a fresh,
# unseeded kmeans() call can converge to a different partition than the one
# stored in `kc`.
fviz_cluster(kc, data = ndata, axes = c(1, 2))

library(psych)
## Warning: package 'psych' was built under R version 4.0.5
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Scatter-plot matrix of the original variables (first column excluded).
pairs.panels(
  data[-1],
  gap = 0,
  bg = c("red", "yellow", "blue"),
  pch = 21
)

# Principal component analysis on centred, unit-variance variables.
pc <- prcomp(data[-1], center = TRUE, scale. = TRUE)
attributes(pc)
## $names
## [1] "sdev" "rotation" "center" "scale" "x"
##
## $class
## [1] "prcomp"
# Column means used to centre the variables before the PCA.
pc[["center"]]
## Fixed_charge RoR Cost Load D.Demand Sales
## 1.114091 10.736364 168.181818 56.977273 3.240909 8914.045455
## Nuclear Fuel_Cost
## 12.000000 1.102727
# Column scaling factors applied to the variables before the PCA.
pc[["scale"]]
## Fixed_charge RoR Cost Load D.Demand Sales
## 0.1845112 2.2440494 41.1913495 4.4611478 3.1182503 3549.9840305
## Nuclear Fuel_Cost
## 16.7919198 0.5560981
# Component standard deviations and the rotation (loadings) matrix.
print(x = pc)
## Standard deviations (1, .., p=8):
## [1] 1.4740918 1.3785018 1.1504236 0.9983701 0.8056180 0.7560814 0.4652989
## [8] 0.4115657
##
## Rotation (n x k) = (8 x 8):
## PC1 PC2 PC3 PC4 PC5
## Fixed_charge 0.44554526 -0.23217669 0.06712849 -0.55549758 0.4008403
## RoR 0.57119021 -0.10053490 0.07123367 -0.33209594 -0.3359424
## Cost -0.34869054 0.16130192 0.46733094 -0.40908380 0.2685680
## Load -0.28890116 -0.40918419 -0.14259793 -0.33373941 -0.6800711
## D.Demand -0.35536100 0.28293270 0.28146360 -0.39139699 -0.1626375
## Sales 0.05383343 0.60309487 -0.33199086 -0.19086550 -0.1319721
## Nuclear 0.16797023 -0.08536118 0.73768406 0.33348714 -0.2496462
## Fuel_Cost -0.33584032 -0.53988503 -0.13442354 -0.03960132 0.2926660
## PC6 PC7 PC8
## Fixed_charge -0.00654016 0.20578234 -0.48107955
## RoR -0.13326000 -0.15026737 0.62855128
## Cost 0.53750238 -0.11762875 0.30294347
## Load 0.29890373 0.06429342 -0.24781930
## D.Demand -0.71916993 -0.05155339 -0.12223012
## Sales 0.14953365 0.66050223 0.10339649
## Nuclear 0.02644086 0.48879175 -0.08466572
## Fuel_Cost -0.25235278 0.48914707 0.43300956
# Proportion and cumulative proportion of variance explained per component.
summary(object = pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.4741 1.3785 1.1504 0.9984 0.80562 0.75608 0.46530
## Proportion of Variance 0.2716 0.2375 0.1654 0.1246 0.08113 0.07146 0.02706
## Cumulative Proportion 0.2716 0.5091 0.6746 0.7992 0.88031 0.95176 0.97883
## PC8
## Standard deviation 0.41157
## Proportion of Variance 0.02117
## Cumulative Proportion 1.00000
# Scatter-plot matrix of the principal-component scores.
pairs.panels(
  pc$x,
  gap = 0,
  bg = c("red", "yellow", "blue"),
  pch = 21
)

library(devtools)
## Warning: package 'devtools' was built under R version 4.0.5
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.0.5
library(ggbiplot)
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 4.0.5
## Loading required package: scales
## Warning: package 'scales' was built under R version 4.0.5
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
## Loading required package: grid
# PCA biplot with points labelled by company and the legend laid out
# horizontally above the plot.
g <- ggbiplot(
  pc,
  obs.scale = 1,
  var.scale = 1,
  labels = data$Company,
  circle = TRUE
) +
  scale_color_discrete(name = "") +
  theme(legend.direction = "horizontal", legend.position = "top")
print(g)
