# Read the utilities dataset from the local CSV file.
data <- read.csv("C:\\Users\\tariqm\\Documents\\R\\Datasets\\utilities.csv")

# Standardise every column except the first (centre and scale, scale()'s
# defaults) so the variables are comparable when distances are computed.
ndata <- scale(data[, -1, drop = FALSE])

library(factoextra)
## Warning: package 'factoextra' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Pairwise distances between the standardised observations (dist()'s default
# metric), visualised as a heat map.
distance <- dist(ndata)
fviz_dist(
  distance,
  gradient = list(low = "sky blue", mid = "white", high = "red")
)

# Hierarchical clustering on the distance matrix, using hclust()'s default
# linkage.
data.hclust <- hclust(distance)

# Plain dendrogram with default labels.
plot(data.hclust)

# Same tree labelled by company name; hang = -1 is passed so the labels are
# drawn at a common baseline.
plot(
  data.hclust,
  labels = data$Company,
  main = "default from Hclust",
  hang = -1
)

# Outline the 3-cluster cut on the dendrogram that was just drawn.
rect.hclust(data.hclust, k = 3, border = "green")

# Cut the tree into 3 groups and tabulate how many observations fall in each.
member <- cutree(data.hclust, k = 3)
table(member)
## member
##  1  2  3 
## 14  5  3
# Per-cluster means of the standardised variables.
aggregate(ndata, by = list(member), FUN = mean)
##   Group.1 Fixed_charge        RoR        Cost       Load   D.Demand      Sales
## 1       1    0.3068832  0.4326015 -0.31481203 -0.3743722 -0.2605107 -0.1575387
## 2       2   -0.4991075 -0.7113763  0.07812761  1.3365904  0.1343994 -0.6728046
## 3       3   -0.6002757 -0.8331800  1.33891013 -0.4805802  0.9917178  1.8565214
##      Nuclear  Fuel_Cost
## 1  0.3692252 -0.2389329
## 2 -0.6050529  1.2484717
## 3 -0.7146294 -0.9657660
# Per-cluster means on the original measurement scale (first column dropped).
aggregate(data[-1], by = list(member), FUN = mean)
##   Group.1 Fixed_charge       RoR     Cost     Load D.Demand     Sales Nuclear
## 1       1     1.170714 11.707143 155.2143 55.30714 2.428571  8354.786   18.20
## 2       2     1.022000  9.140000 171.4000 62.94000 3.660000  6525.600    1.84
## 3       3     1.003333  8.866667 223.3333 54.83333 6.333333 15504.667    0.00
##   Fuel_Cost
## 1 0.9698571
## 2 1.7970000
## 3 0.5656667
library(cluster)
## Warning: package 'cluster' was built under R version 4.0.5
# Silhouette plot for the 3-group cut of the hierarchical tree, computed
# against the same distance matrix the tree was built from.
plot(silhouette(cutree(data.hclust,3), distance)) 

# Diagnostics for choosing the number of k-means clusters on the
# standardised data: first the "wss" criterion, then the "silhouette" one.
fviz_nbclust(ndata, FUNcluster = kmeans, method = "wss")

fviz_nbclust(ndata, FUNcluster = kmeans, method = "silhouette")

# Fix the RNG seed so the random k-means initialisation is repeatable, then
# fit a 5-cluster solution on the standardised data and show it.
set.seed(123)
kc <- kmeans(ndata, centers = 5)
kc
## K-means clustering with 5 clusters of sizes 6, 7, 3, 1, 5
## 
## Cluster means:
##   Fixed_charge        RoR       Cost       Load    D.Demand      Sales
## 1  -0.61834147 -0.6252226  0.2019400  1.1482980  0.05636417 -0.7402978
## 2   0.50431607  0.7795509 -0.9858961 -0.3375463 -0.48957692  0.3518600
## 3  -0.60027572 -0.8331800  1.3389101 -0.4805802  0.99171778  1.8565214
## 4   2.03732429 -0.8628882  0.5782326 -1.2950193 -0.71864311 -1.5814284
## 5  -0.01133215  0.3313815  0.2189339 -0.3580408  0.16646865 -0.4018738
##      Nuclear  Fuel_Cost
## 1 -0.3722028  1.1759426
## 2 -0.5232108 -0.4105368
## 3 -0.7146294 -0.9657660
## 4  0.2143888  1.6926380
## 5  1.5650384 -0.5954476
## 
## Clustering vector:
##  [1] 2 1 2 5 4 2 1 3 2 5 3 1 5 2 1 3 1 2 2 5 1 5
## 
## Within cluster sum of squares by cluster:
## [1] 21.187976 26.507769  9.533522  0.000000 10.177094
##  (between_SS / total_SS =  59.9 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Bivariate cluster plot of the k-means solution. Plot the same standardised
# numeric matrix that kmeans() was fitted on: the raw `data` frame still
# carries the Company identifier column, which is not a clustering variable
# and must not take part in the projection.
cluster::clusplot(ndata, kc$cluster,
                  color = TRUE,
                  shade = TRUE,
                  labels = 2,
                  lines = 0)

# Plot the fitted solution `kc` on the first two principal axes. Reusing `kc`
# (rather than calling kmeans() again, which would start from a new random
# initialisation) keeps this figure consistent with the cluster sizes and
# assignments reported for `kc`.
fviz_cluster(kc, data = ndata, axes = c(1, 2))

library(psych)
## Warning: package 'psych' was built under R version 4.0.5
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Scatterplot matrix of the raw numeric variables (first column excluded).
# The palette is indexed by `member` (the 3-group cutree() assignment) so each
# point's fill colour shows its cluster; an unindexed 3-colour vector is simply
# recycled down the rows and carries no meaning.
pairs.panels(data[-1],
             gap = 0,
             bg = c("red", "yellow", "blue")[member],
             pch = 21)

# Principal component analysis of the numeric variables (first column
# dropped), with centring and scaling both switched on.
pc <- prcomp(data[-1], center = TRUE, scale. = TRUE)

# List what is stored on the fitted object.
attributes(pc)
## $names
## [1] "sdev"     "rotation" "center"   "scale"    "x"       
## 
## $class
## [1] "prcomp"
# Centring values applied to each variable.
pc[["center"]]
## Fixed_charge          RoR         Cost         Load     D.Demand        Sales 
##     1.114091    10.736364   168.181818    56.977273     3.240909  8914.045455 
##      Nuclear    Fuel_Cost 
##    12.000000     1.102727
# Scaling values applied to each variable.
pc[["scale"]]
## Fixed_charge          RoR         Cost         Load     D.Demand        Sales 
##    0.1845112    2.2440494   41.1913495    4.4611478    3.1182503 3549.9840305 
##      Nuclear    Fuel_Cost 
##   16.7919198    0.5560981
# Component standard deviations and the rotation (loadings) matrix.
print(pc)
## Standard deviations (1, .., p=8):
## [1] 1.4740918 1.3785018 1.1504236 0.9983701 0.8056180 0.7560814 0.4652989
## [8] 0.4115657
## 
## Rotation (n x k) = (8 x 8):
##                      PC1         PC2         PC3         PC4        PC5
## Fixed_charge  0.44554526 -0.23217669  0.06712849 -0.55549758  0.4008403
## RoR           0.57119021 -0.10053490  0.07123367 -0.33209594 -0.3359424
## Cost         -0.34869054  0.16130192  0.46733094 -0.40908380  0.2685680
## Load         -0.28890116 -0.40918419 -0.14259793 -0.33373941 -0.6800711
## D.Demand     -0.35536100  0.28293270  0.28146360 -0.39139699 -0.1626375
## Sales         0.05383343  0.60309487 -0.33199086 -0.19086550 -0.1319721
## Nuclear       0.16797023 -0.08536118  0.73768406  0.33348714 -0.2496462
## Fuel_Cost    -0.33584032 -0.53988503 -0.13442354 -0.03960132  0.2926660
##                      PC6         PC7         PC8
## Fixed_charge -0.00654016  0.20578234 -0.48107955
## RoR          -0.13326000 -0.15026737  0.62855128
## Cost          0.53750238 -0.11762875  0.30294347
## Load          0.29890373  0.06429342 -0.24781930
## D.Demand     -0.71916993 -0.05155339 -0.12223012
## Sales         0.14953365  0.66050223  0.10339649
## Nuclear       0.02644086  0.48879175 -0.08466572
## Fuel_Cost    -0.25235278  0.48914707  0.43300956
# Per-component standard deviation, proportion of variance and cumulative
# proportion for the fitted PCA.
summary(pc)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.4741 1.3785 1.1504 0.9984 0.80562 0.75608 0.46530
## Proportion of Variance 0.2716 0.2375 0.1654 0.1246 0.08113 0.07146 0.02706
## Cumulative Proportion  0.2716 0.5091 0.6746 0.7992 0.88031 0.95176 0.97883
##                            PC8
## Standard deviation     0.41157
## Proportion of Variance 0.02117
## Cumulative Proportion  1.00000
# Scatterplot matrix of the principal component scores. The palette is indexed
# by `member` (the 3-group cutree() assignment) so each point's fill colour
# shows its cluster; an unindexed 3-colour vector is simply recycled down the
# rows and carries no meaning.
pairs.panels(pc$x,
             gap = 0,
             bg = c("red", "yellow", "blue")[member],
             pch = 21)

library(devtools)
## Warning: package 'devtools' was built under R version 4.0.5
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.0.5
library(ggbiplot)
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 4.0.5
## Loading required package: scales
## Warning: package 'scales' was built under R version 4.0.5
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
## Loading required package: grid
# Biplot of the PCA with observations labelled by company name, an untitled
# colour legend, and the legend laid out horizontally above the plot.
g <- ggbiplot(
  pc,
  obs.scale = 1,
  var.scale = 1,
  labels = data$Company,
  circle = TRUE
) +
  scale_color_discrete(name = "") +
  theme(
    legend.direction = "horizontal",
    legend.position = "top"
  )
print(g)