# Read the campus web-data CSV export from its fixed location on disk.
data <- read.csv(
  file = "C:\\Users\\tariqm\\Documents\\R\\Datasets\\Capstone\\campus_web_data_6months.csv"
)

# Show the column names and storage types of what was loaded.
str(object = data)
## 'data.frame':    32 obs. of  10 variables:
##  $ Campus                   : chr  "CAMP001" "CAMP002" "CAMP003" "CAMP004" ...
##  $ Average.Sessions         : int  81 459 17 262 296 202 481 680 108 249 ...
##  $ Average.Bounce.Rate      : int  47 60 75 68 21 50 71 5 70 73 ...
##  $ Total.Bounces            : int  7203 52391 2621 32787 11454 19386 67942 7968 13468 35508 ...
##  $ Average.Repeat.Users     : int  5 37 1 16 29 20 36 67 9 16 ...
##  $ Average.New.Users        : int  64 397 15 211 243 171 394 578 94 213 ...
##  $ Total.Session.Duration   : int  386193 3883454 131926 1256348 4155633 1962315 4502922 3955837 620753 1152193 ...
##  $ Average.Session.Duration : int  24 45 45 25 74 50 54 31 31 24 ...
##  $ Average.Pages.per.Session: num  1.73 1.36 1.24 1.38 1.37 1.76 1.39 2.58 1.4 1.36 ...
##  $ Average.Conversion.Rate  : num  0.008 0.05 0.077 0.014 0.059 0.021 0.06 0.036 0 0.058 ...
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
# Pairwise correlations of every column except the first one (Campus, the
# only character column), rounded to one decimal place.
corr <- round(cor(data[, -1]), digits = 1)

# Lower-triangle correlogram with the rounded coefficient printed in each tile.
ggcorrplot(
  corr,
  title = "Correlogram",
  method = "square",
  type = "lower",
  colors = c("tomato2", "white", "springgreen3"),
  lab = TRUE,
  lab_size = 3,
  ggtheme = theme_bw
)

# Centre and scale every column except the first (Campus) so that all
# variables contribute on a comparable footing; `ndata` is reused further down.
ndata <- scale(data[, -1])

# Principal component analysis of the standardised matrix.
# The formal argument of prcomp() is `scale.` (with a trailing dot); the
# earlier spelling `scale = TRUE` only worked through partial argument
# matching, so the name is now written out in full.
# `ndata` is already standardised, so scaling again is numerically a no-op;
# it is kept so that the `scale` element of `pc` is populated exactly as before.
pc <- prcomp(ndata, scale. = TRUE)

# Standard deviations and the rotation (loadings) matrix of the fit.
print(pc)
## Standard deviations (1, .., p=9):
## [1] 2.0804134 1.4401953 1.2750465 0.7373798 0.4458608 0.3762814 0.2495790
## [8] 0.1540635 0.0428968
## 
## Rotation (n x k) = (9 x 9):
##                                   PC1         PC2         PC3         PC4
## Average.Sessions           0.46697128  0.14797759 -0.04781459  0.04349462
## Average.Bounce.Rate       -0.19511309  0.56130906  0.24563567  0.06478086
## Total.Bounces              0.25968851  0.52366293  0.17222734  0.01261436
## Average.Repeat.Users       0.45980679  0.05159668 -0.05754900 -0.02804140
## Average.New.Users          0.46442044  0.14669018 -0.05644001  0.08576043
## Total.Session.Duration     0.44528308 -0.13651311  0.14899441 -0.20826741
## Average.Session.Duration   0.16601352 -0.50232261  0.37220288 -0.36185943
## Average.Pages.per.Session  0.13857297 -0.20455908 -0.63471739  0.46514610
## Average.Conversion.Rate    0.06543676 -0.22764759  0.58104800  0.77131895
##                                   PC5         PC6          PC7         PC8
## Average.Sessions          -0.06713937 -0.04819242  0.106037257  0.35463106
## Average.Bounce.Rate        0.18010612  0.72371922 -0.060200707  0.14826604
## Total.Bounces              0.52938550 -0.47275016 -0.144382414 -0.31571030
## Average.Repeat.Users      -0.42394685  0.20848708 -0.696172700 -0.25256346
## Average.New.Users         -0.06539026 -0.03891268  0.229376210  0.56765919
## Total.Session.Duration     0.01883710  0.34504703  0.567316957 -0.52438958
## Average.Session.Duration   0.50359753  0.13055125 -0.315005895  0.28414065
## Average.Pages.per.Session  0.48810642  0.26011936 -0.077022419 -0.08878989
## Average.Conversion.Rate   -0.08114851 -0.04087857  0.002474325 -0.05433006
##                                     PC9
## Average.Sessions          -7.823204e-01
## Average.Bounce.Rate       -2.269168e-02
## Total.Bounces              6.524297e-02
## Average.Repeat.Users       1.008700e-01
## Average.New.Users          6.096022e-01
## Total.Session.Duration     3.559853e-02
## Average.Session.Duration  -3.942972e-03
## Average.Pages.per.Session  7.375082e-05
## Average.Conversion.Rate   -1.144091e-02
# Per-component standard deviation, proportion of variance and cumulative
# proportion for the prcomp fit.
print(summary(pc))
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0804 1.4402 1.2750 0.73738 0.44586 0.37628 0.24958
## Proportion of Variance 0.4809 0.2305 0.1806 0.06041 0.02209 0.01573 0.00692
## Cumulative Proportion  0.4809 0.7114 0.8920 0.95242 0.97451 0.99024 0.99716
##                            PC8    PC9
## Standard deviation     0.15406 0.0429
## Proportion of Variance 0.00264 0.0002
## Cumulative Proportion  0.99980 1.0000
# Scree plot of the component variances, drawn as a line chart.
screeplot(pc, type = "lines")

library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Three-component solution from psych::principal() with varimax rotation,
# asking for component scores to be returned as well.
pca <- principal(r = ndata, nfactors = 3, rotate = "varimax", scores = TRUE)

# Print the `values` element of the fit.
pca[["values"]]
## [1] 4.328119966 2.074162540 1.625743666 0.543728911 0.198791866 0.141587686
## [7] 0.062289673 0.023735556 0.001840135
# Rotated loadings table (RC1-RC3) together with the variance summary.
pca[["loadings"]]
## 
## Loadings:
##                           RC1    RC2    RC3   
## Average.Sessions           0.993              
## Average.Bounce.Rate       -0.167 -0.891 -0.308
## Total.Bounces              0.727 -0.592 -0.172
## Average.Repeat.Users       0.940  0.191       
## Average.New.Users          0.988              
## Total.Session.Duration     0.830  0.226  0.439
## Average.Session.Duration   0.119  0.326  0.865
## Average.Pages.per.Session  0.208  0.785 -0.407
## Average.Conversion.Rate          -0.183  0.801
## 
##                  RC1   RC2   RC3
## SS loadings    4.148 2.002 1.878
## Proportion Var 0.461 0.222 0.209
## Cumulative Var 0.461 0.683 0.892
# Component scores from the rotated solution; used as the clustering input and
# as the point coordinates in the plots that follow.
pdata <- pca[["scores"]]

# Seed the RNG before running the k-means based diagnostics below.
set.seed(123)

library(factoextra)
## Warning: package 'factoextra' was built under R version 4.0.5
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Three views on the number of clusters, all computed on the component scores:
# within-cluster sum of squares, ...
fviz_nbclust(x = pdata, FUNcluster = kmeans, method = "wss")

# ... average silhouette width, ...
fviz_nbclust(x = pdata, FUNcluster = kmeans, method = "silhouette")

# ... and the gap statistic.
fviz_nbclust(x = pdata, FUNcluster = kmeans, method = "gap_stat")

# Re-seed immediately before the final fit.
set.seed(123)

# k-means with two centres on the component scores, then print the fit
# (cluster sizes, centres, assignment vector and sums of squares).
cluster <- kmeans(x = pdata, centers = 2)
print(cluster)
## K-means clustering with 2 clusters of sizes 12, 20
## 
## Cluster means:
##          RC1        RC2         RC3
## 1  1.1059982 -0.1880291 -0.13112329
## 2 -0.6635989  0.1128174  0.07867397
## 
## Clustering vector:
##  [1] 2 1 2 2 2 2 1 1 2 2 2 1 1 1 2 2 2 2 1 2 2 2 1 1 2 1 2 2 2 1 1 2
## 
## Within cluster sum of squares by cluster:
## [1] 36.48854 32.01648
##  (between_SS / total_SS =  26.3 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Cluster plot of the k-means result on the first two axes, with small labels.
fviz_cluster(
  object = cluster,
  data = pdata,
  axes = c(1, 2),
  labelsize = 1
)

# Base-graphics biplot of the prcomp fit.
biplot(x = pc)

library(rgl)
## Warning: package 'rgl' was built under R version 4.0.5
# Interactive 3D scatter of the component scores, coloured by cluster number.
plot3d(x = pdata, type = "p", col = cluster$cluster)

# Label each point in bold.
# NOTE(review): rownames(data) are the default row names assigned by
# read.csv(), not the Campus identifiers - confirm that is the intended label.
text3d(x = pdata, texts = rownames(data), font = 2)

# Reference grids for each of the three axes.
grid3d("x")
grid3d("y")
grid3d("z")

# Overlay the variable loadings on the current 3D scene: a text label at each
# variable's (RC1, RC2, RC3) loading, plus a line from the origin to it.
loading_xyz <- pca$loadings[, 1:3, drop = FALSE]
text3d(loading_xyz, texts = rownames(loading_xyz), col = "black")

# lines3d() joins consecutive rows, so the origin is interleaved with each
# loading: origin, loading 1, origin, loading 2, ...
# The matrix is allocated once and filled in a single assignment instead of
# being grown with rbind() inside a `1:nrow()` loop, which copied the whole
# matrix on every pass and iterated over c(1, 0) when there were no rows.
coords <- matrix(
  0,
  nrow = 2L * nrow(loading_xyz),
  ncol = ncol(loading_xyz),
  dimnames = list(NULL, colnames(loading_xyz))
)
coords[2L * seq_len(nrow(loading_xyz)), ] <- loading_xyz
lines3d(coords, col = "black", lwd = 4)

library(pca3d)
## Warning: package 'pca3d' was built under R version 4.0.5
# 2D PCA plot of the prcomp fit, grouped by k-means cluster, with biplot
# arrows and 75% ellipses per group.
pca2d(
  pc,
  group = cluster$cluster,
  biplot = TRUE,
  show.ellipses = TRUE,
  ellipse.ci = 0.75,
  show.plane = TRUE,
  legend = "bottomleft",
  bg = "white",
  axes.color = "white"
)

# Same plot drawn from the rotated component scores, passing a loadings
# matrix explicitly for the biplot arrows.
# NOTE(review): pdata comes from the varimax-rotated principal() fit, while
# pc$rotation holds the unrotated prcomp loadings - confirm this pairing is
# intended.
pca2d(
  pdata,
  group = cluster$cluster,
  biplot = pc$rotation,
  show.ellipses = TRUE,
  ellipse.ci = 0.75,
  show.plane = TRUE,
  legend = "bottomleft",
  bg = "white",
  axes.color = "white"
)

# 3D PCA plot of the prcomp fit on a black background, grouped by k-means
# cluster, with biplot arrows and 75% ellipsoids per group.
pca3d(
  pc,
  group = cluster$cluster,
  biplot = TRUE,
  show.ellipses = TRUE,
  ellipse.ci = 0.75,
  show.plane = TRUE,
  legend = "bottom",
  bg = "black",
  axes.color = "white"
)
## [1] 0.11061592 0.06336506 0.07233034
# 3D plot of the rotated component scores, grouped by k-means cluster, with
# an explicit loadings matrix supplied for the biplot arrows.
# NOTE(review): pdata comes from the varimax-rotated principal() fit, while
# pc$rotation holds the unrotated prcomp loadings - confirm this pairing is
# intended.
pca3d(
  pdata,
  group = cluster$cluster,
  biplot = pc$rotation,
  show.ellipses = TRUE,
  ellipse.ci = 0.75,
  show.plane = TRUE,
  legend = "bottom",
  bg = "black",
  axes.color = "white"
)
## [1] 0.04026762 0.06512394 0.04579045