#Principal Component Analysis and Hierarchical Clustering
# ref: http://moderndata.plot.ly/principal-component-analysis-cluster-plotly/
# Fit a principal component analysis to the built-in mtcars data set.
# cor = TRUE bases the decomposition on the correlation matrix, so variables
# measured on very different scales contribute comparably.
pcaCars <- princomp(x = mtcars, cor = TRUE)

# Components stored on the fitted princomp object.
names(pcaCars)
## [1] "sdev" "loadings" "center" "scale" "n.obs" "scores"
## [7] "call"

# Per-component standard deviation and (cumulative) share of variance.
summary(pcaCars)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 2.5706809 1.6280258 0.79195787 0.51922773
## Proportion of Variance 0.6007637 0.2409516 0.05701793 0.02450886
## Cumulative Proportion 0.6007637 0.8417153 0.89873322 0.92324208
## Comp.5 Comp.6 Comp.7 Comp.8
## Standard deviation 0.47270615 0.45999578 0.36777981 0.35057301
## Proportion of Variance 0.02031374 0.01923601 0.01229654 0.01117286
## Cumulative Proportion 0.94355581 0.96279183 0.97508837 0.98626123
## Comp.9 Comp.10 Comp.11
## Standard deviation 0.277572792 0.228112781 0.148473587
## Proportion of Variance 0.007004241 0.004730495 0.002004037
## Cumulative Proportion 0.993265468 0.997995963 1.000000000

# Scree plot drawn as a line chart of the component variances.
plot(pcaCars, type = "lines")

# Agglomerative clustering (Ward's method, "ward.D2") on the pairwise
# distances between the cars' PCA scores, followed by the dendrogram.
carsHC <- hclust(d = dist(pcaCars$scores), method = "ward.D2")
plot(carsHC)

# Cut the tree into three groups and plot each car's cluster number.
carsClusters <- cutree(tree = carsHC, k = 3)
plot(carsClusters)

# Plotting frame: the PCA scores plus the cluster assignment, once as a
# factor (`cluster`) and once as a readable label (`cluster_name`).
carsDf <- data.frame(
  pcaCars$scores,
  cluster = factor(carsClusters),
  cluster_name = paste("Cluster", carsClusters)
)
library(ggplot2)

# Scatter plot of the first two principal components, coloured by cluster.
# Gray reference lines through the origin are drawn first so that the points
# sit on top of them.
p1 <- ggplot(data = carsDf, mapping = aes(x = Comp.1, y = Comp.2)) +
  theme_classic() +
  geom_hline(yintercept = 0, colour = "gray70") +
  geom_vline(xintercept = 0, colour = "gray70") +
  geom_point(mapping = aes(colour = cluster), size = 3, alpha = 0.55) +
  labs(
    x = "PC1",
    y = "PC2",
    title = "PCA Clusters from Hierarchical Clustering of Cars Data"
  ) +
  xlim(-5, 6)

# Show the plot with each car's name placed slightly above its point.
p1 + geom_text(mapping = aes(y = Comp.2 + 0.25, label = rownames(carsDf)))

library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:graphics':
##
## layout
# Interactive version of the PC1/PC2 scatter plot, coloured by cluster label,
# with the car name (row name of carsDf) attached to each point as its text.
#
# Columns of carsDf are referenced with one-sided formulas (~Comp.1): since
# plotly 4.0, plot_ly() evaluates plain arguments in the calling environment,
# so bare column names fail with "object 'Comp.1' not found".
# The trace type is given explicitly so plotly does not have to guess it.
p <- plot_ly(
  carsDf,
  x = ~Comp.1,
  y = ~Comp.2,
  text = rownames(carsDf),
  type = "scatter",
  mode = "markers",
  color = ~cluster_name,
  marker = list(size = 11)
)

# Title and axis labels match the static ggplot2 version of this figure.
p <- layout(
  p,
  title = "PCA Clusters from Hierarchical Clustering of Cars Data",
  xaxis = list(title = "PC1"),
  yaxis = list(title = "PC2")
)
p