Data Table:
df
Checking for Missing Values in the Data Frame:
Distribution of Rules:
Principal Component Analysis :
From the plot above, we can keep the first four principal components, which are sufficient to explain the variance in our data.
# Run PCA on the scaled data and keep the scores of the first four components.
pc <- prcomp(df.sc)
comp <- data.frame(pc[["x"]][, seq_len(4)])
# Pairs plot of the retained scores; half-transparent black shows overplotting.
plot(comp, pch = 16, col = rgb(0, 0, 0, alpha = 0.5))
Principal Component Loadings:
Loadings:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10 Comp.11 Comp.12 Comp.13
VIB_RULE1 -0.236 0.457 0.253 -0.234 -0.181 0.175 -0.109 0.725
VIB_RULE2 0.434 0.437 0.128 0.425 -0.258 0.313 -0.301 0.242 -0.289 -0.134
DE_RULE1 -0.299 -0.292 -0.297 -0.207 -0.220 0.345 -0.359 0.523 -0.312 -0.144
DE_RULE2 -0.370 -0.145 -0.229 0.176 0.188 -0.429 -0.298 0.555 0.373
NDE_RULE1 -0.365 -0.127 0.238 0.321 0.117 -0.140 0.277 0.743
NDE_RULE2 -0.355 -0.136 0.104 0.241 0.369 -0.130 0.186 -0.175 0.340 -0.666
IDE_RULE2 -0.230 0.316 -0.457 0.144 -0.146 0.555 0.257 -0.354 -0.302
INDE_RULE2 -0.387 0.141 0.582 0.277 0.478 -0.404
OIL_RULE1 -0.238 -0.375 -0.753 -0.363 0.132 -0.231 0.137
OIL_RULE2 -0.371 -0.168 0.161 0.161 -0.302 -0.247 -0.444 -0.271 -0.599
RULE3_1 0.326 -0.680 0.173 0.176 -0.336 -0.314 0.326 0.180 0.118
VIB_RULE4 0.236 -0.464 0.248 0.251 -0.222 0.227 -0.102 0.173 -0.132 0.664
Power -0.176 -0.668 0.589 0.368 0.112 -0.133
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10 Comp.11 Comp.12 Comp.13
SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
Proportion Var 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077
Cumulative Var 0.077 0.154 0.231 0.308 0.385 0.462 0.538 0.615 0.692 0.769 0.846 0.923 1.000
Let's Check for the Optimum Number of Clusters: Within-Cluster Sum of Squares
# Elbow curve: total within-cluster sum of squares for k = 1..15.
wss <- numeric(15)
# k = 1 has no clustering to run; its WSS is the total sum of squares of the
# data, i.e. (n - 1) times the summed column variances.
wss[1] <- (nrow(df.sc) - 1) * sum(apply(df.sc, 2, var))
for (i in 2:15) {
  # Several random starts keep a single unlucky initialisation from distorting
  # the curve; the settings match the final kmeans() fit used for clustering.
  wss[i] <- kmeans(df.sc, centers = i, nstart = 25, iter.max = 1000)$tot.withinss
}
plot(1:15, wss, type = "b", xlab = "No of Cluster",
     ylab = "Within Groups of sum of square", col = "red", pch = 16,
     main = "WSS by Cluster")
# Annotation is placed at fixed plot coordinates (x = 10, y = 650).
text(x = 10, y = 650,
     labels = "No Clear Elbow \nwe can Observe \n Within the Plot \n\n\n Between 3 and 5 cluster is Optimium")
Judging by the within-group sum of squares, the optimum K lies between 3 and 5; in our case we will use K = 5.
# Cluster the retained principal-component scores into 5 groups, using 25
# random starts and at most 1000 iterations per start.
# NOTE(review): no set.seed() call is visible in this section, so the cluster
# numbering may differ between runs -- confirm whether a seed is set earlier.
k <- kmeans(comp, centers = 5, nstart = 25, iter.max = 1000)
library(RColorBrewer)
library(scales)
# Install semi-transparent Set1 colours as the active palette so the integer
# cluster ids passed as `col` below index into it.
palette(alpha(brewer.pal(9, "Set1"), 0.5))
# Spell out `cluster`: `k$clust` only worked through `$` partial matching.
plot(comp, col = k$cluster, pch = 16)
Cluster Size
# Number of turbines per cluster, smallest cluster first.
sort(table(k[["cluster"]]), decreasing = FALSE)
4 5 2 1 3
1 2 13 17 35
# Cluster labels (as character strings) ordered from the smallest cluster to
# the largest; clust[1] is therefore the label of the smallest cluster.
clust <- names(sort(table(k[["cluster"]]), decreasing = FALSE))
Turbines in Cluster-1
# Turbines in the smallest cluster (clust is ordered by cluster size).
# `k$cluster` is spelled out; `k$clust` relied on `$` partial matching.
rownames(df[k$cluster == clust[1], ])
[1] "JMA173"
Turbines in Cluster-2
# Turbines in the second-smallest cluster (clust is ordered by cluster size).
# `k$cluster` is spelled out; `k$clust` relied on `$` partial matching.
rownames(df[k$cluster == clust[2], ])
[1] "JMD511" "JMA174"
Turbines in Cluster-3
# Turbines in the third-smallest cluster (clust is ordered by cluster size).
# `k$cluster` is spelled out; `k$clust` relied on `$` partial matching.
rownames(df[k$cluster == clust[3], ])
[1] "JMA161" "JMA188" "JMD553" "JMA134" "JMA198" "JMA129" "JMD585" "JMA164" "JMA196" "JMA163" "JMA041" "JMA167" "JMA186"
Turbines in Cluster-4
# Turbines in the fourth-smallest cluster (clust is ordered by cluster size).
# `k$cluster` is spelled out; `k$clust` relied on `$` partial matching.
rownames(df[k$cluster == clust[4], ])
[1] "JMA183" "JMA182" "JMA053" "JMA133" "JMA195" "JMA102" "JMA105" "JMA165" "JMD563" "JMA187" "JMA216" "JMA197" "JMA203"
[14] "JMA044" "JMA103" "JMA189" "JMD338"
3D View of Clustering: PC1,PC2,PC3
library(rgl)
# 3D scatter of the PC1, PC2 and PC3 scores, coloured by cluster id.
# `k$cluster` is spelled out; `k$clust` relied on `$` partial matching.
plot3d(comp$PC1, comp$PC2, comp$PC3, col = k$cluster)
3D View of Clustering: PC1,PC3,PC4
library(rgl)
# 3D scatter of the PC1, PC3 and PC4 scores, coloured by cluster id.
# `k$cluster` is spelled out; `k$clust` relied on `$` partial matching.
plot3d(comp$PC1, comp$PC3, comp$PC4, col = k$cluster)
# List the column names of the original data frame.
colnames(df)
[1] "VIB_RULE1" "VIB_RULE2" "DE_RULE1" "DE_RULE2" "NDE_RULE1" "NDE_RULE2" "IDE_RULE1" "IDE_RULE2" "INDE_RULE1"
[10] "INDE_RULE2" "OIL_RULE1" "OIL_RULE2" "RULE3" "RULE3_1" "VIB_RULE4" "OIL_RULE4" "DE_RULE4" "NDE_RULE4"
[19] "IDE_RULE4" "INDE_RULE4" "Power"
Wind Turbine Accommodation by Cluster:
# Prepend the k-means cluster id as a `Cluster` column to the original data.
# check.names = FALSE keeps the column names exactly as they are in `df`.
df.clus <- data.frame(Cluster = k$cluster, df, check.names = FALSE)
# Print the combined table, then its column names.
as.data.frame(df.clus)
colnames(df.clus)
[1] "Cluster" "VIB_RULE1" "VIB_RULE2" "DE_RULE1" "DE_RULE2" "NDE_RULE1" "NDE_RULE2" "IDE_RULE1" "IDE_RULE2"
[10] "INDE_RULE1" "INDE_RULE2" "OIL_RULE1" "OIL_RULE2" "RULE3" "RULE3_1" "VIB_RULE4" "OIL_RULE4" "DE_RULE4"
[19] "NDE_RULE4" "IDE_RULE4" "INDE_RULE4" "Power"
Classifying the Cluster:
library(rpart)
library(partykit)
library(rpart.plot)
library(randomForest)
library(rattle)
#res <- rpart(as.formula(paste(input$vars1," ~ ",paste(input$vars2,collapse="+"))),data=dat)
# Fit a decision tree predicting the cluster label from every other column of
# df.clus. `Cluster` holds integer labels, so without method = "class" rpart
# would treat it as a numeric response and grow a regression (anova) tree.
fit <- rpart(Cluster ~ ., data = df.clus, method = "class")
fancyRpartPlot(fit)