# The data set has 45781 observations and contains the kW, kVA, ServiceID and Type variables.
# Methods tried: k-means, flexclust, apcluster, agglomerative clustering, tclust, trimcluster, fuzzy means, amap, mclust.
# Load the TN1 data set from CSV, then split it into the label (last column)
# and the feature columns (every column before the last).
TN1 <- read.table("C:\\Users\\Hari\\Desktop\\TN1.csv", header = TRUE, sep = ",")
Response <- TN1[, ncol(TN1)]
# seq_len() avoids the descending 1:0 sequence that `1:(n - 1)` produces when
# there is only one column, and drop = FALSE keeps a data frame even when a
# single feature column remains.
Predictor <- TN1[, seq_len(ncol(TN1) - 1), drop = FALSE]

# Principal components analysis (on the correlation matrix) for dimension
# reduction. TRUE is spelled out because `T` is an ordinary variable that can
# be reassigned.
pca <- princomp(Predictor, cor = TRUE)
pc.comp <- pca$scores
# Scores on the first two components, negated for convenience.
pc.comp1 <- -1 * pc.comp[, 1]
pc.comp2 <- -1 * pc.comp[, 2]
summary(pca)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.0507150 0.9984301 0.9482275
## Proportion of Variance 0.3680006 0.3322876 0.2997118
## Cumulative Proportion 0.3680006 0.7002882 1.0000000
## K-Means
# Cluster the observations on their first two principal-component scores.
X <- cbind(pc.comp1, pc.comp2)
# kmeans() starts from randomly chosen centres. Fixing the seed makes the
# partition reproducible, and nstart keeps the best of several random starts
# instead of trusting a single one. Cluster labels recorded from earlier
# unseeded runs may therefore differ from what this prints.
set.seed(42)
cl <- kmeans(X, centers = 5, nstart = 25)
cl$cluster
## [1] 4 2 1 1 1 2 4 1 2 4 3 1 5 4 5 5 1 1 1 4 5 1 3 4 5 1 4 5 1 3 3 2 4 1 2
## [36] 4 4 4 2 4 3 1 3 3 4 1 5 1 5 5 1 4 1 3 1 4 3 5 4 5 4 1 2 4 2 4 2 4 4 2
## [71] 5 5 3 5 5 3 3 1 1 1 1 5 5 3 1 3 5 3 1 1
# Scatter plot of the two component scores, coloured by k-means cluster,
# with the fitted cluster centres overlaid using plotting symbol 3.
plot(x = pc.comp1, y = pc.comp2, col = cl$cluster)
points(x = cl$centers, pch = 3)
## K-Centroids Cluster Analysis (Flexclust)
library(flexclust)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: modeltools
## Loading required package: stats4
# Fit a four-cluster k-centroids model on the feature columns and print the
# fitted object (call and cluster sizes).
cl1 <- kcca(x = Predictor, k = 4)
cl1
## kcca object of family 'kmeans'
##
## call:
## kcca(x = Predictor, k = 4)
##
## cluster sizes:
##
## 1 2 3 4
## 40 10 20 20
# Bar chart of the centroids of the first fit.
barplot(height = cl1)
# Refit with the k-medians family and kmeans++ initial centroids; the
# centroids are expected to come out similar to those of the first fit.
cl2 <- kcca(
  x = Predictor,
  k = 4,
  family = kccaFamily("kmedians"),
  control = list(initcent = "kmeanspp")
)
cl2
## kcca object of family 'kmedians'
##
## call:
## kcca(x = Predictor, k = 4, family = kccaFamily("kmedians"),
## control = list(initcent = "kmeanspp"))
##
## cluster sizes:
##
## 1 2 3 4
## 20 20 40 10
# Bar chart of the centroids of the k-medians fit.
barplot(height = cl2)
## apcluster
## build the input matrix for affinity propagation
# Use the measured ForkVA.real / ForkW.real values themselves. The earlier
# rnorm(TN1$<column>) calls only used each column's *length* (rnorm() treats a
# vector first argument as "how many draws"), so the similarity matrix was
# computed on standard-normal noise rather than on the data. Building `x`
# directly also stops this step from overwriting the `cl1` / `cl2` flexclust
# fits.
# NOTE(review): assumes the goal is to cluster observations on these two
# measurement columns - confirm.
x <- as.matrix(TN1[, c("ForkVA.real", "ForkW.real")])
## compute similarity matrix
library(apcluster)
##
## Attaching package: 'apcluster'
## The following object is masked from 'package:stats':
##
## heatmap
# Pairwise similarity matrix for the rows of x (negDistMat with r = 2).
sim <- negDistMat(x, r = 2)
# Affinity propagation on the similarity matrix, run with details = TRUE.
apres <- apcluster(s = sim, details = TRUE)
# Draw the affinity-propagation result together with the data.
plot(apres, x)
# Join the affinity-propagation clusters by agglomerative clustering.
aggres <- aggExCluster(s = sim, x = apres)
# Draw the agglomerative result together with the data.
plot(aggres, x)
# Heatmap of the similarity matrix for the affinity-propagation result.
heatmap(apres, sim)
## Trimmed cluster
library(tclust)
## Loading required package: mvtnorm
## Loading required package: sn
##
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
##
## sd
## Loading required package: mclust
## Package 'mclust' version 5.1
## Type 'citation("mclust")' for citing this R package in publications.
## Loading required package: cluster
# Trimmed clustering (k = 3, alpha = 0.1) on the feature columns only.
# Passing the whole TN1 frame also handed the label column (Type, kept
# separately in `Response`) to the algorithm as if it were a measurement;
# `Predictor` matches what the flexclust and trimmed k-means fits use.
clus <- tclust(Predictor, k = 3, alpha = 0.1)
# Pairs plot of the clustering solution. The cluster codes are shifted by one
# before being used as colours - presumably so that code 0 still maps to a
# visible colour; confirm against the tclust documentation.
pairs(TN1, col = clus$cluster + 1)
# Two coordinates
plot(clus)
# Discriminant factors of the fitted clustering, then their plot.
dsc.1 <- DiscrFact(clus)
plot(dsc.1)
library(trimcluster)
# Two columns of standard-normal draws: rnorm() only uses the length of a
# vector first argument, so these are random values, not the TN1 data.
# NOTE(review): `y` is not used again in the visible part of this script.
y <- cbind(
  rnorm(TN1$ForkVA.real),
  rnorm(TN1$ServiceID)
)
# Trimmed k-means with three clusters and trim = 0.1 on the feature columns;
# the bare name prints the fitted object.
tc <- trimkmeans(Predictor, k = 3, trim = 0.1)
tc
## * trimmed k-means *
## trim= 0.1 , k= 3
## Classification (trimmed points are indicated by 4 ):
## [1] 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 2 4 4 4 4
## [36] 4 4 4 4 4 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2
## [71] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Means:
## ForkVA.real ForkW.real ServiceID
## [1,] 0.4749531 0.4834966 419092838
## [2,] 0.4927409 0.4555299 930162779
## [3,] 0.4619374 0.4730090 604775427
## Trimmed mean squares: 1.847054e+15
# Component-by-component overview of the trimmed k-means result.
summary(object = tc)
## Length Class Mode
## classification 90 -none- numeric
## means 9 -none- numeric
## criterion 1 -none- numeric
## disttom 90 -none- numeric
## ropt 1 -none- numeric
## k 1 -none- numeric
## trim 1 -none- numeric
## runs 1 -none- numeric
## scaling 1 -none- logical
## Fuzzy Means
# Fuzzy clustering into four clusters on the feature columns only, so the
# class label (Type) does not enter the dissimilarities. This matches the
# Response / Predictor split made when the data was loaded.
# NOTE(review): recorded cluster means elsewhere in this script show ServiceID
# on a ~1e8 scale against ~0.5 for the other two columns, so it will dominate
# the distances - confirm that clustering on it is intended.
fannyx <- fanny(Predictor, k = 4)
summary(fannyx)
## Fuzzy Clustering object of class 'fanny' :
## m.ship.expon. 2
## objective 1.094687e+09
## tolerance 1e-15
## iterations 52
## converged 1
## maxit 500
## n 90
## Membership coefficients (in %, rounded):
## [,1] [,2] [,3] [,4]
## [1,] 58 16 14 12
## [2,] 58 16 14 12
## [3,] 58 16 14 12
## [4,] 58 16 14 12
## [5,] 58 16 14 12
## [6,] 58 16 14 12
## [7,] 58 16 14 12
## [8,] 58 16 14 12
## [9,] 58 16 14 12
## [10,] 58 16 14 12
## [11,] 1 98 0 1
## [12,] 1 98 0 1
## [13,] 1 98 0 1
## [14,] 1 98 0 1
## [15,] 1 98 0 1
## [16,] 1 98 0 1
## [17,] 1 98 0 1
## [18,] 1 98 0 1
## [19,] 1 98 0 1
## [20,] 1 98 0 1
## [21,] 88 6 2 4
## [22,] 88 6 2 4
## [23,] 88 6 2 4
## [24,] 88 6 2 4
## [25,] 88 6 2 4
## [26,] 88 6 2 4
## [27,] 88 6 2 4
## [28,] 88 6 2 4
## [29,] 88 6 2 4
## [30,] 88 6 2 4
## [31,] 32 16 39 13
## [32,] 32 16 39 13
## [33,] 32 16 39 13
## [34,] 32 16 39 13
## [35,] 32 16 39 13
## [36,] 32 16 39 13
## [37,] 32 16 39 13
## [38,] 32 16 39 13
## [39,] 32 16 39 13
## [40,] 32 16 39 13
## [41,] 1 98 0 1
## [42,] 1 98 0 1
## [43,] 1 98 0 1
## [44,] 1 98 0 1
## [45,] 1 98 0 1
## [46,] 1 98 0 1
## [47,] 1 98 0 1
## [48,] 1 98 0 1
## [49,] 1 98 0 1
## [50,] 1 98 0 1
## [51,] 79 11 3 6
## [52,] 79 11 3 6
## [53,] 79 11 3 6
## [54,] 79 11 3 6
## [55,] 79 11 3 6
## [56,] 79 11 3 6
## [57,] 79 11 3 6
## [58,] 79 11 3 6
## [59,] 79 11 3 6
## [60,] 79 11 3 6
## [61,] 1 1 97 1
## [62,] 1 1 97 1
## [63,] 1 1 97 1
## [64,] 1 1 97 1
## [65,] 1 1 97 1
## [66,] 1 1 97 1
## [67,] 1 1 97 1
## [68,] 1 1 97 1
## [69,] 1 1 97 1
## [70,] 1 1 97 1
## [71,] 2 5 1 92
## [72,] 2 5 1 92
## [73,] 2 5 1 92
## [74,] 2 5 1 92
## [75,] 2 5 1 92
## [76,] 2 5 1 92
## [77,] 2 5 1 92
## [78,] 2 5 1 92
## [79,] 2 5 1 92
## [80,] 2 5 1 92
## [81,] 2 4 1 93
## [82,] 2 4 1 93
## [83,] 2 4 1 93
## [84,] 2 4 1 93
## [85,] 2 4 1 93
## [86,] 2 4 1 93
## [87,] 2 4 1 93
## [88,] 2 4 1 93
## [89,] 2 4 1 93
## [90,] 2 4 1 93
## Fuzzyness coefficients:
## dunn_coeff normalized
## 0.7480220 0.6640294
## Closest hard clustering:
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3
## [36] 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3
## [71] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
##
## Silhouette plot information:
## cluster neighbor sil_width
## 23 1 2 0.7001655
## 21 1 2 0.7001655
## 27 1 2 0.7001655
## 26 1 2 0.7001655
## 30 1 2 0.7001655
## 29 1 2 0.7001655
## 25 1 2 0.7001655
## 28 1 2 0.7001655
## 22 1 2 0.7001655
## 24 1 2 0.7001655
## 9 1 3 0.6381402
## 5 1 3 0.6381402
## 3 1 3 0.6381402
## 2 1 3 0.6381402
## 6 1 3 0.6381402
## 10 1 3 0.6381402
## 7 1 3 0.6381402
## 8 1 3 0.6381402
## 4 1 3 0.6381402
## 1 1 3 0.6381402
## 57 1 2 0.5869010
## 51 1 2 0.5869010
## 59 1 2 0.5869010
## 56 1 2 0.5869010
## 54 1 2 0.5869010
## 58 1 2 0.5869010
## 52 1 2 0.5869010
## 55 1 2 0.5869010
## 53 1 2 0.5869010
## 60 1 2 0.5869010
## 14 2 4 0.9859293
## 16 2 4 0.9859293
## 18 2 4 0.9859293
## 17 2 4 0.9859293
## 13 2 4 0.9859293
## 15 2 4 0.9859293
## 20 2 4 0.9859293
## 19 2 4 0.9859293
## 12 2 4 0.9859293
## 11 2 4 0.9859293
## 44 2 4 0.9855428
## 47 2 4 0.9855428
## 43 2 4 0.9855428
## 49 2 4 0.9855428
## 46 2 4 0.9855428
## 41 2 4 0.9855428
## 50 2 4 0.9855428
## 48 2 4 0.9855428
## 45 2 4 0.9855428
## 42 2 4 0.9855428
## 70 3 1 0.7364933
## 65 3 1 0.7364933
## 64 3 1 0.7364933
## 66 3 1 0.7364933
## 69 3 1 0.7364933
## 61 3 1 0.7364933
## 67 3 1 0.7364933
## 68 3 1 0.7364933
## 63 3 1 0.7364933
## 62 3 1 0.7364933
## 40 3 1 0.4722870
## 36 3 1 0.4722870
## 39 3 1 0.4722870
## 33 3 1 0.4722870
## 31 3 1 0.4722870
## 32 3 1 0.4722870
## 35 3 1 0.4722870
## 34 3 1 0.4722870
## 37 3 1 0.4722870
## 38 3 1 0.4722870
## 86 4 2 0.9055175
## 81 4 2 0.9055175
## 88 4 2 0.9055175
## 85 4 2 0.9055175
## 89 4 2 0.9055175
## 83 4 2 0.9055175
## 87 4 2 0.9055175
## 82 4 2 0.9055175
## 90 4 2 0.9055175
## 84 4 2 0.9055175
## 73 4 2 0.8848453
## 76 4 2 0.8848453
## 72 4 2 0.8848453
## 77 4 2 0.8848453
## 79 4 2 0.8848453
## 78 4 2 0.8848453
## 75 4 2 0.8848453
## 74 4 2 0.8848453
## 71 4 2 0.8848453
## 80 4 2 0.8848453
## Average silhouette width per cluster:
## [1] 0.6417356 0.9857360 0.6043901 0.8951814
## Average silhouette width of total data set:
## [1] 0.7662024
##
## 4005 dissimilarities, summarized :
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 80111000 172860000 199540000 296110000 570780000
## Metric : euclidean
## Number of objects : 90
##
## Available components:
## [1] "membership" "coeff" "memb.exp" "clustering" "k.crisp"
## [6] "objective" "convergence" "diss" "call" "silinfo"
## [11] "data"
# Diagnostic plots for the fuzzy clustering fit.
plot(x = fannyx)
library(amap)
# Average-linkage hierarchical clustering on the numeric feature columns.
# Passing the full TN1 frame made hcluster() coerce the Type labels to
# numbers, which raised "NAs introduced by coercion"; `Predictor` holds only
# the columns before the label, so the warning no longer occurs. The linkage
# name is written out in full rather than relying on partial matching of
# "ave".
hc <- hcluster(Predictor, link = "average")
plot(hc)
# Same dendrogram with hang = -1, which draws all labels from the baseline.
plot(hc, hang = -1)
library(reshape2)
# Count the rows for every Type x ServiceID combination. Several rows fall
# into each cell, so the aggregation is stated explicitly instead of relying
# on reshape2's implicit fallback to length(), which also emitted an
# "Aggregation function missing" message. The formula names the columns
# directly because acast() already evaluates it inside TN1.
acast(
  TN1,
  Type ~ ServiceID,
  fun.aggregate = length,
  value.var = "ForkW.real"
)
## 374897109 389457902 455007891 457008451 562321452
## AutomobileIndustry 0 0 0 10 0
## Bank 0 0 0 0 0
## BpoIndustry 0 0 0 0 0
## CementIndustry 0 0 0 0 0
## ChemicalIndustry 0 10 0 0 0
## Farmers1 0 0 10 0 0
## Farmers2 0 0 0 0 10
## FertilizerIndustry 10 0 0 0 0
## FoodIndustry 0 0 0 0 0
## 581000256 671004572 775001231 945678934
## AutomobileIndustry 0 0 0 0
## Bank 0 10 0 0
## BpoIndustry 10 0 0 0
## CementIndustry 0 0 10 0
## ChemicalIndustry 0 0 0 0
## Farmers1 0 0 0 0
## Farmers2 0 0 0 0
## FertilizerIndustry 0 0 0 0
## FoodIndustry 0 0 0 10
# Print TN1 converted to a matrix; the recorded output shows every cell as a
# quoted string, i.e. a character matrix.
as.matrix(x = TN1)
## ForkVA.real ForkW.real ServiceID Type
## [1,] "0.86593564" "0.143762529" "671004572" "Bank"
## [2,] "0.12980418" "0.088929798" "671004572" "Bank"
## [3,] "0.06180149" "0.552047074" "671004572" "Bank"
## [4,] "0.09911646" "0.848172019" "671004572" "Bank"
## [5,] "0.20570391" "0.624722466" "671004572" "Bank"
## [6,] "0.16402897" "0.038167964" "671004572" "Bank"
## [7,] "0.61983410" "0.079610681" "671004572" "Bank"
## [8,] "0.01132422" "0.718472400" "671004572" "Bank"
## [9,] "0.34446118" "0.271178428" "671004572" "Bank"
## [10,] "0.67916915" "0.191223431" "671004572" "Bank"
## [11,] "0.49139957" "0.999932744" "457008451" "AutomobileIndustry"
## [12,] "0.07652585" "0.281726878" "457008451" "AutomobileIndustry"
## [13,] "0.87515833" "0.464859129" "457008451" "AutomobileIndustry"
## [14,] "0.64991327" "0.193133884" "457008451" "AutomobileIndustry"
## [15,] "0.78757786" "0.669413202" "457008451" "AutomobileIndustry"
## [16,] "0.81171847" "0.332674854" "457008451" "AutomobileIndustry"
## [17,] "0.24022092" "0.497692577" "457008451" "AutomobileIndustry"
## [18,] "0.29985077" "0.172019319" "457008451" "AutomobileIndustry"
## [19,] "0.12245926" "0.177083235" "457008451" "AutomobileIndustry"
## [20,] "0.86977533" "0.170256120" "457008451" "AutomobileIndustry"
## [21,] "0.77759558" "0.534224742" "581000256" "BpoIndustry"
## [22,] "0.10862193" "0.457122953" "581000256" "BpoIndustry"
## [23,] "0.41906573" "0.642565168" "581000256" "BpoIndustry"
## [24,] "0.87731605" "0.048764741" "581000256" "BpoIndustry"
## [25,] "0.87527674" "0.775392277" "581000256" "BpoIndustry"
## [26,] "0.29715735" "0.514345606" "581000256" "BpoIndustry"
## [27,] "0.74133330" "0.398913153" "581000256" "BpoIndustry"
## [28,] "0.67922815" "0.985437729" "581000256" "BpoIndustry"
## [29,] "0.44705484" "0.273268861" "581000256" "BpoIndustry"
## [30,] "0.49465370" "0.855913294" "581000256" "BpoIndustry"
## [31,] "0.45485969" "0.817051560" "775001231" "CementIndustry"
## [32,] "0.28366765" "0.104793120" "775001231" "CementIndustry"
## [33,] "0.60768234" "0.687070454" "775001231" "CementIndustry"
## [34,] "0.18650264" "0.950588651" "775001231" "CementIndustry"
## [35,] "0.07619765" "0.315548891" "775001231" "CementIndustry"
## [36,] "0.51976463" "0.632151768" "775001231" "CementIndustry"
## [37,] "0.87652107" "0.055456617" "775001231" "CementIndustry"
## [38,] "0.83887772" "0.009060051" "775001231" "CementIndustry"
## [39,] "0.32140231" "0.522392850" "775001231" "CementIndustry"
## [40,] "0.51113739" "0.529004180" "775001231" "CementIndustry"
## [41,] "0.39296486" "0.911056541" "455007891" "Farmers1"
## [42,] "0.11657597" "0.050001042" "455007891" "Farmers1"
## [43,] "0.38223472" "0.836797447" "455007891" "Farmers1"
## [44,] "0.39608830" "0.687679051" "455007891" "Farmers1"
## [45,] "0.91914764" "0.143218874" "455007891" "Farmers1"
## [46,] "0.32520363" "0.220849050" "455007891" "Farmers1"
## [47,] "0.68026602" "0.719326435" "455007891" "Farmers1"
## [48,] "0.10225351" "0.485972453" "455007891" "Farmers1"
## [49,] "0.75823863" "0.361468657" "455007891" "Farmers1"
## [50,] "0.89073484" "0.616021895" "455007891" "Farmers1"
## [51,] "0.27981088" "0.343922855" "562321452" "Farmers2"
## [52,] "0.66898771" "0.079483735" "562321452" "Farmers2"
## [53,] "0.07462722" "0.158309555" "562321452" "Farmers2"
## [54,] "0.41282601" "0.832440596" "562321452" "Farmers2"
## [55,] "0.06018988" "0.803370663" "562321452" "Farmers2"
## [56,] "0.56194497" "0.195324363" "562321452" "Farmers2"
## [57,] "0.44509420" "0.693574743" "562321452" "Farmers2"
## [58,] "0.83280090" "0.671593792" "562321452" "Farmers2"
## [59,] "0.76753424" "0.387378591" "562321452" "Farmers2"
## [60,] "0.85582218" "0.982635083" "562321452" "Farmers2"
## [61,] "0.88641538" "0.525571800" "945678934" "FoodIndustry"
## [62,] "0.19024977" "0.868567029" "945678934" "FoodIndustry"
## [63,] "0.08415354" "0.057131513" "945678934" "FoodIndustry"
## [64,] "0.66415492" "0.511431164" "945678934" "FoodIndustry"
## [65,] "0.36429666" "0.370514075" "945678934" "FoodIndustry"
## [66,] "0.45430476" "0.668504796" "945678934" "FoodIndustry"
## [67,] "0.24062877" "0.071933625" "945678934" "FoodIndustry"
## [68,] "0.97393412" "0.622084835" "945678934" "FoodIndustry"
## [69,] "0.64885761" "0.143441897" "945678934" "FoodIndustry"
## [70,] "0.45829515" "0.354596279" "945678934" "FoodIndustry"
## [71,] "0.83809622" "0.972833500" "389457902" "ChemicalIndustry"
## [72,] "0.66166889" "0.742808053" "389457902" "ChemicalIndustry"
## [73,] "0.24290970" "0.534019396" "389457902" "ChemicalIndustry"
## [74,] "0.77274992" "0.230826324" "389457902" "ChemicalIndustry"
## [75,] "0.77550891" "0.962347883" "389457902" "ChemicalIndustry"
## [76,] "0.24701364" "0.631533718" "389457902" "ChemicalIndustry"
## [77,] "0.58837541" "0.805967053" "389457902" "ChemicalIndustry"
## [78,] "0.20871605" "0.250639534" "389457902" "ChemicalIndustry"
## [79,] "0.10299392" "0.413812874" "389457902" "ChemicalIndustry"
## [80,] "0.31239864" "0.033331257" "389457902" "ChemicalIndustry"
## [81,] "0.25800905" "0.397515671" "374897109" "FertilizerIndustry"
## [82,] "0.73548144" "0.302799253" "374897109" "FertilizerIndustry"
## [83,] "0.72394882" "0.421190181" "374897109" "FertilizerIndustry"
## [84,] "0.30304457" "0.853019454" "374897109" "FertilizerIndustry"
## [85,] "0.21426962" "0.494717569" "374897109" "FertilizerIndustry"
## [86,] "0.32024035" "0.538717204" "374897109" "FertilizerIndustry"
## [87,] "0.74247218" "0.507587516" "374897109" "FertilizerIndustry"
## [88,] "0.47647602" "0.356155665" "374897109" "FertilizerIndustry"
## [89,] "0.22274525" "0.275880272" "374897109" "FertilizerIndustry"
## [90,] "0.06269738" "0.622976396" "374897109" "FertilizerIndustry"
# Model-based clustering on the feature columns only, so the Type label
# (kept separately in `Response`) is not treated as a clustering variable.
model <- Mclust(Predictor)
# Diagnostic plots for the fitted mixture model.
plot(model)