Clustering Project

TNEB readings over various sectors

DataSet

The dataset has 45781 observations and contains the kW, kVA, ServiceID and Type variables.

Contents

k-means, flexclust, apcluster, agglomerative clustering, tclust, trimcluster (trimmed k-means), fuzzy clustering (fanny), amap, mclust.

# Load the TNEB readings. stringsAsFactors = TRUE keeps text columns as factors
# on R >= 4.0 as well (where the default became FALSE); later steps hand the
# whole of TN1, text column included, to numeric routines such as pairs().
TN1 <- read.table(
  "C:\\Users\\Hari\\Desktop\\TN1.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
# The last column is the response; every column before it is a predictor.
Response <- TN1[, ncol(TN1)]
# seq_len() avoids the 1:0 trap for a one-column file, and drop = FALSE keeps
# Predictor a data frame even when only one predictor column remains.
Predictor <- TN1[, seq_len(ncol(TN1) - 1), drop = FALSE]

# Principal components analysis of the predictors for dimension reduction.
# cor = TRUE works on the correlation matrix; spelled out because `T` is an
# ordinary variable that can be reassigned.
pca <- princomp(Predictor, cor = TRUE)
pc.comp <- pca$scores
# Scores on the first two components, negated for convenience.
pc.comp1 <- -1 * pc.comp[, 1]
pc.comp2 <- -1 * pc.comp[, 2]
summary(pca)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3
## Standard deviation     1.0507150 0.9984301 0.9482275
## Proportion of Variance 0.3680006 0.3322876 0.2997118
## Cumulative Proportion  0.3680006 0.7002882 1.0000000
## K-Means
# Cluster the observations in the plane of the first two principal components.
X <- cbind(pc.comp1, pc.comp2)
# kmeans() starts from randomly chosen centres: fix the seed and keep the best
# of several restarts so the labels and the plot are reproducible between runs.
set.seed(42)
cl <- kmeans(X, centers = 5, nstart = 25)
cl$cluster
##  [1] 4 2 1 1 1 2 4 1 2 4 3 1 5 4 5 5 1 1 1 4 5 1 3 4 5 1 4 5 1 3 3 2 4 1 2
## [36] 4 4 4 2 4 3 1 3 3 4 1 5 1 5 5 1 4 1 3 1 4 3 5 4 5 4 1 2 4 2 4 2 4 4 2
## [71] 5 5 3 5 5 3 3 1 1 1 1 5 5 3 1 3 5 3 1 1
# Scatter the observations in principal-component space, coloured by cluster.
plot(x = pc.comp1, y = pc.comp2, col = cl$cluster)
# Overlay the fitted cluster centres (plotting symbol 3).
points(cl$centers, pch = 3)

## K-Centroids Cluster Analysis (Flexclust)
library(flexclust)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: modeltools
## Loading required package: stats4
# Fit a 4-cluster k-centroids model with flexclust's default settings, then
# print the fitted object.
cl1 <- kcca(Predictor, k = 4)
cl1
## kcca object of family 'kmeans' 
## 
## call:
## kcca(x = Predictor, k = 4)
## 
## cluster sizes:
## 
##  1  2  3  4 
## 40 10 20 20
## Bar plot of the centroids of the kcca fit stored in cl1
barplot(cl1)

## Same data and k, but with the k-medians family and k-means++ initial
## centroids; the resulting centroids are expected to be similar.
cl2 <- kcca(
  Predictor,
  k = 4,
  family = kccaFamily("kmedians"),
  control = list(initcent = "kmeanspp")
)
cl2
## kcca object of family 'kmedians' 
## 
## call:
## kcca(x = Predictor, k = 4, family = kccaFamily("kmedians"), 
##     control = list(initcent = "kmeanspp"))
## 
## cluster sizes:
## 
##  1  2  3  4 
## 20 20 40 10
## Bar plot of the centroids of the k-medians kcca fit stored in cl2
barplot(cl2)

## apcluster
## Create two standard-normal Gaussian clouds, each with one row per row of TN1,
## and stack them into x.
## NOTE(review): rnorm() uses only the *length* of a vector passed as its first
## argument, so the earlier rnorm(TN1$ForkVA.real) etc. never used the measured
## values. x is synthetic noise; confirm whether the real readings were meant
## to be clustered here. nrow(TN1) makes the sample size explicit and is also
## correct for a one-row frame.
## NOTE: cl1 and cl2 reuse (and overwrite) the names of the flexclust fits.
cl1 <- cbind(rnorm(nrow(TN1)), rnorm(nrow(TN1)))
cl2 <- cbind(rnorm(nrow(TN1)), rnorm(nrow(TN1)))
x <- rbind(cl1, cl2)

## compute similarity matrix
library(apcluster)
## 
## Attaching package: 'apcluster'
## The following object is masked from 'package:stats':
## 
##     heatmap
# Similarity matrix: negative distances between the rows of x, raised to r = 2
sim <- negDistMat(x, r = 2)

# Affinity propagation on the similarity matrix (details = TRUE), then plot the
# resulting clusters over the points in x
apres <- apcluster(sim, details = TRUE)
plot(apres, x)

# Join the affinity-propagation clusters by agglomerative clustering and plot
# the result
aggres <- aggExCluster(sim, apres)
plot(aggres, x)

# Heatmap of the similarity matrix for the affinity-propagation result
heatmap(apres, sim)

## Trimmed cluster
library(tclust)
## Loading required package: mvtnorm
## Loading required package: sn
## 
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
## 
##     sd
## Loading required package: mclust
## Package 'mclust' version 5.1
## Type 'citation("mclust")' for citing this R package in publications.
## Loading required package: cluster
# Trimmed clustering of TN1 with k = 3 clusters and trimming level alpha = 0.1.
# NOTE(review): the full TN1 frame, including the Type column, is passed here;
# confirm that clustering on all columns is intended.
clus <- tclust(TN1, k = 3, alpha = 0.1)

# Scatterplot matrix of TN1 coloured by cluster label; the +1 shift presumably
# keeps a label of 0 (trimmed points) on a visible colour -- verify.
pairs(TN1, col = clus$cluster + 1)

# Default plot of the tclust solution (two coordinates)
plot(clus)

# Discriminant factors of the tclust solution and their plot
dsc.1 <- DiscrFact(clus)
plot(dsc.1)

library(trimcluster)
# Two columns of standard-normal draws, one row per row of TN1.
# NOTE(review): y is not used by any statement visible in this script.
y <- cbind(
  rnorm(TN1$ForkVA.real),
  rnorm(TN1$ServiceID)
)
# Trimmed k-means on the predictors: 3 clusters, trimming 10% of the points.
tc <- trimkmeans(Predictor, k = 3, trim = 0.1)
tc
## * trimmed k-means *
## trim= 0.1 , k= 3 
## Classification (trimmed points are indicated by  4 ):
##  [1] 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 2 4 4 4 4
## [36] 4 4 4 4 4 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2
## [71] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Means:
##      ForkVA.real ForkW.real ServiceID
## [1,]   0.4749531  0.4834966 419092838
## [2,]   0.4927409  0.4555299 930162779
## [3,]   0.4619374  0.4730090 604775427
## Trimmed mean squares:  1.847054e+15
# List the components of the trimmed k-means result (length, class, mode)
summary(tc)
##                Length Class  Mode   
## classification 90     -none- numeric
## means           9     -none- numeric
## criterion       1     -none- numeric
## disttom        90     -none- numeric
## ropt            1     -none- numeric
## k               1     -none- numeric
## trim            1     -none- numeric
## runs            1     -none- numeric
## scaling         1     -none- logical
## Fuzzy Means
# fanny() comes from the cluster package, which so far was attached only as a
# dependency of library(tclust); load it explicitly so this step does not rely
# on that side effect.
library(cluster)
# Fuzzy clustering of TN1 into 4 clusters, followed by its full summary
# (memberships, fuzziness coefficients, silhouette information).
fannyx <- fanny(TN1, k = 4)
summary(fannyx)
## Fuzzy Clustering object of class 'fanny' :                          
## m.ship.expon.        2    
## objective     1.094687e+09
## tolerance        1e-15    
## iterations          52    
## converged            1    
## maxit              500    
## n                   90    
## Membership coefficients (in %, rounded):
##       [,1] [,2] [,3] [,4]
##  [1,]   58   16   14   12
##  [2,]   58   16   14   12
##  [3,]   58   16   14   12
##  [4,]   58   16   14   12
##  [5,]   58   16   14   12
##  [6,]   58   16   14   12
##  [7,]   58   16   14   12
##  [8,]   58   16   14   12
##  [9,]   58   16   14   12
## [10,]   58   16   14   12
## [11,]    1   98    0    1
## [12,]    1   98    0    1
## [13,]    1   98    0    1
## [14,]    1   98    0    1
## [15,]    1   98    0    1
## [16,]    1   98    0    1
## [17,]    1   98    0    1
## [18,]    1   98    0    1
## [19,]    1   98    0    1
## [20,]    1   98    0    1
## [21,]   88    6    2    4
## [22,]   88    6    2    4
## [23,]   88    6    2    4
## [24,]   88    6    2    4
## [25,]   88    6    2    4
## [26,]   88    6    2    4
## [27,]   88    6    2    4
## [28,]   88    6    2    4
## [29,]   88    6    2    4
## [30,]   88    6    2    4
## [31,]   32   16   39   13
## [32,]   32   16   39   13
## [33,]   32   16   39   13
## [34,]   32   16   39   13
## [35,]   32   16   39   13
## [36,]   32   16   39   13
## [37,]   32   16   39   13
## [38,]   32   16   39   13
## [39,]   32   16   39   13
## [40,]   32   16   39   13
## [41,]    1   98    0    1
## [42,]    1   98    0    1
## [43,]    1   98    0    1
## [44,]    1   98    0    1
## [45,]    1   98    0    1
## [46,]    1   98    0    1
## [47,]    1   98    0    1
## [48,]    1   98    0    1
## [49,]    1   98    0    1
## [50,]    1   98    0    1
## [51,]   79   11    3    6
## [52,]   79   11    3    6
## [53,]   79   11    3    6
## [54,]   79   11    3    6
## [55,]   79   11    3    6
## [56,]   79   11    3    6
## [57,]   79   11    3    6
## [58,]   79   11    3    6
## [59,]   79   11    3    6
## [60,]   79   11    3    6
## [61,]    1    1   97    1
## [62,]    1    1   97    1
## [63,]    1    1   97    1
## [64,]    1    1   97    1
## [65,]    1    1   97    1
## [66,]    1    1   97    1
## [67,]    1    1   97    1
## [68,]    1    1   97    1
## [69,]    1    1   97    1
## [70,]    1    1   97    1
## [71,]    2    5    1   92
## [72,]    2    5    1   92
## [73,]    2    5    1   92
## [74,]    2    5    1   92
## [75,]    2    5    1   92
## [76,]    2    5    1   92
## [77,]    2    5    1   92
## [78,]    2    5    1   92
## [79,]    2    5    1   92
## [80,]    2    5    1   92
## [81,]    2    4    1   93
## [82,]    2    4    1   93
## [83,]    2    4    1   93
## [84,]    2    4    1   93
## [85,]    2    4    1   93
## [86,]    2    4    1   93
## [87,]    2    4    1   93
## [88,]    2    4    1   93
## [89,]    2    4    1   93
## [90,]    2    4    1   93
## Fuzzyness coefficients:
## dunn_coeff normalized 
##  0.7480220  0.6640294 
## Closest hard clustering:
##  [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3
## [36] 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3
## [71] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 
## Silhouette plot information:
##    cluster neighbor sil_width
## 23       1        2 0.7001655
## 21       1        2 0.7001655
## 27       1        2 0.7001655
## 26       1        2 0.7001655
## 30       1        2 0.7001655
## 29       1        2 0.7001655
## 25       1        2 0.7001655
## 28       1        2 0.7001655
## 22       1        2 0.7001655
## 24       1        2 0.7001655
## 9        1        3 0.6381402
## 5        1        3 0.6381402
## 3        1        3 0.6381402
## 2        1        3 0.6381402
## 6        1        3 0.6381402
## 10       1        3 0.6381402
## 7        1        3 0.6381402
## 8        1        3 0.6381402
## 4        1        3 0.6381402
## 1        1        3 0.6381402
## 57       1        2 0.5869010
## 51       1        2 0.5869010
## 59       1        2 0.5869010
## 56       1        2 0.5869010
## 54       1        2 0.5869010
## 58       1        2 0.5869010
## 52       1        2 0.5869010
## 55       1        2 0.5869010
## 53       1        2 0.5869010
## 60       1        2 0.5869010
## 14       2        4 0.9859293
## 16       2        4 0.9859293
## 18       2        4 0.9859293
## 17       2        4 0.9859293
## 13       2        4 0.9859293
## 15       2        4 0.9859293
## 20       2        4 0.9859293
## 19       2        4 0.9859293
## 12       2        4 0.9859293
## 11       2        4 0.9859293
## 44       2        4 0.9855428
## 47       2        4 0.9855428
## 43       2        4 0.9855428
## 49       2        4 0.9855428
## 46       2        4 0.9855428
## 41       2        4 0.9855428
## 50       2        4 0.9855428
## 48       2        4 0.9855428
## 45       2        4 0.9855428
## 42       2        4 0.9855428
## 70       3        1 0.7364933
## 65       3        1 0.7364933
## 64       3        1 0.7364933
## 66       3        1 0.7364933
## 69       3        1 0.7364933
## 61       3        1 0.7364933
## 67       3        1 0.7364933
## 68       3        1 0.7364933
## 63       3        1 0.7364933
## 62       3        1 0.7364933
## 40       3        1 0.4722870
## 36       3        1 0.4722870
## 39       3        1 0.4722870
## 33       3        1 0.4722870
## 31       3        1 0.4722870
## 32       3        1 0.4722870
## 35       3        1 0.4722870
## 34       3        1 0.4722870
## 37       3        1 0.4722870
## 38       3        1 0.4722870
## 86       4        2 0.9055175
## 81       4        2 0.9055175
## 88       4        2 0.9055175
## 85       4        2 0.9055175
## 89       4        2 0.9055175
## 83       4        2 0.9055175
## 87       4        2 0.9055175
## 82       4        2 0.9055175
## 90       4        2 0.9055175
## 84       4        2 0.9055175
## 73       4        2 0.8848453
## 76       4        2 0.8848453
## 72       4        2 0.8848453
## 77       4        2 0.8848453
## 79       4        2 0.8848453
## 78       4        2 0.8848453
## 75       4        2 0.8848453
## 74       4        2 0.8848453
## 71       4        2 0.8848453
## 80       4        2 0.8848453
## Average silhouette width per cluster:
## [1] 0.6417356 0.9857360 0.6043901 0.8951814
## Average silhouette width of total data set:
## [1] 0.7662024
## 
## 4005 dissimilarities, summarized :
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##         0  80111000 172860000 199540000 296110000 570780000 
## Metric :  euclidean 
## Number of objects : 90
## 
## Available components:
##  [1] "membership"  "coeff"       "memb.exp"    "clustering"  "k.crisp"    
##  [6] "objective"   "convergence" "diss"        "call"        "silinfo"    
## [11] "data"
# Default plots for the fuzzy clustering result
plot(fannyx)

library(amap)
# Average-linkage hierarchical clustering on the numeric predictor columns.
# Passing the whole of TN1 forced the text Type column through a numeric
# coercion ("NAs introduced by coercion"), so it is left out here; the linkage
# name is spelled out in full instead of relying on partial matching.
hc <- hcluster(Predictor, link = "average")
plot(hc)

# Same dendrogram with hang = -1 (labels aligned along the baseline)
plot(hc, hang = -1)

library(reshape2)
# Count the ForkW.real readings for every Type x ServiceID combination.
# The formula refers to the columns by name and the aggregation is stated
# explicitly, so reshape2 no longer has to fall back to "defaulting to length".
acast(TN1, Type ~ ServiceID, value.var = "ForkW.real", fun.aggregate = length)
##                    374897109 389457902 455007891 457008451 562321452
## AutomobileIndustry         0         0         0        10         0
## Bank                       0         0         0         0         0
## BpoIndustry                0         0         0         0         0
## CementIndustry             0         0         0         0         0
## ChemicalIndustry           0        10         0         0         0
## Farmers1                   0         0        10         0         0
## Farmers2                   0         0         0         0        10
## FertilizerIndustry        10         0         0         0         0
## FoodIndustry               0         0         0         0         0
##                    581000256 671004572 775001231 945678934
## AutomobileIndustry         0         0         0         0
## Bank                       0        10         0         0
## BpoIndustry               10         0         0         0
## CementIndustry             0         0        10         0
## ChemicalIndustry           0         0         0         0
## Farmers1                   0         0         0         0
## Farmers2                   0         0         0         0
## FertilizerIndustry         0         0         0         0
## FoodIndustry               0         0         0        10
# Print TN1 coerced to a matrix; because of the text Type column every cell is
# shown as a character string
as.matrix(TN1)
##       ForkVA.real  ForkW.real    ServiceID   Type                
##  [1,] "0.86593564" "0.143762529" "671004572" "Bank"              
##  [2,] "0.12980418" "0.088929798" "671004572" "Bank"              
##  [3,] "0.06180149" "0.552047074" "671004572" "Bank"              
##  [4,] "0.09911646" "0.848172019" "671004572" "Bank"              
##  [5,] "0.20570391" "0.624722466" "671004572" "Bank"              
##  [6,] "0.16402897" "0.038167964" "671004572" "Bank"              
##  [7,] "0.61983410" "0.079610681" "671004572" "Bank"              
##  [8,] "0.01132422" "0.718472400" "671004572" "Bank"              
##  [9,] "0.34446118" "0.271178428" "671004572" "Bank"              
## [10,] "0.67916915" "0.191223431" "671004572" "Bank"              
## [11,] "0.49139957" "0.999932744" "457008451" "AutomobileIndustry"
## [12,] "0.07652585" "0.281726878" "457008451" "AutomobileIndustry"
## [13,] "0.87515833" "0.464859129" "457008451" "AutomobileIndustry"
## [14,] "0.64991327" "0.193133884" "457008451" "AutomobileIndustry"
## [15,] "0.78757786" "0.669413202" "457008451" "AutomobileIndustry"
## [16,] "0.81171847" "0.332674854" "457008451" "AutomobileIndustry"
## [17,] "0.24022092" "0.497692577" "457008451" "AutomobileIndustry"
## [18,] "0.29985077" "0.172019319" "457008451" "AutomobileIndustry"
## [19,] "0.12245926" "0.177083235" "457008451" "AutomobileIndustry"
## [20,] "0.86977533" "0.170256120" "457008451" "AutomobileIndustry"
## [21,] "0.77759558" "0.534224742" "581000256" "BpoIndustry"       
## [22,] "0.10862193" "0.457122953" "581000256" "BpoIndustry"       
## [23,] "0.41906573" "0.642565168" "581000256" "BpoIndustry"       
## [24,] "0.87731605" "0.048764741" "581000256" "BpoIndustry"       
## [25,] "0.87527674" "0.775392277" "581000256" "BpoIndustry"       
## [26,] "0.29715735" "0.514345606" "581000256" "BpoIndustry"       
## [27,] "0.74133330" "0.398913153" "581000256" "BpoIndustry"       
## [28,] "0.67922815" "0.985437729" "581000256" "BpoIndustry"       
## [29,] "0.44705484" "0.273268861" "581000256" "BpoIndustry"       
## [30,] "0.49465370" "0.855913294" "581000256" "BpoIndustry"       
## [31,] "0.45485969" "0.817051560" "775001231" "CementIndustry"    
## [32,] "0.28366765" "0.104793120" "775001231" "CementIndustry"    
## [33,] "0.60768234" "0.687070454" "775001231" "CementIndustry"    
## [34,] "0.18650264" "0.950588651" "775001231" "CementIndustry"    
## [35,] "0.07619765" "0.315548891" "775001231" "CementIndustry"    
## [36,] "0.51976463" "0.632151768" "775001231" "CementIndustry"    
## [37,] "0.87652107" "0.055456617" "775001231" "CementIndustry"    
## [38,] "0.83887772" "0.009060051" "775001231" "CementIndustry"    
## [39,] "0.32140231" "0.522392850" "775001231" "CementIndustry"    
## [40,] "0.51113739" "0.529004180" "775001231" "CementIndustry"    
## [41,] "0.39296486" "0.911056541" "455007891" "Farmers1"          
## [42,] "0.11657597" "0.050001042" "455007891" "Farmers1"          
## [43,] "0.38223472" "0.836797447" "455007891" "Farmers1"          
## [44,] "0.39608830" "0.687679051" "455007891" "Farmers1"          
## [45,] "0.91914764" "0.143218874" "455007891" "Farmers1"          
## [46,] "0.32520363" "0.220849050" "455007891" "Farmers1"          
## [47,] "0.68026602" "0.719326435" "455007891" "Farmers1"          
## [48,] "0.10225351" "0.485972453" "455007891" "Farmers1"          
## [49,] "0.75823863" "0.361468657" "455007891" "Farmers1"          
## [50,] "0.89073484" "0.616021895" "455007891" "Farmers1"          
## [51,] "0.27981088" "0.343922855" "562321452" "Farmers2"          
## [52,] "0.66898771" "0.079483735" "562321452" "Farmers2"          
## [53,] "0.07462722" "0.158309555" "562321452" "Farmers2"          
## [54,] "0.41282601" "0.832440596" "562321452" "Farmers2"          
## [55,] "0.06018988" "0.803370663" "562321452" "Farmers2"          
## [56,] "0.56194497" "0.195324363" "562321452" "Farmers2"          
## [57,] "0.44509420" "0.693574743" "562321452" "Farmers2"          
## [58,] "0.83280090" "0.671593792" "562321452" "Farmers2"          
## [59,] "0.76753424" "0.387378591" "562321452" "Farmers2"          
## [60,] "0.85582218" "0.982635083" "562321452" "Farmers2"          
## [61,] "0.88641538" "0.525571800" "945678934" "FoodIndustry"      
## [62,] "0.19024977" "0.868567029" "945678934" "FoodIndustry"      
## [63,] "0.08415354" "0.057131513" "945678934" "FoodIndustry"      
## [64,] "0.66415492" "0.511431164" "945678934" "FoodIndustry"      
## [65,] "0.36429666" "0.370514075" "945678934" "FoodIndustry"      
## [66,] "0.45430476" "0.668504796" "945678934" "FoodIndustry"      
## [67,] "0.24062877" "0.071933625" "945678934" "FoodIndustry"      
## [68,] "0.97393412" "0.622084835" "945678934" "FoodIndustry"      
## [69,] "0.64885761" "0.143441897" "945678934" "FoodIndustry"      
## [70,] "0.45829515" "0.354596279" "945678934" "FoodIndustry"      
## [71,] "0.83809622" "0.972833500" "389457902" "ChemicalIndustry"  
## [72,] "0.66166889" "0.742808053" "389457902" "ChemicalIndustry"  
## [73,] "0.24290970" "0.534019396" "389457902" "ChemicalIndustry"  
## [74,] "0.77274992" "0.230826324" "389457902" "ChemicalIndustry"  
## [75,] "0.77550891" "0.962347883" "389457902" "ChemicalIndustry"  
## [76,] "0.24701364" "0.631533718" "389457902" "ChemicalIndustry"  
## [77,] "0.58837541" "0.805967053" "389457902" "ChemicalIndustry"  
## [78,] "0.20871605" "0.250639534" "389457902" "ChemicalIndustry"  
## [79,] "0.10299392" "0.413812874" "389457902" "ChemicalIndustry"  
## [80,] "0.31239864" "0.033331257" "389457902" "ChemicalIndustry"  
## [81,] "0.25800905" "0.397515671" "374897109" "FertilizerIndustry"
## [82,] "0.73548144" "0.302799253" "374897109" "FertilizerIndustry"
## [83,] "0.72394882" "0.421190181" "374897109" "FertilizerIndustry"
## [84,] "0.30304457" "0.853019454" "374897109" "FertilizerIndustry"
## [85,] "0.21426962" "0.494717569" "374897109" "FertilizerIndustry"
## [86,] "0.32024035" "0.538717204" "374897109" "FertilizerIndustry"
## [87,] "0.74247218" "0.507587516" "374897109" "FertilizerIndustry"
## [88,] "0.47647602" "0.356155665" "374897109" "FertilizerIndustry"
## [89,] "0.22274525" "0.275880272" "374897109" "FertilizerIndustry"
## [90,] "0.06269738" "0.622976396" "374897109" "FertilizerIndustry"
# Mclust() comes from the mclust package, which so far was attached only as a
# dependency of library(tclust); load it explicitly so this step does not rely
# on that side effect.
library(mclust)
# Model-based clustering of TN1, followed by the default plots of the fit.
model <- Mclust(TN1)
plot(model)