library("MASS")
## Warning: package 'MASS' was built under R version 4.1.3
# Working copy of the built-in `trees` data set (Girth, Height, Volume).
data <- trees

# View() opens a spreadsheet-style viewer, which only makes sense when a
# person is at the console; skip it when the script runs non-interactively
# (Rscript, knitr, CI) so the run does not fail or stall there.
if (interactive()) {
  View(data)
}

# find the structure of the data
data(trees)
trees
##    Girth Height Volume
## 1    8.3     70   10.3
## 2    8.6     65   10.3
## 3    8.8     63   10.2
## 4   10.5     72   16.4
## 5   10.7     81   18.8
## 6   10.8     83   19.7
## 7   11.0     66   15.6
## 8   11.0     75   18.2
## 9   11.1     80   22.6
## 10  11.2     75   19.9
## 11  11.3     79   24.2
## 12  11.4     76   21.0
## 13  11.4     76   21.4
## 14  11.7     69   21.3
## 15  12.0     75   19.1
## 16  12.9     74   22.2
## 17  12.9     85   33.8
## 18  13.3     86   27.4
## 19  13.7     71   25.7
## 20  13.8     64   24.9
## 21  14.0     78   34.5
## 22  14.2     80   31.7
## 23  14.5     74   36.3
## 24  16.0     72   38.3
## 25  16.3     77   42.6
## 26  17.3     81   55.4
## 27  17.5     82   55.7
## 28  17.9     80   58.3
## 29  18.0     80   51.5
## 30  18.0     80   51.0
## 31  20.6     87   77.0
# Compact overview of the column types and the first few values.
utils::str(trees)
## 'data.frame':    31 obs. of  3 variables:
##  $ Girth : num  8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
##  $ Height: num  70 65 63 72 81 83 66 75 80 75 ...
##  $ Volume: num  10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
# pre-processing: drop every row that contains a missing value

df <- stats::na.omit(trees)
df
##    Girth Height Volume
## 1    8.3     70   10.3
## 2    8.6     65   10.3
## 3    8.8     63   10.2
## 4   10.5     72   16.4
## 5   10.7     81   18.8
## 6   10.8     83   19.7
## 7   11.0     66   15.6
## 8   11.0     75   18.2
## 9   11.1     80   22.6
## 10  11.2     75   19.9
## 11  11.3     79   24.2
## 12  11.4     76   21.0
## 13  11.4     76   21.4
## 14  11.7     69   21.3
## 15  12.0     75   19.1
## 16  12.9     74   22.2
## 17  12.9     85   33.8
## 18  13.3     86   27.4
## 19  13.7     71   25.7
## 20  13.8     64   24.9
## 21  14.0     78   34.5
## 22  14.2     80   31.7
## 23  14.5     74   36.3
## 24  16.0     72   38.3
## 25  16.3     77   42.6
## 26  17.3     81   55.4
## 27  17.5     82   55.7
## 28  17.9     80   58.3
## 29  18.0     80   51.5
## 30  18.0     80   51.0
## 31  20.6     87   77.0
# summary statistics
#
# One row per variable, one column per statistic. Each statistic is computed
# column by column and must return a single number.

desc_sta <- data.frame(
  Min  = vapply(df, min, numeric(1)),
  Med  = vapply(df, median, numeric(1)),
  mean = vapply(df, mean, numeric(1)),
  SD   = vapply(df, sd, numeric(1)),
  max  = vapply(df, max, numeric(1))
)
desc_sta
##         Min  Med     mean        SD  max
## Girth   8.3 12.9 13.24839  3.138139 20.6
## Height 63.0 76.0 76.00000  6.371813 87.0
## Volume 10.2 24.2 30.17097 16.437846 77.0
# Round the summary table to one decimal place for display.
desc_sta <- round(desc_sta, digits = 1)
desc_sta
##         Min  Med mean   SD  max
## Girth   8.3 12.9 13.2  3.1 20.6
## Height 63.0 76.0 76.0  6.4 87.0
## Volume 10.2 24.2 30.2 16.4 77.0
# standardizing and scaling: centre every column on its mean and divide by
# its standard deviation. Note that scale() returns a numeric matrix, so `df`
# is a matrix from here on.
df <- scale(df)
head(df)
##        Girth     Height     Volume
## 1 -1.5768542 -0.9416472 -1.2088547
## 2 -1.4812561 -1.7263533 -1.2088547
## 3 -1.4175241 -2.0402357 -1.2149382
## 4 -0.8758017 -0.6277648 -0.8377598
## 5 -0.8120696  0.7847060 -0.6917553
## 6 -0.7802036  1.0985884 -0.6370036
# cluster determination (elbow plot)
#
# ws[k] is the total within-cluster sum of squares of a k-cluster solution on
# the standardized data. For k = 1 that is simply the total sum of squares,
# obtained here from the column variances.

ws <- (nrow(df) - 1) * sum(apply(df, 2, var))
ws
## [1] 90
# kmeans() draws its starting centres at random; fix the seed so the curve
# is the same on every run.
set.seed(20)
# `iter.max` is named explicitly: passed positionally, the 25 is easy to
# mistake for `nstart`. `tot.withinss` is the sum of the per-cluster
# `withinss` values.
ws <- c(
  ws,
  vapply(
    2:15,
    function(k) kmeans(df, centers = k, iter.max = 25)$tot.withinss,
    numeric(1)
  )
)
plot(
  1:15, ws,
  type = "b",
  main = "15 clusters",
  xlab = "no. of. clusters",
  ylab = "with cluster sum of squares"
)

# k means clustering
#
# Cluster the standardized data in `df`, not the raw `trees` table: the
# variables are on very different scales, and every later step (cluster
# means, scatter plot, centre overlay) works in standardized units, so the
# fit has to be on that scale too for the results to line up.

set.seed(20)
treecluster <- kmeans(df, centers = 6, nstart = 20)
treecluster
# (Printed output not recorded here; re-run the script to regenerate it.)

# aggregate function: mean of each standardized variable within each cluster

cluster_mean <- aggregate(df, by = list(treecluster$cluster), FUN = mean)
cluster_mean
# (Printed output not recorded here; re-run the script to regenerate it.)

# cluster visualization using plot cluster
#
# plot() on a matrix draws its first two columns (Girth vs Height). The
# centres come from the same standardized fit, so they share the axes of
# the points they summarise.

plot(df, col = treecluster$cluster, pch = 15)
points(treecluster$centers, col = 1:6, pch = 5)

library(cluster)
# Bivariate cluster plot of the standardized data.
# clusplot() spells its colouring switch `color`; any other spelling is not
# matched, gets forwarded through `...` to the low-level graphics calls, and
# is rejected there with a "not a graphical parameter" warning on every call.
# labels = 2 labels all points and ellipses; lines = 0 draws no lines
# between the clusters.
clusplot(
  df, treecluster$cluster,
  color = TRUE, shade = TRUE, labels = 2, lines = 0
)
library(fpc)
## Warning: package 'fpc' was built under R version 4.1.3

# Discriminant-projection plot of the same cluster assignment.
plotcluster(df, treecluster$cluster)