library("MASS")
## Warning: package 'MASS' was built under R version 4.1.3
# Load the built-in `trees` data set and keep a working copy named `data`.
data(trees)
data <- trees
# View() opens a spreadsheet-style viewer and therefore needs an interactive
# session; skip it when the script is run in batch mode.
if (interactive()) {
  View(data)
}
# Print the full data set.
trees
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
# Compact overview of the data frame: dimensions, column types, first values.
utils::str(trees)
## 'data.frame': 31 obs. of 3 variables:
## $ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
## $ Height: num 70 65 63 72 81 83 66 75 80 75 ...
## $ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
# Pre-processing: drop every row that contains a missing value.
df <- stats::na.omit(trees)
print(df)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
# Summary statistics: one row per variable, one column per statistic.
# Each statistic is evaluated column by column and returns a named numeric
# vector, whose names become the row names of the table.
desc_sta <- data.frame(
  Min = vapply(df, min, numeric(1)),
  Med = vapply(df, median, numeric(1)),
  mean = vapply(df, mean, numeric(1)),
  SD = vapply(df, sd, numeric(1)),
  max = vapply(df, max, numeric(1))
)
print(desc_sta)
## Min Med mean SD max
## Girth 8.3 12.9 13.24839 3.138139 20.6
## Height 63.0 76.0 76.00000 6.371813 87.0
## Volume 10.2 24.2 30.17097 16.437846 77.0
# Round the summary table to one decimal place for display.
desc_sta <- round(desc_sta, digits = 1)
print(desc_sta)
## Min Med mean SD max
## Girth 8.3 12.9 13.2 3.1 20.6
## Height 63.0 76.0 76.0 6.4 87.0
## Volume 10.2 24.2 30.2 16.4 77.0
# Standardizing and scaling: centre every column on its mean and divide by
# its standard deviation. From here on `df` is a numeric matrix.
df <- scale(df, center = TRUE, scale = TRUE)
head(df, n = 6L)
## Girth Height Volume
## 1 -1.5768542 -0.9416472 -1.2088547
## 2 -1.4812561 -1.7263533 -1.2088547
## 3 -1.4175241 -2.0402357 -1.2149382
## 4 -0.8758017 -0.6277648 -0.8377598
## 5 -0.8120696 0.7847060 -0.6917553
## 6 -0.7802036 1.0985884 -0.6370036
# Cluster determination (elbow method).
# ws[k] holds the total within-cluster sum of squares for a k-cluster fit.
ws <- numeric(15)
# k = 1: every point belongs to one cluster, so the within-cluster sum of
# squares is the total sum of squares, (n - 1) * sum of column variances.
ws[1] <- (nrow(df) - 1) * sum(apply(df, 2, var))
ws[1]
## [1] 90
# kmeans() picks random starting centres; seed the RNG so the curve is
# reproducible from run to run.
set.seed(20)
for (i in 2:15) {
  # The third positional argument of kmeans() is iter.max; name it explicitly.
  # tot.withinss is the sum of the per-cluster withinss values.
  ws[i] <- kmeans(df, centers = i, iter.max = 25)$tot.withinss
}
plot(1:15, ws, type = "b", main = "15 clusters",
     xlab = "no. of. clusters", ylab = "with cluster sum of squares")

# K-means clustering: 6 clusters, best of 20 random starts, fixed seed.
# NOTE(review): this fits the raw `trees` columns, whereas the elbow curve,
# the aggregate() call and the plots all work on the standardised `df` --
# confirm which input is intended before relying on the cluster assignment.
set.seed(20)
treecluster <- kmeans(x = trees, centers = 6, nstart = 20)
print(treecluster)
## K-means clustering with 6 clusters of sizes 1, 5, 10, 5, 4, 6
##
## Cluster means:
## Girth Height Volume
## 1 20.600 87.00000 77.00
## 2 17.740 80.60000 54.38
## 3 11.960 72.70000 21.01
## 4 11.440 81.80000 22.54
## 5 9.175 66.00000 11.60
## 6 14.650 77.66667 36.20
##
## Clustering vector:
## [1] 5 5 5 3 4 4 5 3 4 3 4 3 3 3 3 3 6 4 3 3 6 6 6 6 6 2 2 2 2 2 1
##
## Within cluster sum of squares by cluster:
## [1] 0.0000 41.4800 216.5330 83.7840 51.9075 187.8683
## (between_SS / total_SS = 94.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Per-cluster means of the standardised variables; the grouping column is
# named Group.1 because the `by` list is unnamed.
cluster_mean <- aggregate(
  x = df,
  by = list(treecluster$cluster),
  FUN = mean
)
print(cluster_mean)
## Group.1 Girth Height Volume
## 1 1 2.3426667 1.7263533 2.8488545
## 2 2 1.4312984 0.7219295 1.4727618
## 3 3 -0.4105577 -0.5179060 -0.5573095
## 4 4 -0.5762611 0.9102590 -0.4642316
## 5 5 -1.2980265 -1.5694121 -1.1297689
## 6 6 0.4466383 0.2615687 0.3667775
# Cluster visualisation: scatter of the first two columns of the standardised
# matrix `df`, one colour per cluster.
plot(df, col = treecluster$cluster, pch = 15)
# `treecluster` was fitted on the raw `trees` values, so treecluster$centers
# is expressed in original units and would fall outside these standardised
# axes. Compute each cluster's centroid from `df` itself so the markers share
# the coordinate system of the plot.
cluster_sizes <- as.vector(table(treecluster$cluster))
scaled_centers <- rowsum(df, treecluster$cluster) / cluster_sizes
points(scaled_centers, col = as.integer(rownames(scaled_centers)), pch = 5)

library(cluster)
# Bivariate cluster plot of the standardised data, with shaded ellipses
# and point/cluster labels.
# The argument is spelled `color`; the former `colour = TRUE` was not
# recognised by clusplot() and was passed on to the base graphics calls, each
# of which warned '"colour" is not a graphical parameter'.
clusplot(df, treecluster$cluster, color = TRUE, shade = TRUE, labels = 2, lines = 0)
library(fpc)
## Warning: package 'fpc' was built under R version 4.1.3

# Discriminant-projection plot of the standardised data, labelled by the
# k-means cluster assignment.
plotcluster(x = df, clvecd = treecluster$cluster)
