# Assignment 4
# Load the crime data set and fit a 5-cluster k-means model on its
# standardised numeric columns.
# NOTE(review): the absolute path below is machine-specific; the script only
# runs where this exact file location exists.
mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\3 Data Sets\\6 Hierarchial Clustering\\Assignment\\crime_data.csv")

# Columns 2 to 5 are centred and scaled (scale() defaults) so that variables
# measured on larger scales do not dominate the clustering.
normalized_data <- scale(mydata[, 2:5])

# kmeans() chooses its starting centres at random. Fixing the seed makes the
# clustering reproducible, and nstart = 25 keeps the best of 25 random starts
# instead of relying on a single, possibly poor, initialisation.
# Output recorded from an earlier unseeded run (cluster labels, sizes,
# centres) may therefore differ from what this call produces.
set.seed(123)
fit <- kmeans(normalized_data, centers = 5, nstart = 25) # 5 cluster solution
str(fit)
## List of 9
## $ cluster : int [1:50] 4 4 4 1 4 4 2 1 4 4 ...
## $ centers : num [1:5, 1:4] -0.168 -0.696 -1.05 1.043 -1.118 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:5] "1" "2" "3" "4" ...
## .. ..$ : chr [1:4] "Murder" "Assault" "UrbanPop" "Rape"
## $ totss : num 196
## $ withinss : num [1:5] 9.89 5.24 2.75 45.26 2.2
## $ tot.withinss: num 65.3
## $ betweenss : num 131
## $ size : int [1:5] 12 7 7 19 5
## $ iter : int 2
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
# Show the cluster centres. They are on the standardised scale because the
# model was fitted to normalized_data.
fit[["centers"]]
## Murder Assault UrbanPop Rape
## 1 -0.1675273 -0.2141089 -0.03154916 -0.02476943
## 2 -0.6958674 -0.5679476 1.12728218 -0.55096728
## 3 -1.0500985 -1.0736357 -0.44195146 -0.83923219
## 4 1.0431796 1.0626143 0.19176752 0.85238754
## 5 -1.1176648 -1.2258563 -1.61246159 -1.23334676
# Append each row's cluster membership to the original data. The new column
# is named fit.cluster explicitly (the same name data.frame() derives from
# the expression fit$cluster).
final <- data.frame(mydata, fit.cluster = fit[["cluster"]])
str(final)
## 'data.frame': 50 obs. of 6 variables:
## $ X : Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Murder : num 13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
## $ Assault : int 236 263 294 190 276 204 110 238 335 211 ...
## $ UrbanPop : int 58 48 80 50 91 78 77 72 80 60 ...
## $ Rape : num 21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
## $ fit.cluster: int 4 4 4 1 4 4 2 1 4 4 ...
# Display the complete data frame with the appended cluster column
# (relies on top-level auto-printing).
final
## X Murder Assault UrbanPop Rape fit.cluster
## 1 Alabama 13.2 236 58 21.2 4
## 2 Alaska 10.0 263 48 44.5 4
## 3 Arizona 8.1 294 80 31.0 4
## 4 Arkansas 8.8 190 50 19.5 1
## 5 California 9.0 276 91 40.6 4
## 6 Colorado 7.9 204 78 38.7 4
## 7 Connecticut 3.3 110 77 11.1 2
## 8 Delaware 5.9 238 72 15.8 1
## 9 Florida 15.4 335 80 31.9 4
## 10 Georgia 17.4 211 60 25.8 4
## 11 Hawaii 5.3 46 83 20.2 2
## 12 Idaho 2.6 120 54 14.2 3
## 13 Illinois 10.4 249 83 24.0 4
## 14 Indiana 7.2 113 65 21.0 1
## 15 Iowa 2.2 56 57 11.3 3
## 16 Kansas 6.0 115 66 18.0 1
## 17 Kentucky 9.7 109 52 16.3 1
## 18 Louisiana 15.4 249 66 22.2 4
## 19 Maine 2.1 83 51 7.8 5
## 20 Maryland 11.3 300 67 27.8 4
## 21 Massachusetts 4.4 149 85 16.3 2
## 22 Michigan 12.1 255 74 35.1 4
## 23 Minnesota 2.7 72 66 14.9 3
## 24 Mississippi 16.1 259 44 17.1 4
## 25 Missouri 9.0 178 70 28.2 1
## 26 Montana 6.0 109 53 16.4 3
## 27 Nebraska 4.3 102 62 16.5 3
## 28 Nevada 12.2 252 81 46.0 4
## 29 New Hampshire 2.1 57 56 9.5 3
## 30 New Jersey 7.4 159 89 18.8 2
## 31 New Mexico 11.4 285 70 32.1 4
## 32 New York 11.1 254 86 26.1 4
## 33 North Carolina 13.0 337 45 16.1 4
## 34 North Dakota 0.8 45 44 7.3 5
## 35 Ohio 7.3 120 75 21.4 1
## 36 Oklahoma 6.6 151 68 20.0 1
## 37 Oregon 4.9 159 67 29.3 1
## 38 Pennsylvania 6.3 106 72 14.9 2
## 39 Rhode Island 3.4 174 87 8.3 2
## 40 South Carolina 14.4 279 48 22.5 4
## 41 South Dakota 3.8 86 45 12.8 5
## 42 Tennessee 13.2 188 59 26.9 4
## 43 Texas 12.7 201 80 25.5 4
## 44 Utah 3.2 120 80 22.9 2
## 45 Vermont 2.2 48 32 11.2 5
## 46 Virginia 8.5 156 63 20.7 1
## 47 Washington 4.0 145 73 26.2 1
## 48 West Virginia 5.7 81 39 9.3 5
## 49 Wisconsin 2.6 53 66 10.8 3
## 50 Wyoming 6.8 161 60 15.6 1
# Per-cluster means of the original (unscaled) columns 2 to 5, so the
# clusters can be read in the data's own units. The grouping list is
# unnamed, so aggregate() labels the cluster column Group.1.
x <- aggregate(
  x = mydata[, 2:5],
  by = list(fit[["cluster"]]),
  FUN = mean
)
x
## Group.1 Murder Assault UrbanPop Rape
## 1 1 7.058333 152.91667 65.08333 21.00000
## 2 2 4.757143 123.42857 81.85714 16.07143
## 3 3 3.214286 81.28571 59.14286 13.37143
## 4 4 12.331579 259.31579 68.31579 29.21579
## 5 5 2.920000 68.60000 42.20000 9.68000
# One-time setup if the package is missing:
# install.packages("animation")
library(animation)

# Animated demonstration of k-means with 5 centres, one colour per cluster.
# kmeans.ani() runs its own clustering; it does not reuse `fit`.
# NOTE(review): kmeans.ani() appears to be documented for two-column input,
# while normalized_data has four columns - confirm what the plot shows.
x1 <- kmeans.ani(
  normalized_data,
  centers = 5,
  col = 1:5
)











