# K-Means Clustering

# Crime Dataset

# Assignment 4

# Load the crime data set into `mydata`.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
mydata <- read.csv(
  file = "C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\3 Data Sets\\6 Hierarchial Clustering\\Assignment\\crime_data.csv"
)


# Standardise the four numeric crime columns (columns 2-5: Murder, Assault,
# UrbanPop, Rape) so that variables measured on larger scales do not dominate
# the Euclidean distances used by k-means.
normalized_data <- scale(mydata[, 2:5])

# k-means starts from randomly chosen centres. Fix the RNG seed so the cluster
# labels are reproducible between runs, and use several random starts
# (`nstart`) so the solution with the lowest total within-cluster sum of
# squares is kept instead of whatever a single start happens to converge to.
# NOTE: output recorded elsewhere in this file came from an earlier unseeded,
# single-start run and may not match a fresh run exactly.
set.seed(123)
fit <- kmeans(normalized_data, centers = 5, nstart = 25) # 5 cluster solution

# Show the structure of the fitted k-means object (recorded output follows).
utils::str(fit)
## List of 9
##  $ cluster     : int [1:50] 4 4 4 1 4 4 2 1 4 4 ...
##  $ centers     : num [1:5, 1:4] -0.168 -0.696 -1.05 1.043 -1.118 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:5] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:4] "Murder" "Assault" "UrbanPop" "Rape"
##  $ totss       : num 196
##  $ withinss    : num [1:5] 9.89 5.24 2.75 45.26 2.2
##  $ tot.withinss: num 65.3
##  $ betweenss   : num 131
##  $ size        : int [1:5] 12 7 7 19 5
##  $ iter        : int 2
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"
# Cluster centres in standardised units: one row per cluster, one column per
# variable.
fit[["centers"]]
##       Murder    Assault    UrbanPop        Rape
## 1 -0.1675273 -0.2141089 -0.03154916 -0.02476943
## 2 -0.6958674 -0.5679476  1.12728218 -0.55096728
## 3 -1.0500985 -1.0736357 -0.44195146 -0.83923219
## 4  1.0431796  1.0626143  0.19176752  0.85238754
## 5 -1.1176648 -1.2258563 -1.61246159 -1.23334676
# Attach each row's cluster label to the original data as column `fit.cluster`.
final <- data.frame(mydata, fit.cluster = fit$cluster)

# Column overview of the combined data frame (recorded output follows).
utils::str(final)
## 'data.frame':    50 obs. of  6 variables:
##  $ X          : Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Murder     : num  13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
##  $ Assault    : int  236 263 294 190 276 204 110 238 335 211 ...
##  $ UrbanPop   : int  58 48 80 50 91 78 77 72 80 60 ...
##  $ Rape       : num  21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
##  $ fit.cluster: int  4 4 4 1 4 4 2 1 4 4 ...
# Print the full table of states together with their assigned cluster.
print(final)
##                 X Murder Assault UrbanPop Rape fit.cluster
## 1         Alabama   13.2     236       58 21.2           4
## 2          Alaska   10.0     263       48 44.5           4
## 3         Arizona    8.1     294       80 31.0           4
## 4        Arkansas    8.8     190       50 19.5           1
## 5      California    9.0     276       91 40.6           4
## 6        Colorado    7.9     204       78 38.7           4
## 7     Connecticut    3.3     110       77 11.1           2
## 8        Delaware    5.9     238       72 15.8           1
## 9         Florida   15.4     335       80 31.9           4
## 10        Georgia   17.4     211       60 25.8           4
## 11         Hawaii    5.3      46       83 20.2           2
## 12          Idaho    2.6     120       54 14.2           3
## 13       Illinois   10.4     249       83 24.0           4
## 14        Indiana    7.2     113       65 21.0           1
## 15           Iowa    2.2      56       57 11.3           3
## 16         Kansas    6.0     115       66 18.0           1
## 17       Kentucky    9.7     109       52 16.3           1
## 18      Louisiana   15.4     249       66 22.2           4
## 19          Maine    2.1      83       51  7.8           5
## 20       Maryland   11.3     300       67 27.8           4
## 21  Massachusetts    4.4     149       85 16.3           2
## 22       Michigan   12.1     255       74 35.1           4
## 23      Minnesota    2.7      72       66 14.9           3
## 24    Mississippi   16.1     259       44 17.1           4
## 25       Missouri    9.0     178       70 28.2           1
## 26        Montana    6.0     109       53 16.4           3
## 27       Nebraska    4.3     102       62 16.5           3
## 28         Nevada   12.2     252       81 46.0           4
## 29  New Hampshire    2.1      57       56  9.5           3
## 30     New Jersey    7.4     159       89 18.8           2
## 31     New Mexico   11.4     285       70 32.1           4
## 32       New York   11.1     254       86 26.1           4
## 33 North Carolina   13.0     337       45 16.1           4
## 34   North Dakota    0.8      45       44  7.3           5
## 35           Ohio    7.3     120       75 21.4           1
## 36       Oklahoma    6.6     151       68 20.0           1
## 37         Oregon    4.9     159       67 29.3           1
## 38   Pennsylvania    6.3     106       72 14.9           2
## 39   Rhode Island    3.4     174       87  8.3           2
## 40 South Carolina   14.4     279       48 22.5           4
## 41   South Dakota    3.8      86       45 12.8           5
## 42      Tennessee   13.2     188       59 26.9           4
## 43          Texas   12.7     201       80 25.5           4
## 44           Utah    3.2     120       80 22.9           2
## 45        Vermont    2.2      48       32 11.2           5
## 46       Virginia    8.5     156       63 20.7           1
## 47     Washington    4.0     145       73 26.2           1
## 48  West Virginia    5.7      81       39  9.3           5
## 49      Wisconsin    2.6      53       66 10.8           3
## 50        Wyoming    6.8     161       60 15.6           1
# Per-cluster mean of each crime variable, computed on the original
# (unscaled) values so the cluster profiles can be read in real units.
x <- aggregate(mydata[, 2:5], by = list(fit$cluster), FUN = mean)
print(x)
##   Group.1    Murder   Assault UrbanPop     Rape
## 1       1  7.058333 152.91667 65.08333 21.00000
## 2       2  4.757143 123.42857 81.85714 16.07143
## 3       3  3.214286  81.28571 59.14286 13.37143
## 4       4 12.331579 259.31579 68.31579 29.21579
## 5       5  2.920000  68.60000 42.20000  9.68000
# One-time setup: uncomment to install the animation package.
#install.packages("animation")

library("animation")

# Animate the k-means iterations with five centres, one colour per cluster.
# This is a separate clustering run whose result is stored in `x1`; it does
# not reuse `fit`.
# NOTE(review): normalized_data has four columns, while kmeans.ani is
# documented for two-column input - confirm the plot shows what is intended.
x1 <- kmeans.ani(x = normalized_data, centers = 5, col = seq_len(5))