Crime Data Analysis Using K-Means Clustering:

The cluster with the highest murder rate appears to be the most dangerous group of states to live in.

The cluster with the highest Assault and Rape rates (along with the other factors) ranks second among the most dangerous groups of states to live in.

The remaining two clusters take the third and fourth ranks, based on their crime rates across the Rape, Murder, Assault and UrbanPop metrics.

install.packages("plyr",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'plyr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\RtmpkXoLDt\downloaded_packages
library(plyr)

# Pick the crime-data CSV through an interactive file dialog, load it, and
# print its structure as a quick sanity check of the column types.
csv_path <- file.choose()
mydata <- read.csv(csv_path)
str(mydata)
## 'data.frame':    50 obs. of  5 variables:
##  $ X       : Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Murder  : num  13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
##  $ Assault : int  236 263 294 190 276 204 110 238 335 211 ...
##  $ UrbanPop: int  58 48 80 50 91 78 77 72 80 60 ...
##  $ Rape    : num  21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
# Standardize the four numeric crime metrics (columns 2-5; column 1 holds the
# state name) so every metric contributes on the same scale to the Euclidean
# distances used by k-means.
normalized_data <- scale(mydata[, 2:5])

# Scree plot: total within-cluster sum of squares (WSS) for k = 1..max_k.
# k-means starts from randomly chosen centers, so fix the seed and keep the
# best of several restarts; with a single unseeded start the curve changes
# from run to run and a poor local optimum can hide the elbow.
set.seed(123)
max_k <- 5
wss <- numeric(max_k)
# For k = 1 the WSS is the total sum of squares around the column means.
wss[1] <- (nrow(normalized_data) - 1) * sum(apply(normalized_data, 2, var))
for (i in 2:max_k) {
  wss[i] <- kmeans(normalized_data, centers = i, nstart = 25)$tot.withinss
}
# Look for an "elbow" in the curve to choose the number of clusters.
plot(
  seq_len(max_k), wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
title(sub = "K-Means Clustering Scree-Plot")

# Fit the final 4-cluster solution on the standardized data.
# A fixed seed plus multiple random starts makes the partition reproducible
# and avoids settling on a poor local optimum. Cluster numbers are arbitrary
# labels: identify a cluster by its mean profile, not by its number.
set.seed(123)
fit <- kmeans(normalized_data, centers = 4, nstart = 25)
# Append each state's cluster membership; data.frame() names the new column
# `fit.cluster`.
final2 <- data.frame(mydata, fit$cluster)
final2
##                 X Murder Assault UrbanPop Rape fit.cluster
## 1         Alabama   13.2     236       58 21.2           2
## 2          Alaska   10.0     263       48 44.5           4
## 3         Arizona    8.1     294       80 31.0           4
## 4        Arkansas    8.8     190       50 19.5           2
## 5      California    9.0     276       91 40.6           4
## 6        Colorado    7.9     204       78 38.7           4
## 7     Connecticut    3.3     110       77 11.1           3
## 8        Delaware    5.9     238       72 15.8           3
## 9         Florida   15.4     335       80 31.9           4
## 10        Georgia   17.4     211       60 25.8           2
## 11         Hawaii    5.3      46       83 20.2           3
## 12          Idaho    2.6     120       54 14.2           1
## 13       Illinois   10.4     249       83 24.0           4
## 14        Indiana    7.2     113       65 21.0           3
## 15           Iowa    2.2      56       57 11.3           1
## 16         Kansas    6.0     115       66 18.0           3
## 17       Kentucky    9.7     109       52 16.3           1
## 18      Louisiana   15.4     249       66 22.2           2
## 19          Maine    2.1      83       51  7.8           1
## 20       Maryland   11.3     300       67 27.8           4
## 21  Massachusetts    4.4     149       85 16.3           3
## 22       Michigan   12.1     255       74 35.1           4
## 23      Minnesota    2.7      72       66 14.9           1
## 24    Mississippi   16.1     259       44 17.1           2
## 25       Missouri    9.0     178       70 28.2           4
## 26        Montana    6.0     109       53 16.4           1
## 27       Nebraska    4.3     102       62 16.5           1
## 28         Nevada   12.2     252       81 46.0           4
## 29  New Hampshire    2.1      57       56  9.5           1
## 30     New Jersey    7.4     159       89 18.8           3
## 31     New Mexico   11.4     285       70 32.1           4
## 32       New York   11.1     254       86 26.1           4
## 33 North Carolina   13.0     337       45 16.1           2
## 34   North Dakota    0.8      45       44  7.3           1
## 35           Ohio    7.3     120       75 21.4           3
## 36       Oklahoma    6.6     151       68 20.0           3
## 37         Oregon    4.9     159       67 29.3           3
## 38   Pennsylvania    6.3     106       72 14.9           3
## 39   Rhode Island    3.4     174       87  8.3           3
## 40 South Carolina   14.4     279       48 22.5           2
## 41   South Dakota    3.8      86       45 12.8           1
## 42      Tennessee   13.2     188       59 26.9           2
## 43          Texas   12.7     201       80 25.5           4
## 44           Utah    3.2     120       80 22.9           3
## 45        Vermont    2.2      48       32 11.2           1
## 46       Virginia    8.5     156       63 20.7           3
## 47     Washington    4.0     145       73 26.2           3
## 48  West Virginia    5.7      81       39  9.3           1
## 49      Wisconsin    2.6      53       66 10.8           1
## 50        Wyoming    6.8     161       60 15.6           3
# Mean of each crime metric per cluster, computed on the original (unscaled)
# values so the cluster profiles are readable in their natural units.
cluster_id <- list(fit$cluster)
aggregate(mydata[, 2:5], by = cluster_id, FUN = mean)
##   Group.1   Murder   Assault UrbanPop     Rape
## 1       1  3.60000  78.53846 52.07692 12.17692
## 2       2 13.93750 243.62500 53.75000 21.41250
## 3       3  5.65625 138.87500 73.87500 18.78125
## 4       4 10.81538 257.38462 76.00000 33.19231
# Number of states assigned to each cluster.
table(fit$cluster)
## 
##  1  2  3  4 
## 13  8 16 13
# install.packages("animation")
# library(animation)
# nm <- (normalized_data[,1:1])
# km <- kmeans(normalized_data,4) #kmeans clustering
# str(km)
# km$cluster
# km1 <- kmeans.ani(normalized_data, 4)
# str(km1)
# km1$centers