# Cluster 4 is the least vulnerable across all the crime categories.
# Pick the crime data CSV interactively, then load it into a data frame.
crime_csv_path <- file.choose()
crime <- read.csv(crime_csv_path)
# Install the packages this analysis was set up with, from the US CRAN mirror.
# The `## ` lines below are the console transcript captured when each call ran.
install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'rmarkdown' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tswaminathan\AppData\Local\Temp\RtmpGufOD3\downloaded_packages
install.packages("xlsx",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'xlsx' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tswaminathan\AppData\Local\Temp\RtmpGufOD3\downloaded_packages
# Package names are case-sensitive: the Java bridge is "rJava". The earlier
# lowercase spelling "rjava" only produced a "package is not available"
# warning and installed nothing.
install.packages("rJava",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
# install.packages("yaml",repos = "http://cran.us.r-project.org")
install.packages("ggplot2",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tswaminathan\AppData\Local\Temp\RtmpGufOD3\downloaded_packages
library(cluster)
library(fpc)
# library(NbClust)
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.9.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
View(crime)
# Normalizing continuous columns to bring them under same scale
normalized_data <- scale(crime[, 2:5]) # excluding the ID from spreadsheet
d <- dist(normalized_data, method = "euclidean") # distance matrix
# Complete-linkage hierarchical clustering. The hclust result is kept under
# its own name because only the hclust plot method understands `hang`.
hc <- hclust(d, method = "complete")
# `fit` remains the dendrogram form, as before, for branch colouring and for
# the cutree() call further down.
fit <- as.dendrogram(hc)
cd <- color_branches(fit, k = 4)
plot(cd) # display dendrogram

# Plot the hclust object (not the dendrogram) so that hang = -1 is honoured
# and all labels are drawn at the baseline. Passing `hang` to the dendrogram
# plot only produced '"hang" is not a graphical parameter' warnings.
plot(hc, hang = -1)

# rect.hclust(fit, k=2, border="red")
# Cut the tree into a fixed number of clusters and show the cluster sizes.
n_clusters <- 4
groups <- cutree(fit, k = n_clusters)
table(groups)
## groups
## 1 2 3 4
## 8 11 21 10
# Attach the cluster number of each row to the original data.
Crime_Rate_Categories <- as.matrix(groups)
final <- data.frame(crime, Crime_Rate_Categories)
# Reorder so the cluster column (currently last) becomes the first column.
cluster_col <- ncol(final)
final1 <- final[, c(cluster_col, seq_len(cluster_col - 1))]
View(final1)
# setwd("C:/Users/tswaminathan/Desktop/Other Desktop Docs Recent/R Prog Assignment/Clustering/Output")
# write.csv(final1, file="Crime_Final.csv",row.names = F)
# Mean of every column except the first, computed per cluster.
crime_measures <- crime[, -1]
cluster_id <- list(final$Crime_Rate_Categories)
aggregate(crime_measures, by = cluster_id, FUN = mean)
## Group.1 Murder Assault UrbanPop Rape
## 1 1 14.087500 252.7500 53.50000 24.53750
## 2 2 11.054545 264.0909 79.09091 32.61818
## 3 3 5.871429 134.4762 70.76190 18.58095
## 4 4 3.180000 78.7000 49.30000 11.63000