#install.packages("cluster")
library(cluster)
#install.packages("ggplot2")
library(ggplot2)
#install.packages("data.table")
library(data.table)
#install.packages("factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#install.packages("maps")
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:cluster':
##
## votes.repub
# Load the built-in USArrests data; `codo` is the working copy used for the
# elbow ("codo") analysis.
df <- USArrests
codo <- df
# Scale the numeric variables (column-wise centering and scaling)
datos_escalados <- scale(codo)
# Elbow method: record the total within-cluster sum of squares (WCSS) for
# every candidate number of clusters k = 1, ..., max_k.
set.seed(123)
max_k <- 10
# Preallocate a numeric vector of the final length instead of growing an
# empty (logical) vector one element at a time.
wcss <- numeric(max_k)
for (k in seq_len(max_k)) {
  # nstart = 25 keeps the best of 25 random initializations for each k
  kmeans_model <- kmeans(datos_escalados, centers = k, nstart = 25)
  wcss[k] <- kmeans_model$tot.withinss
}
# Elbow plot: WCSS against the number of clusters
par(mar = c(4, 4, 1, 1))
plot(
  1:max_k,
  wcss,
  type = "b",
  pch = 19,
  frame = FALSE,
  main = "Metodo del Codo",
  xlab = "Numero de Clusters",
  ylab = "WCSS"
)
# Gap statistic for k = 1..10 as a second criterion for choosing k; the seed
# is reset so the run is reproducible.
set.seed(123)
optimizacion <- clusGap(
  x = datos_escalados,
  FUNcluster = kmeans,
  K.max = 10,
  nstart = 1
)
plot(optimizacion, xlab = "numero de clusters k")
# Fit the final k-means model on the scaled data with the chosen number of
# clusters, then display the fitted object.
# NOTE(review): nstart is left at its default here, unlike the elbow loop
# which uses nstart = 25, so the result depends on the RNG state at this
# point of the script — confirm this is intended.
grupos <- 3
segmentos <- kmeans(x = datos_escalados, centers = grupos)
segmentos
## K-means clustering with 3 clusters of sizes 29, 13, 8
##
## Cluster means:
## Murder Assault UrbanPop Rape
## 1 -0.7010700 -0.7071522 -0.09924526 -0.57773737
## 2 0.6950701 1.0394414 0.72263703 1.27693964
## 3 1.4118898 0.8743346 -0.81452109 0.01927104
##
## Clustering vector:
## Alabama Alaska Arizona Arkansas California
## 3 2 2 3 2
## Colorado Connecticut Delaware Florida Georgia
## 2 1 1 2 3
## Hawaii Idaho Illinois Indiana Iowa
## 1 1 2 1 1
## Kansas Kentucky Louisiana Maine Maryland
## 1 1 3 1 2
## Massachusetts Michigan Minnesota Mississippi Missouri
## 1 2 1 3 2
## Montana Nebraska Nevada New Hampshire New Jersey
## 1 1 2 1 1
## New Mexico New York North Carolina North Dakota Ohio
## 2 2 3 1 1
## Oklahoma Oregon Pennsylvania Rhode Island South Carolina
## 1 1 1 1 3
## South Dakota Tennessee Texas Utah Vermont
## 1 3 2 1 1
## Virginia Washington West Virginia Wisconsin Wyoming
## 1 1 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 53.354791 19.922437 8.316061
## (between_SS / total_SS = 58.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Append each state's cluster label to the original (unscaled) data and
# display the combined table.
Asignación <- data.frame(
  df,
  cluster = segmentos$cluster,
  check.names = FALSE
)
Asignación
## Murder Assault UrbanPop Rape cluster
## Alabama 13.2 236 58 21.2 3
## Alaska 10.0 263 48 44.5 2
## Arizona 8.1 294 80 31.0 2
## Arkansas 8.8 190 50 19.5 3
## California 9.0 276 91 40.6 2
## Colorado 7.9 204 78 38.7 2
## Connecticut 3.3 110 77 11.1 1
## Delaware 5.9 238 72 15.8 1
## Florida 15.4 335 80 31.9 2
## Georgia 17.4 211 60 25.8 3
## Hawaii 5.3 46 83 20.2 1
## Idaho 2.6 120 54 14.2 1
## Illinois 10.4 249 83 24.0 2
## Indiana 7.2 113 65 21.0 1
## Iowa 2.2 56 57 11.3 1
## Kansas 6.0 115 66 18.0 1
## Kentucky 9.7 109 52 16.3 1
## Louisiana 15.4 249 66 22.2 3
## Maine 2.1 83 51 7.8 1
## Maryland 11.3 300 67 27.8 2
## Massachusetts 4.4 149 85 16.3 1
## Michigan 12.1 255 74 35.1 2
## Minnesota 2.7 72 66 14.9 1
## Mississippi 16.1 259 44 17.1 3
## Missouri 9.0 178 70 28.2 2
## Montana 6.0 109 53 16.4 1
## Nebraska 4.3 102 62 16.5 1
## Nevada 12.2 252 81 46.0 2
## New Hampshire 2.1 57 56 9.5 1
## New Jersey 7.4 159 89 18.8 1
## New Mexico 11.4 285 70 32.1 2
## New York 11.1 254 86 26.1 2
## North Carolina 13.0 337 45 16.1 3
## North Dakota 0.8 45 44 7.3 1
## Ohio 7.3 120 75 21.4 1
## Oklahoma 6.6 151 68 20.0 1
## Oregon 4.9 159 67 29.3 1
## Pennsylvania 6.3 106 72 14.9 1
## Rhode Island 3.4 174 87 8.3 1
## South Carolina 14.4 279 48 22.5 3
## South Dakota 3.8 86 45 12.8 1
## Tennessee 13.2 188 59 26.9 3
## Texas 12.7 201 80 25.5 2
## Utah 3.2 120 80 22.9 1
## Vermont 2.2 48 32 11.2 1
## Virginia 8.5 156 63 20.7 1
## Washington 4.0 145 73 26.2 1
## West Virginia 5.7 81 39 9.3 1
## Wisconsin 2.6 53 66 10.8 1
## Wyoming 6.8 161 60 15.6 1
# Plot the fitted clusters over the scaled observations
fviz_cluster(object = segmentos, data = datos_escalados)
# Mean of every column within each cluster (original, unscaled units)
promedio <- aggregate(
  x = Asignación,
  by = list(Asignación[["cluster"]]),
  FUN = mean
)
promedio
## Group.1 Murder Assault UrbanPop Rape cluster
## 1 1 4.734483 111.8276 64.10345 15.82069 1
## 2 2 10.815385 257.3846 76.00000 33.19231 2
## 3 3 13.937500 243.6250 53.75000 21.41250 3
# Color the US state map by cluster. Membership is derived from the
# clustering result stored in `Asignación` rather than from hand-copied
# state lists, so the map cannot drift out of sync with the model (and no
# variable named `c` shadows base::c anymore).
estados_por_cluster <- split(rownames(Asignación), Asignación$cluster)
# Rank clusters from lowest to highest mean murder rate. For the clustering
# printed in this script's output this yields the same coloring as before:
# green (lowest), orange, red (highest).
murder_medio <- tapply(Asignación$Murder, Asignación$cluster, mean)
orden <- names(sort(murder_medio))
# Interpolate the palette so any number of clusters gets a color; with three
# clusters the result is exactly green, orange and red.
paleta <- grDevices::colorRampPalette(c("green", "orange", "red"))
colores <- paleta(length(orden))
# Draw the outline first, then fill each cluster's states on top of it.
# NOTE(review): the "state" database appears to cover only the contiguous
# states, so Alaska and Hawaii are not drawn (same as before) — confirm.
map(database = "state")
for (i in seq_along(orden)) {
  map(
    database = "state",
    regions = estados_por_cluster[[orden[i]]],
    col = colores[i],
    fill = TRUE,
    add = TRUE
  )
}
#conclusiones