La base de datos USArrests contiene estadĂsticas en arrestos por cada 100,000 residentes por agresiĂłn, asesinato y violaciĂłn en cada uno de los 50 estados de EE.UU. en 1973.
#install.packages("cluster") # Para agrupamientos
library(cluster)
#install.packages("ggplot2") # Hacer GrĂĄficas
library(ggplot2)
#install.packages("factoextra") # VisualizaciĂłn Clusters
library(factoextra)
#install.packages("data.table") # Conjunto de datos grande
library(data.table)
library(tidyverse)
datos <- USArrests
summary(datos)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# Escalar la base de datos
datos_escalados <- scale(USArrests)
# Verificar la media y desviación eståndar después del escalado
summary(datos_escalados)
## Murder Assault UrbanPop Rape
## Min. :-1.6044 Min. :-1.5090 Min. :-2.31714 Min. :-1.4874
## 1st Qu.:-0.8525 1st Qu.:-0.7411 1st Qu.:-0.76271 1st Qu.:-0.6574
## Median :-0.1235 Median :-0.1411 Median : 0.03178 Median :-0.1209
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.7949 3rd Qu.: 0.9388 3rd Qu.: 0.84354 3rd Qu.: 0.5277
## Max. : 2.2069 Max. : 1.9948 Max. : 1.75892 Max. : 2.6444
grupos <- 4 # Inicio con cualquier valor, luego verificamos
segmentos <- kmeans(datos_escalados, grupos)
asignacion <- cbind(datos, cluster =
segmentos$cluster)
fviz_cluster(segmentos, data=datos)
#La cantidad optima de grupos corresponde al punto mas alto de la grafica
set.seed(123)
optimizacion <- clusGap(datos_escalados, FUN=kmeans, nstart=1, K.max=10)
plot(optimizacion, xlab="NĂșmero de clusters k")
#fviz_nbclust(df,kmeans,method = "wss") +
# ggtitle("Método del codo")
promedio <- aggregate(asignacion, by=list(asignacion$cluster),FUN=mean)
promedio
## Group.1 Murder Assault UrbanPop Rape cluster
## 1 1 10.81538 257.38462 76.00000 33.19231 1
## 2 2 5.65625 138.87500 73.87500 18.78125 2
## 3 3 13.93750 243.62500 53.75000 21.41250 3
## 4 4 3.60000 78.53846 52.07692 12.17692 4
table(asignacion$cluster)
##
## 1 2 3 4
## 13 16 8 13
library(sf)
#install.packages("tigris")
library(tigris)
library(ggplot2)
#install.packages("tibble")
library(tibble)
# Obtener el mapa de EE.UU.
us_map <- states(cb = TRUE)
## Retrieving data for the year 2021
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
asignacion$nivel_seguridad <- case_when(
asignacion$cluster == 1 ~ "Alto",
asignacion$cluster == 2 ~ "Muy Alto",
asignacion$cluster == 3 ~ "Medio",
asignacion$cluster == 4 ~ "Bajo",
TRUE ~ "Desconocido" # Para manejar cualquier error
)
head(asignacion)
## Murder Assault UrbanPop Rape cluster nivel_seguridad
## Alabama 13.2 236 58 21.2 3 Medio
## Alaska 10.0 263 48 44.5 1 Alto
## Arizona 8.1 294 80 31.0 1 Alto
## Arkansas 8.8 190 50 19.5 3 Medio
## California 9.0 276 91 40.6 1 Alto
## Colorado 7.9 204 78 38.7 1 Alto
asignacion <- asignacion %>%
rownames_to_column(var = "state")
# Unir los datos con el mapa de EE.UU.
us_clustered <- left_join(us_map, asignacion, by = c("NAME" = "state"))
# Definir colores segĂșn el nivel de seguridad
colores_seguridad <- c("Muy Alto" = "red",
"Alto" = "orange",
"Medio" = "yellow",
"Bajo" = "darkgreen")
# Graficar el mapa
ggplot(data = us_clustered) +
geom_sf(aes(fill = nivel_seguridad), color = "black", size = 0.2) +
scale_fill_manual(values = colores_seguridad, name = "Nivel de Seguridad") +
labs(title = "Mapa de Seguridad en EE.UU. (1973)",
subtitle = "ClasificaciĂłn basada en tasas de criminalidad",
caption = "Fuente: USArrests") +
theme_minimal()