Introducción

La siguiente actividad utiliza herramientas de segmentación y visualización en mapas para generar un pequeño análisis sobre el nivel de violencia en los Estados Unidos. Se utilizó la base de datos USArrest proporcionada por R; esta base de datos proporciona información estadística sobre arrestos por cada 100,000 habitantes por diferentes tipos de crímenes cometidos en los 50 estados de EE. UU. en 1973. Para la creación de los segmentos se utilizaron las variables de Murder, Assault y Rape. Los datos fueron escalados antes de usarlos.

Más información:
Data Set - USArrest

Paso 1 - Segmentación

library(cluster)
library(ggplot2)
library(data.table)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:cluster':
## 
##     votes.repub
df <- USArrests
df2 <- select(df, -UrbanPop)
summary(df2)
##      Murder          Assault           Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :46.00
datos <- scale(df2)
set.seed(123)
wcss <- vector()
max_k <- 10 #se puede modificar
for (k in 1:max_k) {
  kmeans_model <- kmeans(datos, centers = k, nstart = 25)
  wcss[k] <- kmeans_model$tot.withinss
}

par(mar=c(4,4,1,1))
plot(1:max_k, wcss, type="b", xlab="Número de Clusters", ylab="Wcss", main="Método del Codo", pch=19, frame=FALSE)

grupos <- 3
segmentos <- kmeans(datos, grupos)
segmentos # en los resultados x es el centroide del segmento
## K-means clustering with 3 clusters of sizes 8, 30, 12
## 
## Cluster means:
##       Murder    Assault       Rape
## 1  0.4992527  0.9613301  1.6861362
## 2 -0.6699560 -0.6758849 -0.5646433
## 3  1.3420549  1.0488255  0.2875176
## 
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              3              1              1              2              1 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              1              2              2              3              3 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              2              2              3              2              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              2              2              3              2              3 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              2              1              2              3              1 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              1              2              2 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              1              3              3              2              2 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              2              2              2              2              3 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              3              3              2              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              2              2              2              2              2 
## 
## Within cluster sum of squares by cluster:
## [1]  6.102694 27.386869  8.838465
##  (between_SS / total_SS =  71.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
asignacion <- cbind(df, cluster = segmentos$cluster)
fviz_cluster(segmentos, data=df2)

set.seed(123)
optimizacion <- clusGap(df2, FUN=kmeans, nstart=1, K.max=10)
plot(optimizacion, xlab="Número de clusters k")

Paso 2 - Mapa

summary_data <- asignacion %>%
                group_by(cluster) %>%
                summarise_all(mean)

print(summary_data)
## # A tibble: 3 × 5
##   cluster Murder Assault UrbanPop  Rape
##     <int>  <dbl>   <dbl>    <dbl> <dbl>
## 1       1   9.96    251.     74    37.0
## 2       2   4.87    114.     63.6  15.9
## 3       3  13.6     258.     64.7  23.9
cluster2 <- filter(asignacion, cluster == 2)
cluster2
##               Murder Assault UrbanPop Rape cluster
## Arkansas         8.8     190       50 19.5       2
## Connecticut      3.3     110       77 11.1       2
## Delaware         5.9     238       72 15.8       2
## Hawaii           5.3      46       83 20.2       2
## Idaho            2.6     120       54 14.2       2
## Indiana          7.2     113       65 21.0       2
## Iowa             2.2      56       57 11.3       2
## Kansas           6.0     115       66 18.0       2
## Kentucky         9.7     109       52 16.3       2
## Maine            2.1      83       51  7.8       2
## Massachusetts    4.4     149       85 16.3       2
## Minnesota        2.7      72       66 14.9       2
## Montana          6.0     109       53 16.4       2
## Nebraska         4.3     102       62 16.5       2
## New Hampshire    2.1      57       56  9.5       2
## New Jersey       7.4     159       89 18.8       2
## North Dakota     0.8      45       44  7.3       2
## Ohio             7.3     120       75 21.4       2
## Oklahoma         6.6     151       68 20.0       2
## Oregon           4.9     159       67 29.3       2
## Pennsylvania     6.3     106       72 14.9       2
## Rhode Island     3.4     174       87  8.3       2
## South Dakota     3.8      86       45 12.8       2
## Utah             3.2     120       80 22.9       2
## Vermont          2.2      48       32 11.2       2
## Virginia         8.5     156       63 20.7       2
## Washington       4.0     145       73 26.2       2
## West Virginia    5.7      81       39  9.3       2
## Wisconsin        2.6      53       66 10.8       2
## Wyoming          6.8     161       60 15.6       2
clu1 <- c("Alaska", "Arizona", "California", "Colorado", "Michigan", "Missouri", "Nevada", "New Mexico")
clu2 <- c("Arkansas", "Connecticut", "Delaware", "Hawaii", "Idaho", "Indiana", "Iowa", "Kansas", "Kentucky", "Maine", "Massachusetts", "Minnesota", "Montana", "Nebraska", "New Hampshire", "New Jersey", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Dakota", "Utah", "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming")
clu3 <- c("Alabama", "Florida", "Georgia", "Illinois", "Louisiana", "Maryland", "Mississippi", "New York", "North Carolina", "South Carolina", "Tennessee", "Texas")

map(database = "state")
map(database="state", regions = clu2, col="green", fill=T, add=TRUE)
map(database="state", regions = clu1, col="orange", fill=T, add=TRUE)
map(database="state", regions = clu3, col="red", fill=T, add=TRUE)

Conclusión

Al realizar este análisis, podemos ver que conforme aumenta la urbanización de los estados, la violencia también tiende a aumentar. Asimismo, podemos notar que a lo largo de la frontera sur se concentran los estados más violentos o peligrosos, lo que sugiere una posible correlación entre la proximidad a áreas urbanas densamente pobladas y niveles más altos de violencia. Esta tendencia puede ser de particular interés para los encargados de formular políticas y programas de prevención del delito en esas regiones.

LS0tCnRpdGxlOiAiQWN0aXZpZGFkIC0gVVNBcnJlc3RzIgphdXRob3I6ICJMaXNzZXQgSGVybsOhbmRleiBBMDEyODQ2MTEiCmRhdGU6ICIyMDI0LTAyLTIwIgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgY29kZV9kb3dubG9hZDogVFJVRQotLS0KIVtdKC9Vc2Vycy9saXNoZHovRG93bmxvYWRzL2dpcGh5LTIxLmdpZikgIAoKICAKIyBJbnRyb2R1Y2Npw7NuIApMYSBzaWd1aWVudGUgYWN0aXZpZGFkIHV0aWxpemEgaGVycmFtaWVudGFzIGRlIHNlZ21lbnRhY2nDs24geSB2aXN1YWxpemFjacOzbiBlbiBtYXBhcyBwYXJhIGdlbmVyYXIgdW4gcGVxdWXDsW8gYW7DoWxpc2lzIHNvYnJlIGVsIG5pdmVsIGRlIHZpb2xlbmNpYSBlbiBsb3MgRXN0YWRvcyBVbmlkb3MuIFNlIHV0aWxpesOzIGxhIGJhc2UgZGUgZGF0b3MgKlVTQXJyZXN0KiBwcm9wb3JjaW9uYWRhIHBvciBSOyBlc3RhIGJhc2UgZGUgZGF0b3MgcHJvcG9yY2lvbmEgaW5mb3JtYWNpw7NuIGVzdGFkw61zdGljYSBzb2JyZSBhcnJlc3RvcyBwb3IgY2FkYSAxMDAsMDAwIGhhYml0YW50ZXMgcG9yIGRpZmVyZW50ZXMgdGlwb3MgZGUgY3LDrW1lbmVzIGNvbWV0aWRvcyBlbiBsb3MgNTAgZXN0YWRvcyBkZSBFRS4gVVUuIGVuIDE5NzMuIFBhcmEgbGEgY3JlYWNpw7NuIGRlIGxvcyBzZWdtZW50b3Mgc2UgdXRpbGl6YXJvbiBsYXMgdmFyaWFibGVzIGRlICpNdXJkZXIqLCAqQXNzYXVsdCogeSAqUmFwZSouIExvcyBkYXRvcyBmdWVyb24gZXNjYWxhZG9zIGFudGVzIGRlIHVzYXJsb3MuCgpNw6FzIGluZm9ybWFjacOzbjogIApbRGF0YSBTZXQgLSBVU0FycmVzdF0oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL2RhdGFzZXRzL3ZlcnNpb25zLzMuNi4yL3RvcGljcy9VU0FycmVzdHMpCgojIFBhc28gMSAtIFNlZ21lbnRhY2nDs24KYGBge3J9CmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkoZmFjdG9leHRyYSkKbGlicmFyeShkcGx5cikKbGlicmFyeShtYXBzKQpgYGAKCmBgYHtyfQpkZiA8LSBVU0FycmVzdHMKZGYyIDwtIHNlbGVjdChkZiwgLVVyYmFuUG9wKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGRmMikKYGBgCgpgYGB7cn0KZGF0b3MgPC0gc2NhbGUoZGYyKQpgYGAKCmBgYHtyfQpzZXQuc2VlZCgxMjMpCndjc3MgPC0gdmVjdG9yKCkKbWF4X2sgPC0gMTAgI3NlIHB1ZWRlIG1vZGlmaWNhcgpmb3IgKGsgaW4gMTptYXhfaykgewogIGttZWFuc19tb2RlbCA8LSBrbWVhbnMoZGF0b3MsIGNlbnRlcnMgPSBrLCBuc3RhcnQgPSAyNSkKICB3Y3NzW2tdIDwtIGttZWFuc19tb2RlbCR0b3Qud2l0aGluc3MKfQoKcGFyKG1hcj1jKDQsNCwxLDEpKQpwbG90KDE6bWF4X2ssIHdjc3MsIHR5cGU9ImIiLCB4bGFiPSJOw7ptZXJvIGRlIENsdXN0ZXJzIiwgeWxhYj0iV2NzcyIsIG1haW49Ik3DqXRvZG8gZGVsIENvZG8iLCBwY2g9MTksIGZyYW1lPUZBTFNFKQpgYGAKCgpgYGB7cn0KZ3J1cG9zIDwtIDMKYGBgCgpgYGB7cn0Kc2VnbWVudG9zIDwtIGttZWFucyhkYXRvcywgZ3J1cG9zKQpzZWdtZW50b3MgIyBlbiBsb3MgcmVzdWx0YWRvcyB4IGVzIGVsIGNlbnRyb2lkZSBkZWwgc2VnbWVudG8KYGBgCgpgYGB7cn0KYXNpZ25hY2lvbiA8LSBjYmluZChkZiwgY2x1c3RlciA9IHNlZ21lbnRvcyRjbHVzdGVyKQpgYGAKCmBgYHtyfQpmdml6X2NsdXN0ZXIoc2VnbWVudG9zLCBkYXRhPWRmMikKYGBgCgpgYGB7cn0Kc2V0LnNlZWQoMTIzKQpvcHRpbWl6YWNpb24gPC0gY2x1c0dhcChkZjIsIEZVTj1rbWVhbnMsIG5zdGFydD0xLCBLLm1heD0xMCkKcGxvdChvcHRpbWl6YWNpb24sIHhsYWI9Ik7Dum1lcm8gZGUgY2x1c3RlcnMgayIpCmBgYAoKIyBQYXNvIDIgLSBNYXBhCgpgYGB7cn0Kc3VtbWFyeV9kYXRhIDwtIGFzaWduYWNpb24gJT4lCiAgICAgICAgICAgICAgICBncm91cF9ieShjbHVzdGVyKSAlPiUKICAgICAgICAgICAgICAgIHN1bW1hcmlzZV9hbGwobWVhbikKCnByaW50KHN1bW1hcnlfZGF0YSkKICAgICAgICAgICAgCmBgYAoKCmBgYHtyfQpjbHVzdGVyMiA8LSBmaWx0ZXIoYXNpZ25hY2lvbiwgY2x1c3RlciA9PSAyKQpjbHVzdGVyMgpgYGAKCmBgYHtyfQpjbHUxIDwtIGMoIkFsYXNrYSIsICJBcml6b25hIiwgIkNhbGlmb3JuaWEiLCAiQ29sb3JhZG8iLCAiTWljaGlnYW4iLCAiTWlzc291cmkiLCAiTmV2YWRhIiwgIk5ldyBNZXhpY28iKQpjbHUyIDwtIGMoIkFya2Fuc2FzIiwgIkNvbm5lY3RpY3V0IiwgIkRlbGF3YXJlIiwgIkhhd2FpaSIsICJJZGFobyIsICJJbmRpYW5hIiwgIklvd2EiLCAiS2Fuc2FzIiwgIktlbnR1Y2t5IiwgIk1haW5lIiwgIk1hc3NhY2h1c2V0dHMiLCAiTWlubmVzb3RhIiwgIk1vbnRhbmEiLCAiTmVicmFza2EiLCAiTmV3IEhhbXBzaGlyZSIsICJOZXcgSmVyc2V5IiwgIk5vcnRoIERha290YSIsICJPaGlvIiwgIk9rbGFob21hIiwgIk9yZWdvbiIsICJQZW5uc3lsdmFuaWEiLCAiUmhvZGUgSXNsYW5kIiwgIlNvdXRoIERha290YSIsICJVdGFoIiwgIlZlcm1vbnQiLCAiVmlyZ2luaWEiLCAiV2FzaGluZ3RvbiIsICJXZXN0IFZpcmdpbmlhIiwgIldpc2NvbnNpbiIsICJXeW9taW5nIikKY2x1MyA8LSBjKCJBbGFiYW1hIiwgIkZsb3JpZGEiLCAiR2VvcmdpYSIsICJJbGxpbm9pcyIsICJMb3Vpc2lhbmEiLCAiTWFyeWxhbmQiLCAiTWlzc2lzc2lwcGkiLCAiTmV3IFlvcmsiLCAiTm9ydGggQ2Fyb2xpbmEiLCAiU291dGggQ2Fyb2xpbmEiLCAiVGVubmVzc2VlIiwgIlRleGFzIikKCm1hcChkYXRhYmFzZSA9ICJzdGF0ZSIpCm1hcChkYXRhYmFzZT0ic3RhdGUiLCByZWdpb25zID0gY2x1MiwgY29sPSJncmVlbiIsIGZpbGw9VCwgYWRkPVRSVUUpCm1hcChkYXRhYmFzZT0ic3RhdGUiLCByZWdpb25zID0gY2x1MSwgY29sPSJvcmFuZ2UiLCBmaWxsPVQsIGFkZD1UUlVFKQptYXAoZGF0YWJhc2U9InN0YXRlIiwgcmVnaW9ucyA9IGNsdTMsIGNvbD0icmVkIiwgZmlsbD1ULCBhZGQ9VFJVRSkKYGBgCgojIENvbmNsdXNpw7NuCkFsIHJlYWxpemFyIGVzdGUgYW7DoWxpc2lzLCBwb2RlbW9zIHZlciBxdWUgY29uZm9ybWUgYXVtZW50YSBsYSB1cmJhbml6YWNpw7NuIGRlIGxvcyBlc3RhZG9zLCBsYSB2aW9sZW5jaWEgdGFtYmnDqW4gdGllbmRlIGEgYXVtZW50YXIuIEFzaW1pc21vLCBwb2RlbW9zIG5vdGFyIHF1ZSBhIGxvIGxhcmdvIGRlIGxhIGZyb250ZXJhIHN1ciBzZSBjb25jZW50cmFuIGxvcyBlc3RhZG9zIG3DoXMgdmlvbGVudG9zIG8gcGVsaWdyb3NvcywgbG8gcXVlIHN1Z2llcmUgdW5hIHBvc2libGUgY29ycmVsYWNpw7NuIGVudHJlIGxhIHByb3hpbWlkYWQgYSDDoXJlYXMgdXJiYW5hcyBkZW5zYW1lbnRlIHBvYmxhZGFzIHkgbml2ZWxlcyBtw6FzIGFsdG9zIGRlIHZpb2xlbmNpYS4gRXN0YSB0ZW5kZW5jaWEgcHVlZGUgc2VyIGRlIHBhcnRpY3VsYXIgaW50ZXLDqXMgcGFyYSBsb3MgZW5jYXJnYWRvcyBkZSBmb3JtdWxhciBwb2zDrXRpY2FzIHkgcHJvZ3JhbWFzIGRlIHByZXZlbmNpw7NuIGRlbCBkZWxpdG8gZW4gZXNhcyByZWdpb25lcy4KCgo=