Contexto

La base de datos USArrests contiene estadĂ­sticas en arrestos por cada 100,000 residentes por agresiĂłn, asesinato y violaciĂłn en cada uno de los 50 estados de EE.UU. en 1973.

Instalar paquetes y llamar librerĂ­a

#install.packages("caret") #Algoritmos de aprendizaje automático
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
#install.packages("datasets") #Para usar la base de datos "Iris"
library(datasets)
#install.packages("lattice") #Crear gráficos
library(lattice)
#install.packages("DataExplorer") #Análisis Descriptivo"
library(DataExplorer)
#install.packages("kernlab")
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
#install.packages("dplyr") # ManipulaciĂłn de datos
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages("cluster")
library(cluster)
#install.packages("factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#install.packages("data.table")
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” forcats   1.0.0     âś” stringr   1.5.1
## âś” lubridate 1.9.4     âś” tibble    3.2.1
## âś” purrr     1.0.4     âś” tidyr     1.3.1
## âś” readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– kernlab::alpha()      masks ggplot2::alpha()
## âś– data.table::between() masks dplyr::between()
## âś– purrr::cross()        masks kernlab::cross()
## âś– dplyr::filter()       masks stats::filter()
## âś– data.table::first()   masks dplyr::first()
## âś– lubridate::hour()     masks data.table::hour()
## âś– lubridate::isoweek()  masks data.table::isoweek()
## âś– dplyr::lag()          masks stats::lag()
## âś– data.table::last()    masks dplyr::last()
## âś– purrr::lift()         masks caret::lift()
## âś– lubridate::mday()     masks data.table::mday()
## âś– lubridate::minute()   masks data.table::minute()
## âś– lubridate::month()    masks data.table::month()
## âś– lubridate::quarter()  masks data.table::quarter()
## âś– lubridate::second()   masks data.table::second()
## âś– purrr::transpose()    masks data.table::transpose()
## âś– lubridate::wday()     masks data.table::wday()
## âś– lubridate::week()     masks data.table::week()
## âś– lubridate::yday()     masks data.table::yday()
## âś– lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Importar base de datos

df <- USArrests

Análisis Descriptivo

#create_report(df)
plot_missing(df)

plot_histogram(df)

plot_correlation(df)

summary(df)
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00

Scalar la base de datos

df <- df[, c("Murder", "Assault", "Rape")]
datos_escalados <- scale(df)
grupos <- 3 #NĂşmero de clusters inicial
segmentos <- kmeans(datos_escalados, grupos)
df$Cluster <- as.factor(segmentos$cluster)
df$Promedio_Score <- rowMeans(df[, c("Murder", "Assault", "Rape")])
df$Seguridad <- cut(df$Promedio_Score, breaks = quantile(df$Promedio_Score, probs = c(0, 0.25, 0.5, 0.75, 1)),
                    labels = c("Muy Seguro", "Seguro", "Inseguro", "Muy Inseguro"),
                    include.lowest = TRUE)

Graficar cluster

fviz_cluster(segmentos, data=datos_escalados)

set.seed(123)
optimizacion <- clusGap(datos_escalados, FUN=kmeans, nstart=1, K.max=10)
plot(optimizacion, xlab="Numero de Clusters K")

promedio <- aggregate(df[,-ncol(df)], by=list(df$Cluster), FUN=mean)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
promedio
##   Group.1    Murder   Assault     Rape Cluster Promedio_Score
## 1       1 12.331579 259.31579 29.21579      NA      100.28772
## 2       2  3.078571  80.92857 11.80000      NA       31.93571
## 3       3  6.588235 145.76471 20.07647      NA       57.47647
table(df$Cluster)
## 
##  1  2  3 
## 19 14 17
LS0tDQp0aXRsZTogIlVTQWFycmVzdHMiDQphdXRob3I6ICJOYW5jeSBNYXJyb3F1w61uIC0gQTAxMTk4NTUzIg0KZGF0ZTogIjIwMjUtMDItMjEiDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUNCiAgICB0aGVtZTogam91cm5hbA0KLS0tDQohW10oIkM6XFxVc2Vyc1xcbGVub3ZvXFxEb3dubG9hZHNcXFBVUkdFLmpwZyIpDQoNCiMgPHNwYW4gc3R5bGU9ICJjb2xvcjpibHVlOyI+Q29udGV4dG88L3NwYW4+DQpMYSBiYXNlIGRlIGRhdG9zICoqVVNBcnJlc3RzKiogY29udGllbmUgZXN0YWTDrXN0aWNhcyBlbiBhcnJlc3RvcyBwb3IgY2FkYSAxMDAsMDAwIHJlc2lkZW50ZXMgcG9yIGFncmVzacOzbiwgYXNlc2luYXRvIHkgdmlvbGFjacOzbiBlbiBjYWRhIHVubyBkZSBsb3MgNTAgZXN0YWRvcyBkZSBFRS5VVS4gZW4gMTk3My4NCg0KIyA8c3BhbiBzdHlsZT0gImNvbG9yOmJsdWU7Ij5JbnN0YWxhciBwYXF1ZXRlcyB5IGxsYW1hciBsaWJyZXLDrWE8L3NwYW4+DQpgYGB7cn0NCiNpbnN0YWxsLnBhY2thZ2VzKCJjYXJldCIpICNBbGdvcml0bW9zIGRlIGFwcmVuZGl6YWplIGF1dG9tw6F0aWNvDQpsaWJyYXJ5KGNhcmV0KQ0KI2luc3RhbGwucGFja2FnZXMoImRhdGFzZXRzIikgI1BhcmEgdXNhciBsYSBiYXNlIGRlIGRhdG9zICJJcmlzIg0KbGlicmFyeShkYXRhc2V0cykNCiNpbnN0YWxsLnBhY2thZ2VzKCJsYXR0aWNlIikgI0NyZWFyIGdyw6FmaWNvcw0KbGlicmFyeShsYXR0aWNlKQ0KI2luc3RhbGwucGFja2FnZXMoIkRhdGFFeHBsb3JlciIpICNBbsOhbGlzaXMgRGVzY3JpcHRpdm8iDQpsaWJyYXJ5KERhdGFFeHBsb3JlcikNCiNpbnN0YWxsLnBhY2thZ2VzKCJrZXJubGFiIikNCmxpYnJhcnkoa2VybmxhYikNCiNpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpICMgTWFuaXB1bGFjacOzbiBkZSBkYXRvcw0KbGlicmFyeShkcGx5cikNCiNpbnN0YWxsLnBhY2thZ2VzKCJjbHVzdGVyIikNCmxpYnJhcnkoY2x1c3RlcikNCiNpbnN0YWxsLnBhY2thZ2VzKCJmYWN0b2V4dHJhIikNCmxpYnJhcnkoZmFjdG9leHRyYSkNCiNpbnN0YWxsLnBhY2thZ2VzKCJkYXRhLnRhYmxlIikNCmxpYnJhcnkoZGF0YS50YWJsZSkNCiNpbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0gImNvbG9yOmJsdWU7Ij5JbXBvcnRhciBiYXNlIGRlIGRhdG9zPC9zcGFuPg0KYGBge3J9DQpkZiA8LSBVU0FycmVzdHMNCmBgYA0KDQojIyA8c3BhbiBzdHlsZT0gImNvbG9yOmJsdWU7Ij5BbsOhbGlzaXMgRGVzY3JpcHRpdm88L3NwYW4+DQoNCmBgYHtyfQ0KI2NyZWF0ZV9yZXBvcnQoZGYpDQpwbG90X21pc3NpbmcoZGYpDQpwbG90X2hpc3RvZ3JhbShkZikNCnBsb3RfY29ycmVsYXRpb24oZGYpDQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5KGRmKQ0KYGBgDQojIyA8c3BhbiBzdHlsZT0gImNvbG9yOmJsdWU7Ij5TY2FsYXIgbGEgYmFzZSBkZSBkYXRvczwvc3Bhbj4NCmBgYHtyfQ0KZGYgPC0gZGZbLCBjKCJNdXJkZXIiLCAiQXNzYXVsdCIsICJSYXBlIildDQpkYXRvc19lc2NhbGFkb3MgPC0gc2NhbGUoZGYpDQpgYGANCg0KYGBge3J9DQpncnVwb3MgPC0gMyAjTsO6bWVybyBkZSBjbHVzdGVycyBpbmljaWFsDQpzZWdtZW50b3MgPC0ga21lYW5zKGRhdG9zX2VzY2FsYWRvcywgZ3J1cG9zKQ0KYGBgDQoNCmBgYHtyfQ0KZGYkQ2x1c3RlciA8LSBhcy5mYWN0b3Ioc2VnbWVudG9zJGNsdXN0ZXIpDQpgYGANCg0KYGBge3J9DQpkZiRQcm9tZWRpb19TY29yZSA8LSByb3dNZWFucyhkZlssIGMoIk11cmRlciIsICJBc3NhdWx0IiwgIlJhcGUiKV0pDQpgYGANCg0KYGBge3J9DQpkZiRTZWd1cmlkYWQgPC0gY3V0KGRmJFByb21lZGlvX1Njb3JlLCBicmVha3MgPSBxdWFudGlsZShkZiRQcm9tZWRpb19TY29yZSwgcHJvYnMgPSBjKDAsIDAuMjUsIDAuNSwgMC43NSwgMSkpLA0KICAgICAgICAgICAgICAgICAgICBsYWJlbHMgPSBjKCJNdXkgU2VndXJvIiwgIlNlZ3VybyIsICJJbnNlZ3VybyIsICJNdXkgSW5zZWd1cm8iKSwNCiAgICAgICAgICAgICAgICAgICAgaW5jbHVkZS5sb3dlc3QgPSBUUlVFKQ0KYGBgDQoNCiMjIDxzcGFuIHN0eWxlPSAiY29sb3I6Ymx1ZTsiPkdyYWZpY2FyIGNsdXN0ZXI8L3NwYW4+DQpgYGB7cn0NCmZ2aXpfY2x1c3RlcihzZWdtZW50b3MsIGRhdGE9ZGF0b3NfZXNjYWxhZG9zKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzKQ0Kb3B0aW1pemFjaW9uIDwtIGNsdXNHYXAoZGF0b3NfZXNjYWxhZG9zLCBGVU49a21lYW5zLCBuc3RhcnQ9MSwgSy5tYXg9MTApDQpwbG90KG9wdGltaXphY2lvbiwgeGxhYj0iTnVtZXJvIGRlIENsdXN0ZXJzIEsiKQ0KYGBgDQoNCmBgYHtyfQ0KcHJvbWVkaW8gPC0gYWdncmVnYXRlKGRmWywtbmNvbChkZildLCBieT1saXN0KGRmJENsdXN0ZXIpLCBGVU49bWVhbikNCnByb21lZGlvDQp0YWJsZShkZiRDbHVzdGVyKQ0KYGBgDQoNCg==