# En este notebook se encuentran el análisis y los procedimientos realizados para la construcción de un modelo de agrupamiento de barrios de acuerdo con los accidentes registrados en Medellín en los años 2014-2018.
# En el modelo a desarrollar se tendrán múltiples variables como criterios para el agrupamiento.

# Point the R session at the default working directory used by this analysis.
setwd(dir = "C:/RDATA")
# Echo the active working directory so the change can be confirmed.
getwd()

## [1] "C:/RDATA"

# [1] ""C:/RDATA""

# Check that the packages this script relies on are installed, and install
# whichever ones are missing (together with their dependencies).
#
# installed.packages() is queried once for the whole list instead of once per
# package. The list also covers the packages that are attached with library()
# further down but previously had no install check (formattable, caret, fpc,
# clValid, mclust, ppclust, FactoMineR), so a fresh machine no longer fails
# at the library() calls.
required_packages <- c(
  "readxl", # reads .xlsx workbooks
  "dplyr",
  "scales",
  "plyr",
  "qcc",
  "ggplot2",
  "NbClust",
  "car",
  "rgl",
  "cluster",
  "factoextra",
  "kohonen",
  "clustertend",
  "seriation",
  "Hmisc",
  "rfm",
  "dbscan",
  "writexl",
  "randomForest",
  "C50",
  "rpart",
  "Boruta",
  "partykit",
  "liquidSVM",
  "ggdendro",
  # Attached below with library() but missing from the original checks.
  "formattable",
  "caret",
  "fpc",
  "clValid",
  "mclust",
  "ppclust",
  "FactoMineR"
)

# Names of the required packages that are not present in any library path.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))

# Only contact the repository when something actually needs installing.
if (length(missing_packages) > 0) {
  install.packages(missing_packages, dependencies = TRUE)
}

# Carga de librerias
library(formattable)
library(Boruta)
library(rpart)
library(randomForest)
library(partykit)
library(liquidSVM)
library(writexl)
library(car)
library(rgl)
library(NbClust)
library(plyr)
library(ggplot2)
library(qcc)
library(readxl)
library(dplyr)
library(scales)
library(cluster)
library(factoextra)
library(kohonen)
library(cluster)
library(clustertend)
library(seriation)
library (Hmisc)
library (dbscan)
library(caret)
library(fpc)
library(clValid)
library(mclust)
library(ppclust)
library(FactoMineR)
library(C50)
library(factoextra)
library(ggdendro)

# Load the accident workbook; read_excel() returns it as a tibble.
accidentes <- read_excel(path = "C:/RDATA/Barrios_Cluster.xlsx")

# Preview the first rows to check the columns that were read.
head(x = accidentes)

## # A tibble: 6 x 35
##   Barrio Comuna LONGITUD_BARRIO LATITUD_BARRIO Cant.LunesAVier~ Cant.SabadoYDom~
##   <chr>  <chr>            <dbl>          <dbl>            <dbl>            <dbl>
## 1 Aguas~ Corre~           -75.6           6.24               55               31
## 2 Aldea~ Popul~           -75.5           6.29               50               34
## 3 Aleja~ El Po~           -75.7           6.22              519               91
## 4 Aleja~ Bueno~           -75.5           6.24              598              242
## 5 Alfon~ Casti~           -75.6           6.28              828              320
## 6 Altam~ Roble~           -75.6           6.28              618              168
## # ... with 29 more variables: Cant.Enero <dbl>, Cant.Febrero <dbl>,
## #   Cant.Marzo <dbl>, Cant.Abril <dbl>, Cant.Mayo <dbl>, Cant.Junio <dbl>,
## #   Cant.Julio <dbl>, Cant.Agosto <dbl>, Cant.Septiembre <dbl>,
## #   Cant.Octubre <dbl>, Cant.Noviembre <dbl>, Cant.Diciembre <dbl>,
## #   Cant.Atropello <dbl>, Cant.CaidadeOcupante <dbl>, Cant.Choque <dbl>,
## #   Cant.ChoqueyAtropello <dbl>, Cant.Incendio <dbl>, Cant.Otro <dbl>,
## #   Cant.Volcamiento <dbl>, Prom.Lunes <dbl>, Prom.Martes <dbl>,
## #   Prom.Miercoles <dbl>, Prom.Jueves <dbl>, Prom.Viernes <dbl>,
## #   Prom.Sabado <dbl>, Prom.Domingo <dbl>, Cant.Herido <dbl>,
## #   Cant.Muerto <dbl>, Cant.Solodaños <dbl>

# List the column names of the loaded table.
names(accidentes)

##  [1] "Barrio"                "Comuna"                "LONGITUD_BARRIO"      
##  [4] "LATITUD_BARRIO"        "Cant.LunesAViernes"    "Cant.SabadoYDomingo"  
##  [7] "Cant.Enero"            "Cant.Febrero"          "Cant.Marzo"           
## [10] "Cant.Abril"            "Cant.Mayo"             "Cant.Junio"           
## [13] "Cant.Julio"            "Cant.Agosto"           "Cant.Septiembre"      
## [16] "Cant.Octubre"          "Cant.Noviembre"        "Cant.Diciembre"       
## [19] "Cant.Atropello"        "Cant.CaidadeOcupante"  "Cant.Choque"          
## [22] "Cant.ChoqueyAtropello" "Cant.Incendio"         "Cant.Otro"            
## [25] "Cant.Volcamiento"      "Prom.Lunes"            "Prom.Martes"          
## [28] "Prom.Miercoles"        "Prom.Jueves"           "Prom.Viernes"         
## [31] "Prom.Sabado"           "Prom.Domingo"          "Cant.Herido"          
## [34] "Cant.Muerto"           "Cant.Solodaños"

# Definición de las variables
# Barrio = Nombre del barrio
# Comuna = Nombre de la comuna
# Coordenadas
# LONGITUD_BARRIO = Longitud Barrio
# LATITUD_BARRIO = Latitud Barrio

# accidentes$Cant.Enero = Cantidad acumulada de accidentes en Enero de 2014-2018
# accidentes$Cant.Febrero = Cantidad acumulada de accidentes en febrero de 2014-2018
# accidentes$Cant.Marzo = Cantidad acumulada de accidentes en Marzo de 2014-2018
# accidentes$Cant.Abril = Cantidad acumulada de accidentes en Abril de 2014-2018
# accidentes$Cant.Mayo = Cantidad acumulada de accidentes en Mayo de 2014-2018
# accidentes$Cant.Junio = Cantidad acumulada de accidentes en Junio de 2014-2018
# accidentes$Cant.Julio = Cantidad acumulada de accidentes en Julio de 2014-2018
# accidentes$Cant.Agosto = Cantidad acumulada de accidentes en Agosto de 2014-2018
# accidentes$Cant.Septiembre = Cantidad acumulada de accidentes en septiembre de 2014-2018
# accidentes$Cant.Octubre = Cantidad acumulada de accidentes en Octubre de 2014-2018
# accidentes$Cant.Noviembre = Cantidad acumulada de accidentes en Noviembre de 2014-2018
# accidentes$Cant.Diciembre = Cantidad acumulada de accidentes en Diciembre de 2014-2018
# accidentes$Cant.LunesAViernes = Cantidad acumulada de accidentes en dias laborales de 2014-2018
# accidentes$Cant.SabadoYDomingo = Cantidad acumulada de accidentes fines de semana de 2014-2018

# Promedio de Accidentes por día entre 2014 y 2018
# Prom.Lunes
# Prom.Martes
# Prom.Miercoles
# Prom.Jueves
# Prom.Viernes         
# Prom.Sabado
# Prom.Domingo
# (Cant.Herido, Cant.Muerto y Cant.Solodaños no son promedios diarios: son
# cantidades acumuladas por gravedad y se definen más abajo.)

# Cantidad acumulada de Accidentes por clase entre 2014 y 2018
# Cant.Atropello
# Cant.CaidadeOcupante
# Cant.Choque
# Cant.ChoqueyAtropello
# Cant.Incendio
# Cant.Otro
# Cant.Volcamiento

# Cantidad acumulada de Accidentes por Gravedad entre 2014 y 2018
# Cant.Herido = Cantidad de accidentes con heridos
# Cant.Muerto = Cantidad de accidentes con muertos
# Cant.Solodaños = Cantidad de accidentes con solo daños



# accidentesmes.df<-accidentes[,c(7,8,9,10,11,12,13,14,15,16,17,18)]
# colnames(accidentesmes.df)


# Explore the data structure: first the class vector of the object, then a
# compact per-column overview (type and leading values).

class(x = accidentes)

## [1] "tbl_df"     "tbl"        "data.frame"

str(object = accidentes)

## tibble [318 x 35] (S3: tbl_df/tbl/data.frame)
##  $ Barrio               : chr [1:318] "Aguas Frias" "Aldea Pablo VI" "Alejandría" "Alejandro Echavarría" ...
##  $ Comuna               : chr [1:318] "Corregimiento de Altavista" "Popular" "El Poblado" "Buenos Aires" ...
##  $ LONGITUD_BARRIO      : num [1:318] -75.6 -75.5 -75.7 -75.5 -75.6 ...
##  $ LATITUD_BARRIO       : num [1:318] 6.24 6.29 6.22 6.24 6.28 ...
##  $ Cant.LunesAViernes   : num [1:318] 55 50 519 598 828 618 255 120 223 166 ...
##  $ Cant.SabadoYDomingo  : num [1:318] 31 34 91 242 320 168 101 64 43 74 ...
##  $ Cant.Enero           : num [1:318] 5 10 45 60 96 56 34 12 24 16 ...
##  $ Cant.Febrero         : num [1:318] 8 8 54 58 90 71 29 12 21 24 ...
##  $ Cant.Marzo           : num [1:318] 7 7 46 82 115 65 31 20 15 26 ...
##  $ Cant.Abril           : num [1:318] 4 8 44 75 90 67 28 19 26 18 ...
##  $ Cant.Mayo            : num [1:318] 13 6 62 81 102 78 25 11 16 22 ...
##  $ Cant.Junio           : num [1:318] 6 4 52 80 71 67 31 14 24 18 ...
##  $ Cant.Julio           : num [1:318] 10 12 51 60 104 68 34 15 21 13 ...
##  $ Cant.Agosto          : num [1:318] 10 7 53 68 89 72 36 16 24 23 ...
##  $ Cant.Septiembre      : num [1:318] 5 10 52 81 95 71 31 18 32 19 ...
##  $ Cant.Octubre         : num [1:318] 6 2 51 66 121 52 26 13 25 14 ...
##  $ Cant.Noviembre       : num [1:318] 7 5 57 58 81 55 22 16 16 24 ...
##  $ Cant.Diciembre       : num [1:318] 5 5 43 71 94 64 29 18 22 23 ...
##  $ Cant.Atropello       : num [1:318] 16 27 15 98 132 51 62 30 15 58 ...
##  $ Cant.CaidadeOcupante : num [1:318] 12 12 19 84 178 146 29 29 19 25 ...
##  $ Cant.Choque          : num [1:318] 40 35 530 482 608 468 209 77 199 115 ...
##  $ Cant.ChoqueyAtropello: num [1:318] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Cant.Incendio        : num [1:318] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Cant.Otro            : num [1:318] 14 9 37 125 181 105 44 38 23 30 ...
##  $ Cant.Volcamiento     : num [1:318] 4 1 9 51 49 16 12 10 10 12 ...
##  $ Prom.Lunes           : num [1:318] 2.25 2.75 14.6 26.8 31.8 22.6 11.4 5.6 9.8 7.8 ...
##  $ Prom.Martes          : num [1:318] 1.67 2.67 24 22.8 35.6 ...
##  $ Prom.Miercoles       : num [1:318] 1.8 3 23 22.8 28.2 23.6 8.4 5.2 9.2 6.6 ...
##  $ Prom.Jueves          : num [1:318] 3.8 1.2 21.8 22.6 36 26.4 8.6 3.8 7 7 ...
##  $ Prom.Viernes         : num [1:318] 2.6 2.5 20.4 24.6 34 24.2 11.4 5 10.2 6.8 ...
##  $ Prom.Sabado          : num [1:318] 3.75 3.2 12.8 24 36 18.8 9.8 6.2 5.8 8.4 ...
##  $ Prom.Domingo         : num [1:318] 3.2 3.6 5.4 24.4 28 14.8 10.4 6.6 2.8 6.4 ...
##  $ Cant.Herido          : num [1:318] 64 65 166 595 772 513 214 145 126 159 ...
##  $ Cant.Muerto          : num [1:318] 0 2 1 2 6 2 3 6 0 3 ...
##  $ Cant.Solodaños       : num [1:318] 22 17 443 243 370 271 139 33 140 78 ...

# Per-variable descriptive report (n, missing, distinct values, quantiles).
describe(x = accidentes)

## accidentes 
## 
##  35  Variables      318  Observations
## --------------------------------------------------------------------------------
## Barrio 
##        n  missing distinct 
##      318        0      318 
## 
## lowest : Aguas Frias          Aldea Pablo VI       Alejandría           Alejandro Echavarría Alfonso López       
## highest: Villa Nueva          Villa Turbay         Villatina            Volcana Guayabal     Yolombo             
## --------------------------------------------------------------------------------
## Comuna 
##        n  missing distinct 
##      318        0       22 
## 
## lowest : Aranjuez                      Belén                         Buenos Aires                  Cabecera San Antonio de Prado Castilla                     
## highest: Popular                       Robledo                       San Javier                    Santa Cruz                    Villa Hermosa                
## --------------------------------------------------------------------------------
## LONGITUD_BARRIO 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      303        1   -75.59  0.04326   -75.69   -75.63 
##      .25      .50      .75      .90      .95 
##   -75.60   -75.58   -75.56   -75.54   -75.54 
## 
## lowest : -75.70382 -75.69051 -75.66146 -75.65850 -75.65724
## highest: -75.52768 -75.51358 -75.50756 -75.49999 -75.48074
## --------------------------------------------------------------------------------
## LATITUD_BARRIO 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      303        1    6.254  0.03634    6.202    6.212 
##      .25      .50      .75      .90      .95 
##    6.231    6.256    6.280    6.294    6.302 
## 
## lowest : 6.174923 6.178418 6.181063 6.183006 6.183561
## highest: 6.308200 6.308224 6.309384 6.331575 6.343410
## --------------------------------------------------------------------------------
## Cant.LunesAViernes 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      264        1    513.7    607.8     6.00    17.70 
##      .25      .50      .75      .90      .95 
##    81.25   291.00   651.25  1309.20  1909.00 
## 
## lowest :    0    1    3    4    5, highest: 3004 3367 3502 3546 4087
## --------------------------------------------------------------------------------
## Cant.SabadoYDomingo 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      196        1    161.2    171.1      2.0      8.7 
##      .25      .50      .75      .90      .95 
##     34.0    110.0    227.8    339.3    564.0 
## 
## lowest :    0    1    2    3    4, highest:  756  764  776  917 1002
## --------------------------------------------------------------------------------
## Cant.Enero 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      115    0.999    48.65    55.78      0.0      2.0 
##      .25      .50      .75      .90      .95 
##      9.0     31.0     60.0    117.3    178.7 
## 
## lowest :   0   1   2   3   4, highest: 294 297 305 315 366
## --------------------------------------------------------------------------------
## Cant.Febrero 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      131        1    54.28    62.87      0.0      1.7 
##      .25      .50      .75      .90      .95 
##     10.0     34.0     68.0    131.6    205.6 
## 
## lowest :   0   1   2   3   4, highest: 327 336 345 389 391
## --------------------------------------------------------------------------------
## Cant.Marzo 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      125        1    58.12    66.18      0.0      2.0 
##      .25      .50      .75      .90      .95 
##     12.0     36.5     78.0    140.9    203.7 
## 
## lowest :   0   1   2   3   4, highest: 318 322 382 399 421
## --------------------------------------------------------------------------------
## Cant.Abril 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      125    0.999    55.39    64.23     0.00     1.70 
##      .25      .50      .75      .90      .95 
##     8.25    35.00    71.00   132.20   218.60 
## 
## lowest :   0   1   2   3   4, highest: 301 303 355 389 396
## --------------------------------------------------------------------------------
## Cant.Mayo 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      135        1     58.5    68.08     0.00     2.00 
##      .25      .50      .75      .90      .95 
##    10.25    33.50    79.75   138.20   211.65 
## 
## lowest :   0   1   2   3   4, highest: 310 311 362 420 444
## --------------------------------------------------------------------------------
## Cant.Junio 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      121    0.999    54.07     62.9     0.00     2.00 
##      .25      .50      .75      .90      .95 
##     9.00    31.00    72.75   122.00   198.90 
## 
## lowest :   0   1   2   3   4, highest: 292 305 352 366 436
## --------------------------------------------------------------------------------
## Cant.Julio 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      130        1    57.58    66.17     0.85     3.00 
##      .25      .50      .75      .90      .95 
##    11.00    35.00    76.00   134.50   214.80 
## 
## lowest :   0   1   2   3   4, highest: 314 339 351 358 464
## --------------------------------------------------------------------------------
## Cant.Agosto 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      132        1    60.16       70     0.00     2.00 
##      .25      .50      .75      .90      .95 
##    11.25    36.00    80.75   140.30   224.15 
## 
## lowest :   0   1   2   3   4, highest: 367 369 374 408 437
## --------------------------------------------------------------------------------
## Cant.Septiembre 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      134        1    58.83    68.33      0.0      2.0 
##      .25      .50      .75      .90      .95 
##     10.0     37.5     77.0    151.5    221.3 
## 
## lowest :   0   1   2   3   4, highest: 332 367 372 396 426
## --------------------------------------------------------------------------------
## Cant.Octubre 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      129        1    58.19    67.65     1.00     2.00 
##      .25      .50      .75      .90      .95 
##    11.25    33.00    76.75   145.60   227.65 
## 
## lowest :   0   1   2   3   4, highest: 323 342 357 378 475
## --------------------------------------------------------------------------------
## Cant.Noviembre 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      127        1    54.59    63.37     0.85     2.00 
##      .25      .50      .75      .90      .95 
##    11.00    33.50    68.25   130.10   200.60 
## 
## lowest :   0   1   2   3   4, highest: 329 342 369 390 394
## --------------------------------------------------------------------------------
## Cant.Diciembre 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      133        1     56.5     65.5      1.0      2.7 
##      .25      .50      .75      .90      .95 
##     10.0     32.5     69.0    142.5    209.6 
## 
## lowest :   0   1   2   3   4, highest: 321 344 368 370 446
## --------------------------------------------------------------------------------
## Cant.Atropello 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      143        1    66.12    71.87      0.0      3.0 
##      .25      .50      .75      .90      .95 
##     15.0     43.5     92.0    149.2    193.0 
## 
## lowest :   0   1   2   3   4, highest: 322 350 403 410 920
## --------------------------------------------------------------------------------
## Cant.CaidadeOcupante 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      140        1     59.9    63.81      1.0      3.7 
##      .25      .50      .75      .90      .95 
##     12.0     40.0     86.0    151.0    187.4 
## 
## lowest :   0   1   2   3   4, highest: 250 289 293 328 363
## --------------------------------------------------------------------------------
## Cant.Choque 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      250        1    455.9    567.1     2.85    12.00 
##      .25      .50      .75      .90      .95 
##    57.25   225.50   533.75  1275.60  1895.15 
## 
## lowest :    0    1    2    3    4, highest: 2956 3220 3261 3321 3545
## --------------------------------------------------------------------------------
## Cant.ChoqueyAtropello 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##      318        0        2    0.009        1 0.003145 0.006289 
## 
## --------------------------------------------------------------------------------
## Cant.Incendio 
##        n  missing distinct     Info     Mean      Gmd 
##      318        0        3    0.185  0.07547   0.1425 
##                             
## Value          0     1     2
## Frequency    297    18     3
## Proportion 0.934 0.057 0.009
## --------------------------------------------------------------------------------
## Cant.Otro 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      142        1    71.36    76.77     1.00     4.00 
##      .25      .50      .75      .90      .95 
##    15.25    44.00   100.75   166.30   220.60 
## 
## lowest :   0   1   2   3   4, highest: 346 357 390 458 472
## --------------------------------------------------------------------------------
## Cant.Volcamiento 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0       72    0.999    21.47     23.1      0.0      1.0 
##      .25      .50      .75      .90      .95 
##      5.0     14.0     29.0     51.0     69.3 
## 
## lowest :   0   1   2   3   4, highest: 113 121 124 129 148
## --------------------------------------------------------------------------------
## Prom.Lunes 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      170        1    19.43    22.17     1.00     1.50 
##      .25      .50      .75      .90      .95 
##     3.80    11.30    25.35    46.24    72.29 
## 
## lowest :   0.000000   1.000000   1.333333   1.400000   1.500000
## highest: 109.000000 119.600000 130.200000 137.600000 148.000000
## --------------------------------------------------------------------------------
## Prom.Martes 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      179        1     21.1    25.02    1.000    1.250 
##      .25      .50      .75      .90      .95 
##    3.638   12.000   26.000   51.020   82.060 
## 
## lowest :   0.000000   1.000000   1.200000   1.250000   1.333333
## highest: 129.000000 134.800000 147.200000 152.200000 174.400000
## --------------------------------------------------------------------------------
## Prom.Miercoles 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      179        1     20.8    24.47    1.000    1.333 
##      .25      .50      .75      .90      .95 
##    3.525   11.900   26.200   51.760   75.790 
## 
## lowest :   0.000000   1.000000   1.250000   1.333333   1.400000
## highest: 128.600000 132.200000 136.600000 145.600000 168.200000
## --------------------------------------------------------------------------------
## Prom.Jueves 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      175        1    20.42    23.89    1.000    1.333 
##      .25      .50      .75      .90      .95 
##    3.800   11.600   26.700   53.240   78.200 
## 
## lowest :   0.000000   1.000000   1.200000   1.250000   1.333333
## highest: 115.000000 118.200000 131.200000 138.000000 162.000000
## --------------------------------------------------------------------------------
## Prom.Viernes 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      176        1    21.56    25.56     1.00     1.38 
##      .25      .50      .75      .90      .95 
##     3.40    12.10    27.80    53.48    77.41 
## 
## lowest :   0.000000   1.000000   1.250000   1.333333   1.400000
## highest: 128.400000 144.400000 149.000000 150.000000 164.800000
## --------------------------------------------------------------------------------
## Prom.Sabado 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      170        1    19.55    21.66     1.00     1.50 
##      .25      .50      .75      .90      .95 
##     4.00    12.80    25.55    43.02    72.44 
## 
## lowest :   0.000000   1.000000   1.200000   1.250000   1.333333
## highest: 107.200000 109.600000 112.000000 124.400000 150.000000
## --------------------------------------------------------------------------------
## Prom.Domingo 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      149        1    12.93    12.63     1.00     1.38 
##      .25      .50      .75      .90      .95 
##     3.45    10.10    18.60    28.32    38.15 
## 
## lowest :  0.000000  1.000000  1.333333  1.400000  1.500000
## highest: 50.400000 52.000000 52.400000 59.000000 71.400000
## --------------------------------------------------------------------------------
## Cant.Herido 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      254        1    367.6    394.7     3.85    19.00 
##      .25      .50      .75      .90      .95 
##    81.50   238.00   517.50   811.90  1186.15 
## 
## lowest :    0    1    2    3    4, highest: 1908 1909 1986 2123 2270
## --------------------------------------------------------------------------------
## Cant.Muerto 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0       29    0.972        4    5.189      0.0      0.0 
##      .25      .50      .75      .90      .95 
##      0.0      2.0      5.0     11.0     16.3 
## 
## lowest :  0  1  2  3  4, highest: 26 27 28 29 34
## --------------------------------------------------------------------------------
## Cant.Solodaños 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      318        0      233        1    303.2    392.9     1.00     6.70 
##      .25      .50      .75      .90      .95 
##    31.25   139.50   341.25   858.30  1354.05 
## 
## lowest :    0    1    2    3    4, highest: 2169 2212 2219 2222 2937
## --------------------------------------------------------------------------------

# Summary statistics (min, quartiles, mean, max) for every column.
summary(object = accidentes)

##     Barrio             Comuna          LONGITUD_BARRIO  LATITUD_BARRIO 
##  Length:318         Length:318         Min.   :-75.70   Min.   :6.175  
##  Class :character   Class :character   1st Qu.:-75.60   1st Qu.:6.231  
##  Mode  :character   Mode  :character   Median :-75.58   Median :6.256  
##                                        Mean   :-75.59   Mean   :6.254  
##                                        3rd Qu.:-75.56   3rd Qu.:6.280  
##                                        Max.   :-75.48   Max.   :6.343  
##  Cant.LunesAViernes Cant.SabadoYDomingo   Cant.Enero      Cant.Febrero   
##  Min.   :   0.00    Min.   :   0.0      Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  81.25    1st Qu.:  34.0      1st Qu.:  9.00   1st Qu.: 10.00  
##  Median : 291.00    Median : 110.0      Median : 31.00   Median : 34.00  
##  Mean   : 513.68    Mean   : 161.2      Mean   : 48.65   Mean   : 54.28  
##  3rd Qu.: 651.25    3rd Qu.: 227.8      3rd Qu.: 60.00   3rd Qu.: 68.00  
##  Max.   :4087.00    Max.   :1002.0      Max.   :366.00   Max.   :391.00  
##    Cant.Marzo       Cant.Abril       Cant.Mayo        Cant.Junio    
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 12.00   1st Qu.:  8.25   1st Qu.: 10.25   1st Qu.:  9.00  
##  Median : 36.50   Median : 35.00   Median : 33.50   Median : 31.00  
##  Mean   : 58.12   Mean   : 55.39   Mean   : 58.50   Mean   : 54.07  
##  3rd Qu.: 78.00   3rd Qu.: 71.00   3rd Qu.: 79.75   3rd Qu.: 72.75  
##  Max.   :421.00   Max.   :396.00   Max.   :444.00   Max.   :436.00  
##    Cant.Julio      Cant.Agosto     Cant.Septiembre   Cant.Octubre   
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 11.00   1st Qu.: 11.25   1st Qu.: 10.00   1st Qu.: 11.25  
##  Median : 35.00   Median : 36.00   Median : 37.50   Median : 33.00  
##  Mean   : 57.58   Mean   : 60.16   Mean   : 58.83   Mean   : 58.19  
##  3rd Qu.: 76.00   3rd Qu.: 80.75   3rd Qu.: 77.00   3rd Qu.: 76.75  
##  Max.   :464.00   Max.   :437.00   Max.   :426.00   Max.   :475.00  
##  Cant.Noviembre   Cant.Diciembre  Cant.Atropello   Cant.CaidadeOcupante
##  Min.   :  0.00   Min.   :  0.0   Min.   :  0.00   Min.   :  0.0       
##  1st Qu.: 11.00   1st Qu.: 10.0   1st Qu.: 15.00   1st Qu.: 12.0       
##  Median : 33.50   Median : 32.5   Median : 43.50   Median : 40.0       
##  Mean   : 54.59   Mean   : 56.5   Mean   : 66.12   Mean   : 59.9       
##  3rd Qu.: 68.25   3rd Qu.: 69.0   3rd Qu.: 92.00   3rd Qu.: 86.0       
##  Max.   :394.00   Max.   :446.0   Max.   :920.00   Max.   :363.0       
##   Cant.Choque      Cant.ChoqueyAtropello Cant.Incendio       Cant.Otro     
##  Min.   :   0.00   Min.   :0.000000      Min.   :0.00000   Min.   :  0.00  
##  1st Qu.:  57.25   1st Qu.:0.000000      1st Qu.:0.00000   1st Qu.: 15.25  
##  Median : 225.50   Median :0.000000      Median :0.00000   Median : 44.00  
##  Mean   : 455.93   Mean   :0.003145      Mean   :0.07547   Mean   : 71.36  
##  3rd Qu.: 533.75   3rd Qu.:0.000000      3rd Qu.:0.00000   3rd Qu.:100.75  
##  Max.   :3545.00   Max.   :1.000000      Max.   :2.00000   Max.   :472.00  
##  Cant.Volcamiento   Prom.Lunes      Prom.Martes      Prom.Miercoles   
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.000   Min.   :  0.000  
##  1st Qu.:  5.00   1st Qu.:  3.80   1st Qu.:  3.638   1st Qu.:  3.525  
##  Median : 14.00   Median : 11.30   Median : 12.000   Median : 11.900  
##  Mean   : 21.47   Mean   : 19.43   Mean   : 21.100   Mean   : 20.805  
##  3rd Qu.: 29.00   3rd Qu.: 25.35   3rd Qu.: 26.000   3rd Qu.: 26.200  
##  Max.   :148.00   Max.   :148.00   Max.   :174.400   Max.   :168.200  
##   Prom.Jueves      Prom.Viernes     Prom.Sabado      Prom.Domingo  
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.:  3.80   1st Qu.:  3.40   1st Qu.:  4.00   1st Qu.: 3.45  
##  Median : 11.60   Median : 12.10   Median : 12.80   Median :10.10  
##  Mean   : 20.42   Mean   : 21.56   Mean   : 19.55   Mean   :12.93  
##  3rd Qu.: 26.70   3rd Qu.: 27.80   3rd Qu.: 25.55   3rd Qu.:18.60  
##  Max.   :162.00   Max.   :164.80   Max.   :150.00   Max.   :71.40  
##   Cant.Herido      Cant.Muerto Cant.Solodaños   
##  Min.   :   0.0   Min.   : 0   Min.   :   0.00  
##  1st Qu.:  81.5   1st Qu.: 0   1st Qu.:  31.25  
##  Median : 238.0   Median : 2   Median : 139.50  
##  Mean   : 367.6   Mean   : 4   Mean   : 303.22  
##  3rd Qu.: 517.5   3rd Qu.: 5   3rd Qu.: 341.25  
##  Max.   :2270.0   Max.   :34   Max.   :2937.00

# Re-list the column names before selecting the columns to keep.
names(accidentes)

##  [1] "Barrio"                "Comuna"                "LONGITUD_BARRIO"      
##  [4] "LATITUD_BARRIO"        "Cant.LunesAViernes"    "Cant.SabadoYDomingo"  
##  [7] "Cant.Enero"            "Cant.Febrero"          "Cant.Marzo"           
## [10] "Cant.Abril"            "Cant.Mayo"             "Cant.Junio"           
## [13] "Cant.Julio"            "Cant.Agosto"           "Cant.Septiembre"      
## [16] "Cant.Octubre"          "Cant.Noviembre"        "Cant.Diciembre"       
## [19] "Cant.Atropello"        "Cant.CaidadeOcupante"  "Cant.Choque"          
## [22] "Cant.ChoqueyAtropello" "Cant.Incendio"         "Cant.Otro"            
## [25] "Cant.Volcamiento"      "Prom.Lunes"            "Prom.Martes"          
## [28] "Prom.Miercoles"        "Prom.Jueves"           "Prom.Viernes"         
## [31] "Prom.Sabado"           "Prom.Domingo"          "Cant.Herido"          
## [34] "Cant.Muerto"           "Cant.Solodaños"

# Keep only the accident count/average columns by dropping the identifier and
# coordinate columns. The columns are removed by name rather than by position
# so the result does not depend on the column order of the workbook.
excluded_columns <- c(
  "Barrio", "Comuna", "LONGITUD_BARRIO", "LATITUD_BARRIO"
)

# Fail early if the workbook layout changed and an expected column is absent.
stopifnot(all(excluded_columns %in% names(accidentes)))

kept_columns <- setdiff(names(accidentes), excluded_columns)
accidentes.numericas <- accidentes[, kept_columns]

# Preview the reduced table.
head(accidentes.numericas)

## # A tibble: 6 x 31
##   Cant.LunesAVier~ Cant.SabadoYDom~ Cant.Enero Cant.Febrero Cant.Marzo
##              <dbl>            <dbl>      <dbl>        <dbl>      <dbl>
## 1               55               31          5            8          7
## 2               50               34         10            8          7
## 3              519               91         45           54         46
## 4              598              242         60           58         82
## 5              828              320         96           90        115
## 6              618              168         56           71         65
## # ... with 26 more variables: Cant.Abril <dbl>, Cant.Mayo <dbl>,
## #   Cant.Junio <dbl>, Cant.Julio <dbl>, Cant.Agosto <dbl>,
## #   Cant.Septiembre <dbl>, Cant.Octubre <dbl>, Cant.Noviembre <dbl>,
## #   Cant.Diciembre <dbl>, Cant.Atropello <dbl>, Cant.CaidadeOcupante <dbl>,
## #   Cant.Choque <dbl>, Cant.ChoqueyAtropello <dbl>, Cant.Incendio <dbl>,
## #   Cant.Otro <dbl>, Cant.Volcamiento <dbl>, Prom.Lunes <dbl>,
## #   Prom.Martes <dbl>, Prom.Miercoles <dbl>, Prom.Jueves <dbl>,
## #   Prom.Viernes <dbl>, Prom.Sabado <dbl>, Prom.Domingo <dbl>,
## #   Cant.Herido <dbl>, Cant.Muerto <dbl>, Cant.Solodaños <dbl>

# List the column names that remain in the numeric subset.
names(accidentes.numericas)

##  [1] "Cant.LunesAViernes"    "Cant.SabadoYDomingo"   "Cant.Enero"           
##  [4] "Cant.Febrero"          "Cant.Marzo"            "Cant.Abril"           
##  [7] "Cant.Mayo"             "Cant.Junio"            "Cant.Julio"           
## [10] "Cant.Agosto"           "Cant.Septiembre"       "Cant.Octubre"         
## [13] "Cant.Noviembre"        "Cant.Diciembre"        "Cant.Atropello"       
## [16] "Cant.CaidadeOcupante"  "Cant.Choque"           "Cant.ChoqueyAtropello"
## [19] "Cant.Incendio"         "Cant.Otro"             "Cant.Volcamiento"     
## [22] "Prom.Lunes"            "Prom.Martes"           "Prom.Miercoles"       
## [25] "Prom.Jueves"           "Prom.Viernes"          "Prom.Sabado"          
## [28] "Prom.Domingo"          "Cant.Herido"           "Cant.Muerto"          
## [31] "Cant.Solodaños"

# Number of rows and columns of the numeric subset.
c(nrow(accidentes.numericas), ncol(accidentes.numericas))

## [1] 318  31

# Check the class of the numeric subset (the output below reports a tibble).
class(accidentes.numericas)

## [1] "tbl_df"     "tbl"        "data.frame"

# Show the structure of the numeric subset: type and first values per column.
str(accidentes.numericas)

## tibble [318 x 31] (S3: tbl_df/tbl/data.frame)
##  $ Cant.LunesAViernes   : num [1:318] 55 50 519 598 828 618 255 120 223 166 ...
##  $ Cant.SabadoYDomingo  : num [1:318] 31 34 91 242 320 168 101 64 43 74 ...
##  $ Cant.Enero           : num [1:318] 5 10 45 60 96 56 34 12 24 16 ...
##  $ Cant.Febrero         : num [1:318] 8 8 54 58 90 71 29 12 21 24 ...
##  $ Cant.Marzo           : num [1:318] 7 7 46 82 115 65 31 20 15 26 ...
##  $ Cant.Abril           : num [1:318] 4 8 44 75 90 67 28 19 26 18 ...
##  $ Cant.Mayo            : num [1:318] 13 6 62 81 102 78 25 11 16 22 ...
##  $ Cant.Junio           : num [1:318] 6 4 52 80 71 67 31 14 24 18 ...
##  $ Cant.Julio           : num [1:318] 10 12 51 60 104 68 34 15 21 13 ...
##  $ Cant.Agosto          : num [1:318] 10 7 53 68 89 72 36 16 24 23 ...
##  $ Cant.Septiembre      : num [1:318] 5 10 52 81 95 71 31 18 32 19 ...
##  $ Cant.Octubre         : num [1:318] 6 2 51 66 121 52 26 13 25 14 ...
##  $ Cant.Noviembre       : num [1:318] 7 5 57 58 81 55 22 16 16 24 ...
##  $ Cant.Diciembre       : num [1:318] 5 5 43 71 94 64 29 18 22 23 ...
##  $ Cant.Atropello       : num [1:318] 16 27 15 98 132 51 62 30 15 58 ...
##  $ Cant.CaidadeOcupante : num [1:318] 12 12 19 84 178 146 29 29 19 25 ...
##  $ Cant.Choque          : num [1:318] 40 35 530 482 608 468 209 77 199 115 ...
##  $ Cant.ChoqueyAtropello: num [1:318] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Cant.Incendio        : num [1:318] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Cant.Otro            : num [1:318] 14 9 37 125 181 105 44 38 23 30 ...
##  $ Cant.Volcamiento     : num [1:318] 4 1 9 51 49 16 12 10 10 12 ...
##  $ Prom.Lunes           : num [1:318] 2.25 2.75 14.6 26.8 31.8 22.6 11.4 5.6 9.8 7.8 ...
##  $ Prom.Martes          : num [1:318] 1.67 2.67 24 22.8 35.6 ...
##  $ Prom.Miercoles       : num [1:318] 1.8 3 23 22.8 28.2 23.6 8.4 5.2 9.2 6.6 ...
##  $ Prom.Jueves          : num [1:318] 3.8 1.2 21.8 22.6 36 26.4 8.6 3.8 7 7 ...
##  $ Prom.Viernes         : num [1:318] 2.6 2.5 20.4 24.6 34 24.2 11.4 5 10.2 6.8 ...
##  $ Prom.Sabado          : num [1:318] 3.75 3.2 12.8 24 36 18.8 9.8 6.2 5.8 8.4 ...
##  $ Prom.Domingo         : num [1:318] 3.2 3.6 5.4 24.4 28 14.8 10.4 6.6 2.8 6.4 ...
##  $ Cant.Herido          : num [1:318] 64 65 166 595 772 513 214 145 126 159 ...
##  $ Cant.Muerto          : num [1:318] 0 2 1 2 6 2 3 6 0 3 ...
##  $ Cant.Solodaños       : num [1:318] 22 17 443 243 370 271 139 33 140 78 ...

# Per-column summary (min, quartiles, median, mean, max) of the numeric subset.
summary(accidentes.numericas)

##  Cant.LunesAViernes Cant.SabadoYDomingo   Cant.Enero      Cant.Febrero   
##  Min.   :   0.00    Min.   :   0.0      Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  81.25    1st Qu.:  34.0      1st Qu.:  9.00   1st Qu.: 10.00  
##  Median : 291.00    Median : 110.0      Median : 31.00   Median : 34.00  
##  Mean   : 513.68    Mean   : 161.2      Mean   : 48.65   Mean   : 54.28  
##  3rd Qu.: 651.25    3rd Qu.: 227.8      3rd Qu.: 60.00   3rd Qu.: 68.00  
##  Max.   :4087.00    Max.   :1002.0      Max.   :366.00   Max.   :391.00  
##    Cant.Marzo       Cant.Abril       Cant.Mayo        Cant.Junio    
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 12.00   1st Qu.:  8.25   1st Qu.: 10.25   1st Qu.:  9.00  
##  Median : 36.50   Median : 35.00   Median : 33.50   Median : 31.00  
##  Mean   : 58.12   Mean   : 55.39   Mean   : 58.50   Mean   : 54.07  
##  3rd Qu.: 78.00   3rd Qu.: 71.00   3rd Qu.: 79.75   3rd Qu.: 72.75  
##  Max.   :421.00   Max.   :396.00   Max.   :444.00   Max.   :436.00  
##    Cant.Julio      Cant.Agosto     Cant.Septiembre   Cant.Octubre   
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 11.00   1st Qu.: 11.25   1st Qu.: 10.00   1st Qu.: 11.25  
##  Median : 35.00   Median : 36.00   Median : 37.50   Median : 33.00  
##  Mean   : 57.58   Mean   : 60.16   Mean   : 58.83   Mean   : 58.19  
##  3rd Qu.: 76.00   3rd Qu.: 80.75   3rd Qu.: 77.00   3rd Qu.: 76.75  
##  Max.   :464.00   Max.   :437.00   Max.   :426.00   Max.   :475.00  
##  Cant.Noviembre   Cant.Diciembre  Cant.Atropello   Cant.CaidadeOcupante
##  Min.   :  0.00   Min.   :  0.0   Min.   :  0.00   Min.   :  0.0       
##  1st Qu.: 11.00   1st Qu.: 10.0   1st Qu.: 15.00   1st Qu.: 12.0       
##  Median : 33.50   Median : 32.5   Median : 43.50   Median : 40.0       
##  Mean   : 54.59   Mean   : 56.5   Mean   : 66.12   Mean   : 59.9       
##  3rd Qu.: 68.25   3rd Qu.: 69.0   3rd Qu.: 92.00   3rd Qu.: 86.0       
##  Max.   :394.00   Max.   :446.0   Max.   :920.00   Max.   :363.0       
##   Cant.Choque      Cant.ChoqueyAtropello Cant.Incendio       Cant.Otro     
##  Min.   :   0.00   Min.   :0.000000      Min.   :0.00000   Min.   :  0.00  
##  1st Qu.:  57.25   1st Qu.:0.000000      1st Qu.:0.00000   1st Qu.: 15.25  
##  Median : 225.50   Median :0.000000      Median :0.00000   Median : 44.00  
##  Mean   : 455.93   Mean   :0.003145      Mean   :0.07547   Mean   : 71.36  
##  3rd Qu.: 533.75   3rd Qu.:0.000000      3rd Qu.:0.00000   3rd Qu.:100.75  
##  Max.   :3545.00   Max.   :1.000000      Max.   :2.00000   Max.   :472.00  
##  Cant.Volcamiento   Prom.Lunes      Prom.Martes      Prom.Miercoles   
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.000   Min.   :  0.000  
##  1st Qu.:  5.00   1st Qu.:  3.80   1st Qu.:  3.638   1st Qu.:  3.525  
##  Median : 14.00   Median : 11.30   Median : 12.000   Median : 11.900  
##  Mean   : 21.47   Mean   : 19.43   Mean   : 21.100   Mean   : 20.805  
##  3rd Qu.: 29.00   3rd Qu.: 25.35   3rd Qu.: 26.000   3rd Qu.: 26.200  
##  Max.   :148.00   Max.   :148.00   Max.   :174.400   Max.   :168.200  
##   Prom.Jueves      Prom.Viernes     Prom.Sabado      Prom.Domingo  
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.:  3.80   1st Qu.:  3.40   1st Qu.:  4.00   1st Qu.: 3.45  
##  Median : 11.60   Median : 12.10   Median : 12.80   Median :10.10  
##  Mean   : 20.42   Mean   : 21.56   Mean   : 19.55   Mean   :12.93  
##  3rd Qu.: 26.70   3rd Qu.: 27.80   3rd Qu.: 25.55   3rd Qu.:18.60  
##  Max.   :162.00   Max.   :164.80   Max.   :150.00   Max.   :71.40  
##   Cant.Herido      Cant.Muerto Cant.Solodaños   
##  Min.   :   0.0   Min.   : 0   Min.   :   0.00  
##  1st Qu.:  81.5   1st Qu.: 0   1st Qu.:  31.25  
##  Median : 238.0   Median : 2   Median : 139.50  
##  Mean   : 367.6   Mean   : 4   Mean   : 303.22  
##  3rd Qu.: 517.5   3rd Qu.: 5   3rd Qu.: 341.25  
##  Max.   :2270.0   Max.   :34   Max.   :2937.00

# Correlation analysis: pairwise correlation matrix of all numeric variables.
# "pearson" is the default method of cor(); it is spelled out for clarity.
cor(accidentes.numericas, method = "pearson")

##                       Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero
## Cant.LunesAViernes             1.0000000           0.9636522  0.9876425
## Cant.SabadoYDomingo            0.9636522           1.0000000  0.9701100
## Cant.Enero                     0.9876425           0.9701100  1.0000000
## Cant.Febrero                   0.9917373           0.9606542  0.9820365
## Cant.Marzo                     0.9900986           0.9734615  0.9788448
## Cant.Abril                     0.9926879           0.9722781  0.9822133
## Cant.Mayo                      0.9910002           0.9731444  0.9800930
## Cant.Junio                     0.9891596           0.9752020  0.9833625
## Cant.Julio                     0.9908467           0.9716935  0.9816521
## Cant.Agosto                    0.9934022           0.9693279  0.9807494
## Cant.Septiembre                0.9946397           0.9680880  0.9838916
## Cant.Octubre                   0.9924785           0.9701513  0.9797234
## Cant.Noviembre                 0.9929349           0.9696726  0.9831179
## Cant.Diciembre                 0.9902785           0.9674028  0.9832371
## Cant.Atropello                 0.7546528           0.8204026  0.7570392
## Cant.CaidadeOcupante           0.8433992           0.9134499  0.8575765
## Cant.Choque                    0.9942017           0.9431297  0.9794424
## Cant.ChoqueyAtropello          0.1026266           0.1310495  0.1047501
## Cant.Incendio                  0.3062135           0.2955440  0.2797577
## Cant.Otro                      0.8922664           0.9412521  0.9053122
## Cant.Volcamiento               0.8993896           0.9271295  0.9036856
## Prom.Lunes                     0.9962606           0.9671242  0.9861355
## Prom.Martes                    0.9972238           0.9566530  0.9836819
## Prom.Miercoles                 0.9974549           0.9604599  0.9841546
## Prom.Jueves                    0.9974184           0.9595735  0.9848078
## Prom.Viernes                   0.9975649           0.9590346  0.9854632
## Prom.Sabado                    0.9830133           0.9872587  0.9817570
## Prom.Domingo                   0.8609545           0.9545218  0.8816438
## Cant.Herido                    0.9550821           0.9852361  0.9579810
## Cant.Muerto                    0.8356747           0.8412418  0.8175117
## Cant.Solodaños                 0.9823887           0.9166029  0.9644623
##                       Cant.Febrero Cant.Marzo Cant.Abril Cant.Mayo Cant.Junio
## Cant.LunesAViernes       0.9917373  0.9900986  0.9926879 0.9910002  0.9891596
## Cant.SabadoYDomingo      0.9606542  0.9734615  0.9722781 0.9731444  0.9752020
## Cant.Enero               0.9820365  0.9788448  0.9822133 0.9800930  0.9833625
## Cant.Febrero             1.0000000  0.9844241  0.9842199 0.9824957  0.9786088
## Cant.Marzo               0.9844241  1.0000000  0.9872455 0.9885439  0.9819904
## Cant.Abril               0.9842199  0.9872455  1.0000000 0.9879343  0.9856743
## Cant.Mayo                0.9824957  0.9885439  0.9879343 1.0000000  0.9838462
## Cant.Junio               0.9786088  0.9819904  0.9856743 0.9838462  1.0000000
## Cant.Julio               0.9810422  0.9822687  0.9869688 0.9862096  0.9848422
## Cant.Agosto              0.9852735  0.9862027  0.9877982 0.9870403  0.9843470
## Cant.Septiembre          0.9853046  0.9851827  0.9887901 0.9868705  0.9847239
## Cant.Octubre             0.9837770  0.9848817  0.9869285 0.9846509  0.9855781
## Cant.Noviembre           0.9847525  0.9857442  0.9872986 0.9839009  0.9854905
## Cant.Diciembre           0.9799591  0.9811499  0.9835579 0.9823279  0.9847396
## Cant.Atropello           0.7417624  0.7632526  0.7601691 0.7740907  0.7850885
## Cant.CaidadeOcupante     0.8468267  0.8683177  0.8592814 0.8669153  0.8597279
## Cant.Choque              0.9855679  0.9802314  0.9846921 0.9800865  0.9787444
## Cant.ChoqueyAtropello    0.1131707  0.1153291  0.1076544 0.1258579  0.1172451
## Cant.Incendio            0.2901392  0.3068948  0.3070991 0.3033907  0.2979335
## Cant.Otro                0.8958310  0.9111782  0.9042365 0.9084580  0.9032812
## Cant.Volcamiento         0.8994124  0.9065906  0.9094035 0.9083424  0.9049377
## Prom.Lunes               0.9895912  0.9881569  0.9909236 0.9898418  0.9872449
## Prom.Martes              0.9877082  0.9872548  0.9895691 0.9865050  0.9852179
## Prom.Miercoles           0.9883734  0.9869284  0.9894108 0.9881448  0.9859678
## Prom.Jueves              0.9899598  0.9859830  0.9895556 0.9873271  0.9870057
## Prom.Viernes             0.9891327  0.9874035  0.9895920 0.9888664  0.9859286
## Prom.Sabado              0.9743759  0.9836470  0.9842411 0.9835040  0.9854827
## Prom.Domingo             0.8690087  0.8869367  0.8828270 0.8866851  0.8886083
## Cant.Herido              0.9548400  0.9640836  0.9616079 0.9660169  0.9622402
## Cant.Muerto              0.8218346  0.8302889  0.8406152 0.8343514  0.8408633
## Cant.Solodaños           0.9695580  0.9638324  0.9692024 0.9632914  0.9645954
##                       Cant.Julio Cant.Agosto Cant.Septiembre Cant.Octubre
## Cant.LunesAViernes    0.99084666   0.9934022       0.9946397    0.9924785
## Cant.SabadoYDomingo   0.97169347   0.9693279       0.9680880    0.9701513
## Cant.Enero            0.98165205   0.9807494       0.9838916    0.9797234
## Cant.Febrero          0.98104224   0.9852735       0.9853046    0.9837770
## Cant.Marzo            0.98226872   0.9862027       0.9851827    0.9848817
## Cant.Abril            0.98696883   0.9877982       0.9887901    0.9869285
## Cant.Mayo             0.98620960   0.9870403       0.9868705    0.9846509
## Cant.Junio            0.98484217   0.9843470       0.9847239    0.9855781
## Cant.Julio            1.00000000   0.9874602       0.9878484    0.9874537
## Cant.Agosto           0.98746023   1.0000000       0.9912874    0.9865401
## Cant.Septiembre       0.98784843   0.9912874       1.0000000    0.9887801
## Cant.Octubre          0.98745366   0.9865401       0.9887801    1.0000000
## Cant.Noviembre        0.98302681   0.9882537       0.9887799    0.9872903
## Cant.Diciembre        0.98046486   0.9819277       0.9847394    0.9848629
## Cant.Atropello        0.78809167   0.7571546       0.7595344    0.7840646
## Cant.CaidadeOcupante  0.85853122   0.8556647       0.8495035    0.8580884
## Cant.Choque           0.98035109   0.9855075       0.9881352    0.9821020
## Cant.ChoqueyAtropello 0.09512196   0.1067402       0.1076693    0.1074549
## Cant.Incendio         0.31537628   0.3183136       0.3084148    0.3032953
## Cant.Otro             0.89588343   0.9031173       0.8949386    0.8973021
## Cant.Volcamiento      0.90075533   0.9098913       0.9005140    0.8994935
## Prom.Lunes            0.98930178   0.9906839       0.9915323    0.9898928
## Prom.Martes           0.98726101   0.9887451       0.9910187    0.9895266
## Prom.Miercoles        0.98756395   0.9927881       0.9925070    0.9895343
## Prom.Jueves           0.98799949   0.9894684       0.9916918    0.9904950
## Prom.Viernes          0.98772745   0.9907136       0.9914774    0.9885558
## Prom.Sabado           0.98431122   0.9827187       0.9833287    0.9851403
## Prom.Domingo          0.88083982   0.8770939       0.8721370    0.8748540
## Cant.Herido           0.96007613   0.9600962       0.9581141    0.9599157
## Cant.Muerto           0.83589148   0.8388603       0.8328400    0.8437316
## Cant.Solodaños        0.96768605   0.9704722       0.9736186    0.9695297
##                       Cant.Noviembre Cant.Diciembre Cant.Atropello
## Cant.LunesAViernes         0.9929349     0.99027847      0.7546528
## Cant.SabadoYDomingo        0.9696726     0.96740279      0.8204026
## Cant.Enero                 0.9831179     0.98323714      0.7570392
## Cant.Febrero               0.9847525     0.97995912      0.7417624
## Cant.Marzo                 0.9857442     0.98114991      0.7632526
## Cant.Abril                 0.9872986     0.98355792      0.7601691
## Cant.Mayo                  0.9839009     0.98232788      0.7740907
## Cant.Junio                 0.9854905     0.98473955      0.7850885
## Cant.Julio                 0.9830268     0.98046486      0.7880917
## Cant.Agosto                0.9882537     0.98192771      0.7571546
## Cant.Septiembre            0.9887799     0.98473942      0.7595344
## Cant.Octubre               0.9872903     0.98486287      0.7840646
## Cant.Noviembre             1.0000000     0.98711015      0.7567224
## Cant.Diciembre             0.9871102     1.00000000      0.7789588
## Cant.Atropello             0.7567224     0.77895875      1.0000000
## Cant.CaidadeOcupante       0.8527010     0.84960510      0.7578215
## Cant.Choque                0.9854206     0.98099159      0.7043026
## Cant.ChoqueyAtropello      0.1121794     0.08734755      0.1059330
## Cant.Incendio              0.3037392     0.30435811      0.2572751
## Cant.Otro                  0.9032200     0.89470393      0.6977982
## Cant.Volcamiento           0.9107453     0.89589040      0.6794083
## Prom.Lunes                 0.9900147     0.98673123      0.7645061
## Prom.Martes                0.9902659     0.98647557      0.7541572
## Prom.Miercoles             0.9909659     0.98814177      0.7526100
## Prom.Jueves                0.9896327     0.98846229      0.7525719
## Prom.Viernes               0.9900216     0.98756650      0.7376937
## Prom.Sabado                0.9833529     0.98186351      0.8061124
## Prom.Domingo               0.8777212     0.87370793      0.7897633
## Cant.Herido                0.9616282     0.95623247      0.8029844
## Cant.Muerto                0.8412859     0.84960143      0.7721552
## Cant.Solodaños             0.9685510     0.96841515      0.7030532
##                       Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello
## Cant.LunesAViernes              0.84339917   0.9942017            0.10262655
## Cant.SabadoYDomingo             0.91344990   0.9431297            0.13104946
## Cant.Enero                      0.85757651   0.9794424            0.10475006
## Cant.Febrero                    0.84682670   0.9855679            0.11317066
## Cant.Marzo                      0.86831770   0.9802314            0.11532911
## Cant.Abril                      0.85928141   0.9846921            0.10765437
## Cant.Mayo                       0.86691531   0.9800865            0.12585786
## Cant.Junio                      0.85972793   0.9787444            0.11724511
## Cant.Julio                      0.85853122   0.9803511            0.09512196
## Cant.Agosto                     0.85566466   0.9855075            0.10674022
## Cant.Septiembre                 0.84950354   0.9881352            0.10766935
## Cant.Octubre                    0.85808843   0.9821020            0.10745488
## Cant.Noviembre                  0.85270102   0.9854206            0.11217940
## Cant.Diciembre                  0.84960510   0.9809916            0.08734755
## Cant.Atropello                  0.75782150   0.7043026            0.10593296
## Cant.CaidadeOcupante            1.00000000   0.7903776            0.05389119
## Cant.Choque                     0.79037756   1.0000000            0.11391932
## Cant.ChoqueyAtropello           0.05389119   0.1139193            1.00000000
## Cant.Incendio                   0.28048852   0.2963503           -0.01423740
## Cant.Otro                       0.96329104   0.8546215            0.06059832
## Cant.Volcamiento                0.89881830   0.8711757            0.09651811
## Prom.Lunes                      0.85635717   0.9870984            0.10961154
## Prom.Martes                     0.83472692   0.9919581            0.09864073
## Prom.Miercoles                  0.83500790   0.9928591            0.09603943
## Prom.Jueves                     0.84255280   0.9913648            0.10324043
## Prom.Viernes                    0.83499117   0.9936906            0.10531212
## Prom.Sabado                     0.86853598   0.9720076            0.12268732
## Prom.Domingo                    0.93479727   0.8238979            0.13885728
## Cant.Herido                     0.94201683   0.9256545            0.11542020
## Cant.Muerto                     0.72327705   0.8172679            0.07841192
## Cant.Solodaños                  0.74723820   0.9920707            0.09788647
##                       Cant.Incendio  Cant.Otro Cant.Volcamiento Prom.Lunes
## Cant.LunesAViernes        0.3062135 0.89226637       0.89938959  0.9962606
## Cant.SabadoYDomingo       0.2955440 0.94125212       0.92712951  0.9671242
## Cant.Enero                0.2797577 0.90531222       0.90368557  0.9861355
## Cant.Febrero              0.2901392 0.89583095       0.89941235  0.9895912
## Cant.Marzo                0.3068948 0.91117816       0.90659060  0.9881569
## Cant.Abril                0.3070991 0.90423649       0.90940353  0.9909236
## Cant.Mayo                 0.3033907 0.90845797       0.90834242  0.9898418
## Cant.Junio                0.2979335 0.90328122       0.90493771  0.9872449
## Cant.Julio                0.3153763 0.89588343       0.90075533  0.9893018
## Cant.Agosto               0.3183136 0.90311728       0.90989131  0.9906839
## Cant.Septiembre           0.3084148 0.89493860       0.90051398  0.9915323
## Cant.Octubre              0.3032953 0.89730210       0.89949348  0.9898928
## Cant.Noviembre            0.3037392 0.90322002       0.91074525  0.9900147
## Cant.Diciembre            0.3043581 0.89470393       0.89589040  0.9867312
## Cant.Atropello            0.2572751 0.69779819       0.67940834  0.7645061
## Cant.CaidadeOcupante      0.2804885 0.96329104       0.89881830  0.8563572
## Cant.Choque               0.2963503 0.85462153       0.87117566  0.9870984
## Cant.ChoqueyAtropello    -0.0142374 0.06059832       0.09651811  0.1096115
## Cant.Incendio             1.0000000 0.27731067       0.31961870  0.2944432
## Cant.Otro                 0.2773107 1.00000000       0.95193635  0.9020010
## Cant.Volcamiento          0.3196187 0.95193635       1.00000000  0.9083580
## Prom.Lunes                0.2944432 0.90200103       0.90835803  1.0000000
## Prom.Martes               0.3058605 0.88160839       0.88952119  0.9921416
## Prom.Miercoles            0.3036526 0.88516373       0.89283594  0.9915235
## Prom.Jueves               0.3095710 0.88879190       0.89337746  0.9919925
## Prom.Viernes              0.3117819 0.88996145       0.89968244  0.9925345
## Prom.Sabado               0.2994680 0.90502533       0.89686817  0.9807919
## Prom.Domingo              0.2680752 0.94480944       0.92073877  0.8751188
## Cant.Herido               0.3201475 0.96438375       0.95325779  0.9618677
## Cant.Muerto               0.3373043 0.75882498       0.78830157  0.8396572
## Cant.Solodaños            0.2757929 0.80923368       0.82364744  0.9722600
##                       Prom.Martes Prom.Miercoles Prom.Jueves Prom.Viernes
## Cant.LunesAViernes     0.99722380     0.99745495   0.9974184    0.9975649
## Cant.SabadoYDomingo    0.95665303     0.96045990   0.9595735    0.9590346
## Cant.Enero             0.98368192     0.98415463   0.9848078    0.9854632
## Cant.Febrero           0.98770823     0.98837338   0.9899598    0.9891327
## Cant.Marzo             0.98725485     0.98692835   0.9859830    0.9874035
## Cant.Abril             0.98956912     0.98941084   0.9895556    0.9895920
## Cant.Mayo              0.98650500     0.98814482   0.9873271    0.9888664
## Cant.Junio             0.98521787     0.98596780   0.9870057    0.9859286
## Cant.Julio             0.98726101     0.98756395   0.9879995    0.9877274
## Cant.Agosto            0.98874513     0.99278814   0.9894684    0.9907136
## Cant.Septiembre        0.99101873     0.99250704   0.9916918    0.9914774
## Cant.Octubre           0.98952662     0.98953434   0.9904950    0.9885558
## Cant.Noviembre         0.99026590     0.99096585   0.9896327    0.9900216
## Cant.Diciembre         0.98647557     0.98814177   0.9884623    0.9875665
## Cant.Atropello         0.75415724     0.75261002   0.7525719    0.7376937
## Cant.CaidadeOcupante   0.83472692     0.83500790   0.8425528    0.8349912
## Cant.Choque            0.99195806     0.99285909   0.9913648    0.9936906
## Cant.ChoqueyAtropello  0.09864073     0.09603943   0.1032404    0.1053121
## Cant.Incendio          0.30586051     0.30365260   0.3095710    0.3117819
## Cant.Otro              0.88160839     0.88516373   0.8887919    0.8899614
## Cant.Volcamiento       0.88952119     0.89283594   0.8933775    0.8996824
## Prom.Lunes             0.99214159     0.99152349   0.9919925    0.9925345
## Prom.Martes            1.00000000     0.99348171   0.9936552    0.9932621
## Prom.Miercoles         0.99348171     1.00000000   0.9942484    0.9941611
## Prom.Jueves            0.99365519     0.99424845   1.0000000    0.9938369
## Prom.Viernes           0.99326205     0.99416109   0.9938369    1.0000000
## Prom.Sabado            0.97967185     0.98242212   0.9792855    0.9783342
## Prom.Domingo           0.84794394     0.85329433   0.8569252    0.8570797
## Cant.Herido            0.94724502     0.94844658   0.9517033    0.9512748
## Cant.Muerto            0.83345747     0.83461865   0.8286585    0.8302964
## Cant.Solodaños         0.98255151     0.98326682   0.9801087    0.9804725
##                       Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto
## Cant.LunesAViernes      0.9830133    0.8609545   0.9550821  0.83567466
## Cant.SabadoYDomingo     0.9872587    0.9545218   0.9852361  0.84124180
## Cant.Enero              0.9817570    0.8816438   0.9579810  0.81751172
## Cant.Febrero            0.9743759    0.8690087   0.9548400  0.82183463
## Cant.Marzo              0.9836470    0.8869367   0.9640836  0.83028894
## Cant.Abril              0.9842411    0.8828270   0.9616079  0.84061521
## Cant.Mayo               0.9835040    0.8866851   0.9660169  0.83435138
## Cant.Junio              0.9854827    0.8886083   0.9622402  0.84086330
## Cant.Julio              0.9843112    0.8808398   0.9600761  0.83589148
## Cant.Agosto             0.9827187    0.8770939   0.9600962  0.83886030
## Cant.Septiembre         0.9833287    0.8721370   0.9581141  0.83284002
## Cant.Octubre            0.9851403    0.8748540   0.9599157  0.84373156
## Cant.Noviembre          0.9833529    0.8777212   0.9616282  0.84128595
## Cant.Diciembre          0.9818635    0.8737079   0.9562325  0.84960143
## Cant.Atropello          0.8061124    0.7897633   0.8029844  0.77215524
## Cant.CaidadeOcupante    0.8685360    0.9347973   0.9420168  0.72327705
## Cant.Choque             0.9720076    0.8238979   0.9256545  0.81726788
## Cant.ChoqueyAtropello   0.1226873    0.1388573   0.1154202  0.07841192
## Cant.Incendio           0.2994680    0.2680752   0.3201475  0.33730430
## Cant.Otro               0.9050253    0.9448094   0.9643838  0.75882498
## Cant.Volcamiento        0.8968682    0.9207388   0.9532578  0.78830157
## Prom.Lunes              0.9807919    0.8751188   0.9618677  0.83965723
## Prom.Martes             0.9796718    0.8479439   0.9472450  0.83345747
## Prom.Miercoles          0.9824221    0.8532943   0.9484466  0.83461865
## Prom.Jueves             0.9792855    0.8569252   0.9517033  0.82865853
## Prom.Viernes            0.9783342    0.8570797   0.9512748  0.83029641
## Prom.Sabado             1.0000000    0.8954828   0.9648149  0.84593052
## Prom.Domingo            0.8954828    1.0000000   0.9553321  0.77410953
## Cant.Herido             0.9648149    0.9553321   1.0000000  0.83914578
## Cant.Muerto             0.8459305    0.7741095   0.8391458  1.00000000
## Cant.Solodaños          0.9578612    0.7761887   0.8856168  0.79496345
##                       Cant.Solodaños
## Cant.LunesAViernes        0.98238870
## Cant.SabadoYDomingo       0.91660287
## Cant.Enero                0.96446230
## Cant.Febrero              0.96955805
## Cant.Marzo                0.96383237
## Cant.Abril                0.96920244
## Cant.Mayo                 0.96329143
## Cant.Junio                0.96459541
## Cant.Julio                0.96768605
## Cant.Agosto               0.97047215
## Cant.Septiembre           0.97361861
## Cant.Octubre              0.96952969
## Cant.Noviembre            0.96855099
## Cant.Diciembre            0.96841515
## Cant.Atropello            0.70305323
## Cant.CaidadeOcupante      0.74723820
## Cant.Choque               0.99207072
## Cant.ChoqueyAtropello     0.09788647
## Cant.Incendio             0.27579287
## Cant.Otro                 0.80923368
## Cant.Volcamiento          0.82364744
## Prom.Lunes                0.97226004
## Prom.Martes               0.98255151
## Prom.Miercoles            0.98326682
## Prom.Jueves               0.98010872
## Prom.Viernes              0.98047249
## Prom.Sabado               0.95786122
## Prom.Domingo              0.77618869
## Cant.Herido               0.88561676
## Cant.Muerto               0.79496345
## Cant.Solodaños            1.00000000

# Standardise the numeric variables: each column is centred on its mean and
# divided by its standard deviation (the defaults of scale(), made explicit).
Accidentes_VariablesNormalizadas <- scale(
  accidentes.numericas,
  center = TRUE,
  scale = TRUE
)
# Preview the first six standardised rows.
head(Accidentes_VariablesNormalizadas, n = 6L)

##      Cant.LunesAViernes Cant.SabadoYDomingo  Cant.Enero Cant.Febrero
## [1,]       -0.686835730          -0.7527820 -0.72997349 -0.686005267
## [2,]       -0.694322833          -0.7354327 -0.64635863 -0.686005267
## [3,]        0.007967407          -0.4057974 -0.06105462 -0.004148568
## [4,]        0.126263631           0.4674472  0.18978995  0.055143319
## [5,]        0.470670359           0.9185271  0.79181694  0.529478414
## [6,]        0.156212042           0.0394995  0.12289807  0.247841952
##       Cant.Marzo Cant.Abril   Cant.Mayo  Cant.Junio  Cant.Julio Cant.Agosto
## [1,] -0.72337131 -0.7416788 -0.62639123 -0.71082566 -0.66808954 -0.66632469
## [2,] -0.72337131 -0.6839528 -0.72276578 -0.74040270 -0.64000594 -0.70617634
## [3,] -0.17149809 -0.1644192  0.04823057 -0.03055364 -0.09237563 -0.09511763
## [4,]  0.33792333  0.2829571  0.30981861  0.38352497  0.03400059  0.10414065
## [5,]  0.80489298  0.4994294  0.59894224  0.25042827  0.65183991  0.38310223
## [6,]  0.09736321  0.1675051  0.26851524  0.19127419  0.14633501  0.15727619
##      Cant.Septiembre Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## [1,]     -0.73497758  -0.71302672   -0.685619459     -0.7171810     -0.6054185
## [2,]     -0.66670543  -0.76767664   -0.714434233     -0.7171810     -0.4725357
## [3,]     -0.09321939  -0.09821518    0.034749893     -0.1879989     -0.6174988
## [4,]      0.30275906   0.10672200    0.049157280      0.2019248      0.3851627
## [5,]      0.49392108   0.85815834    0.380527182      0.5222192      0.7958916
## [6,]      0.16621477  -0.08455270    0.005935119      0.1044438     -0.1826095
##      Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## [1,]           -0.7634191 -0.65799414           -0.05607722    -0.2530908
## [2,]           -0.7634191 -0.66590416           -0.05607722    -0.2530908
## [3,]           -0.6518460  0.11718767           -0.05607722    -0.2530908
## [4,]            0.3841906  0.04125149           -0.05607722    -0.2530908
## [5,]            1.8824589  0.24058396           -0.05607722    -0.2530908
## [6,]            1.3724102  0.01910344           -0.05607722    -0.2530908
##       Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles
## [1,] -0.7387921       -0.7396584 -0.7098359 -0.69684389    -0.70235339
## [2,] -0.8031899       -0.8666624 -0.6891806 -0.66098542    -0.65800500
## [3,] -0.4425624       -0.5279851 -0.1996489  0.10399519     0.08113494
## [4,]  0.6908380        1.2500707  0.3043416  0.06096503     0.07374354
## [5,]  1.4120929        1.1654014  0.5108950  0.51995340     0.27331133
## [6,]  0.4332470       -0.2316425  0.1308367  0.20439890     0.10330914
##      Prom.Jueves Prom.Viernes Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto
## [1,] -0.63325460  -0.67450318 -0.68284199   -0.8037656  -0.7612379  -0.6969438
## [2,] -0.73229959  -0.67805986 -0.70661686   -0.7707135  -0.7587308  -0.3484719
## [3,]  0.05244145  -0.04141443 -0.29163742   -0.6219790  -0.5055104  -0.5227079
## [4,]  0.08291683   0.10796607  0.19250526    0.9479962   0.5700494  -0.3484719
## [5,]  0.59337945   0.44229384  0.71122955    1.2454652   1.0138118   0.3484719
## [6,]  0.22767489   0.09373935 -0.03227527    0.1547455   0.3644645  -0.3484719
##      Cant.Solodaños
## [1,]    -0.61569825
## [2,]    -0.62664516
## [3,]     0.30603153
## [4,]    -0.13184485
## [5,]     0.14620665
## [6,]    -0.07054216

# Check the class of the normalized data (a matrix at this point, per the
# recorded output below).
class(Accidentes_VariablesNormalizadas)

## [1] "matrix" "array"

# Convert the normalized matrix to a data frame and confirm the new class.
Accidentes_VariablesNormalizadas <- as.data.frame(
  x = Accidentes_VariablesNormalizadas
)
class(Accidentes_VariablesNormalizadas)

## [1] "data.frame"

# Per-variable descriptive statistics of the normalized data: one row per
# variable with its minimum, median, mean, standard deviation and maximum.
# vapply() walks the data-frame columns directly instead of apply(), which
# would first coerce the whole data frame to a matrix; numeric(1) also makes
# the call fail loudly if a column does not reduce to a single number.
desc_stats <- data.frame(
  Min = vapply(Accidentes_VariablesNormalizadas, min, numeric(1)), # minimum
  Med = vapply(Accidentes_VariablesNormalizadas, median, numeric(1)), # median
  Mean = vapply(Accidentes_VariablesNormalizadas, mean, numeric(1)), # mean
  SD = vapply(Accidentes_VariablesNormalizadas, sd, numeric(1)), # standard deviation
  Max = vapply(Accidentes_VariablesNormalizadas, max, numeric(1)) # maximum
)

# Round to one decimal for display and show the first rows.
desc_stats <- round(desc_stats, 1)
head(desc_stats)

##                      Min  Med Mean SD Max
## Cant.LunesAViernes  -0.8 -0.3    0  1 5.4
## Cant.SabadoYDomingo -0.9 -0.3    0  1 4.9
## Cant.Enero          -0.8 -0.3    0  1 5.3
## Cant.Febrero        -0.8 -0.3    0  1 5.0
## Cant.Marzo          -0.8 -0.3    0  1 5.1
## Cant.Abril          -0.8 -0.3    0  1 4.9

# Segmentation using multiple clustering methods

# A value of k (the number of clusters) has to be chosen.
# List the variables available in the normalized dataset:

colnames(Accidentes_VariablesNormalizadas)

##  [1] "Cant.LunesAViernes"    "Cant.SabadoYDomingo"   "Cant.Enero"           
##  [4] "Cant.Febrero"          "Cant.Marzo"            "Cant.Abril"           
##  [7] "Cant.Mayo"             "Cant.Junio"            "Cant.Julio"           
## [10] "Cant.Agosto"           "Cant.Septiembre"       "Cant.Octubre"         
## [13] "Cant.Noviembre"        "Cant.Diciembre"        "Cant.Atropello"       
## [16] "Cant.CaidadeOcupante"  "Cant.Choque"           "Cant.ChoqueyAtropello"
## [19] "Cant.Incendio"         "Cant.Otro"             "Cant.Volcamiento"     
## [22] "Prom.Lunes"            "Prom.Martes"           "Prom.Miercoles"       
## [25] "Prom.Jueves"           "Prom.Viernes"          "Prom.Sabado"          
## [28] "Prom.Domingo"          "Cant.Herido"           "Cant.Muerto"          
## [31] "Cant.Solodaños"

# Dataset that the clustering steps below work on: the normalized variables
# as a data frame. Show its first rows.

preprocessed <- as.data.frame(x = Accidentes_VariablesNormalizadas)
head(x = preprocessed)

##   Cant.LunesAViernes Cant.SabadoYDomingo  Cant.Enero Cant.Febrero  Cant.Marzo
## 1       -0.686835730          -0.7527820 -0.72997349 -0.686005267 -0.72337131
## 2       -0.694322833          -0.7354327 -0.64635863 -0.686005267 -0.72337131
## 3        0.007967407          -0.4057974 -0.06105462 -0.004148568 -0.17149809
## 4        0.126263631           0.4674472  0.18978995  0.055143319  0.33792333
## 5        0.470670359           0.9185271  0.79181694  0.529478414  0.80489298
## 6        0.156212042           0.0394995  0.12289807  0.247841952  0.09736321
##   Cant.Abril   Cant.Mayo  Cant.Junio  Cant.Julio Cant.Agosto Cant.Septiembre
## 1 -0.7416788 -0.62639123 -0.71082566 -0.66808954 -0.66632469     -0.73497758
## 2 -0.6839528 -0.72276578 -0.74040270 -0.64000594 -0.70617634     -0.66670543
## 3 -0.1644192  0.04823057 -0.03055364 -0.09237563 -0.09511763     -0.09321939
## 4  0.2829571  0.30981861  0.38352497  0.03400059  0.10414065      0.30275906
## 5  0.4994294  0.59894224  0.25042827  0.65183991  0.38310223      0.49392108
## 6  0.1675051  0.26851524  0.19127419  0.14633501  0.15727619      0.16621477
##   Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## 1  -0.71302672   -0.685619459     -0.7171810     -0.6054185
## 2  -0.76767664   -0.714434233     -0.7171810     -0.4725357
## 3  -0.09821518    0.034749893     -0.1879989     -0.6174988
## 4   0.10672200    0.049157280      0.2019248      0.3851627
## 5   0.85815834    0.380527182      0.5222192      0.7958916
## 6  -0.08455270    0.005935119      0.1044438     -0.1826095
##   Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## 1           -0.7634191 -0.65799414           -0.05607722    -0.2530908
## 2           -0.7634191 -0.66590416           -0.05607722    -0.2530908
## 3           -0.6518460  0.11718767           -0.05607722    -0.2530908
## 4            0.3841906  0.04125149           -0.05607722    -0.2530908
## 5            1.8824589  0.24058396           -0.05607722    -0.2530908
## 6            1.3724102  0.01910344           -0.05607722    -0.2530908
##    Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles Prom.Jueves
## 1 -0.7387921       -0.7396584 -0.7098359 -0.69684389    -0.70235339 -0.63325460
## 2 -0.8031899       -0.8666624 -0.6891806 -0.66098542    -0.65800500 -0.73229959
## 3 -0.4425624       -0.5279851 -0.1996489  0.10399519     0.08113494  0.05244145
## 4  0.6908380        1.2500707  0.3043416  0.06096503     0.07374354  0.08291683
## 5  1.4120929        1.1654014  0.5108950  0.51995340     0.27331133  0.59337945
## 6  0.4332470       -0.2316425  0.1308367  0.20439890     0.10330914  0.22767489
##   Prom.Viernes Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto Cant.Solodaños
## 1  -0.67450318 -0.68284199   -0.8037656  -0.7612379  -0.6969438    -0.61569825
## 2  -0.67805986 -0.70661686   -0.7707135  -0.7587308  -0.3484719    -0.62664516
## 3  -0.04141443 -0.29163742   -0.6219790  -0.5055104  -0.5227079     0.30603153
## 4   0.10796607  0.19250526    0.9479962   0.5700494  -0.3484719    -0.13184485
## 5   0.44229384  0.71122955    1.2454652   1.0138118   0.3484719     0.14620665
## 6   0.09373935 -0.03227527    0.1547455   0.3644645  -0.3484719    -0.07054216

# Confirm that the working dataset is a data frame.
class(preprocessed)

## [1] "data.frame"

# Assess whether the data can be clustered at all, using the Hopkins statistic.
# NOTE(review): assuming this is clustertend::hopkins() (clustertend is
# installed at the top of the script) - confirm. Under that implementation a
# value well below 0.5 (close to 0) indicates clusterable data, while a value
# around 0.5 indicates uniformly distributed data with no cluster structure
# (not a "normal" distribution, as the original note stated).
# The seed is fixed, presumably so that the random sampling done by hopkins()
# is reproducible.

set.seed(123)
hopkins(preprocessed, n=nrow(preprocessed)-1)

## $H
## [1] 0.07801222

# como el valor obtenido es menor a 0.5, se puede concluir que el conjunto de datos si es agrupable


# Euclidean distance matrix between the rows of `preprocessed`;
# head() prints its first values.

distancia <- dist(x = preprocessed, method = "euclidean")
head(x = distancia)

## [1] 0.4543284 3.0954321 5.4862480 7.7066029 4.8075808 1.9099331

# Dissimilarity matrix built with get_dist(): Pearson-based distance with
# standardization requested (stand = TRUE).

res.dist <- get_dist(
  x = preprocessed,
  method = "pearson",
  stand = TRUE
)

# Draw each dissimilarity matrix as a heat map, which helps to see whether
# the data show a clustering tendency.

fviz_dist(dist.obj = distancia, show_labels = FALSE)

fviz_dist(
  dist.obj = res.dist,
  gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07")
)

# Analizando los gráficos de mapa de calor de la matriz de disimilaridad se puede confirmar que sí hay tendencia de agrupamiento

# Alternative visual check for clustering tendency: plot the observations on
# the axes of the first two principal components.

fviz_pca_ind(
  X = prcomp(x = preprocessed),
  title = "componentes principales",
  geom = "point"
)

# se puede observar agrupaciones

# Determine the optimal number of clusters
# - using the elbow method (within-cluster sum of squares)
# Note: labs() takes a lowercase `subtitle` argument; the former `Subtitle`
# spelling was not recognized as the plot subtitle, so it was never drawn.


fviz_nbclust(preprocessed, kmeans, method = "wss") +
  labs(title = "Número óptimo de agrupaciones", subtitle = "Método \"Elbow\"")

# Same plot, marking the chosen number of groups (k = 3)

fviz_nbclust(preprocessed, kmeans, method = "wss") +
  geom_vline(xintercept = 3, linetype = 2) +
  labs(title = "Número óptimo de agrupaciones", subtitle = "Método \"Elbow\"")

# In the resulting curve, the WCSS value decreases as the number of centroids
# grows, so that the plot takes the shape of an elbow.
# The optimal k is the point from which increasing k no longer produces
# important changes in WCSS. In this case that happens from k >= 3.

# From this first analysis we take k = 3 as the number of clusters

# - using the silhouette coefficient method

fviz_nbclust(preprocessed, kmeans, method = "silhouette") +
  labs(title = "Número óptimo de agrupaciones", subtitle = "Método \"Silhouette\"")

# From this second analysis, 2 is the candidate number of clusters


# Number of clusters according to the gap statistic

fviz_nbclust(preprocessed, kmeans, nstart = 2, method = "gap_stat", nboot = 50) +
  labs(title = "Número óptimo de agrupaciones", subtitle = "Método del estadístico \"Gap\"")

# De este tercer análisis la recomendación seria 4 grupos


# ++++++++++++++++++++++++
# Number of clusters for the PAM method, using the silhouette criterion
fviz_nbclust(x = preprocessed, FUNcluster = pam, method = "silhouette")

# De este cuarto análisis es candidato el seleccionar 2 ó incluso 3 para el número de agrupaciones


# ++++++++++++++++++++++++
# Number of clusters for the CLARA and PAM algorithms.
# pamk() (from the fpc package, per the original note) evaluates the candidate
# numbers of clusters in `krange`. With usepam = FALSE the underlying call is
# clara(), as the recorded output below shows.


pamk(
  data = preprocessed,
  krange = 2:10,
  criterion = "multiasw",
  usepam = FALSE
)

## $pamobject
## Call:     clara(x = sdata, k = k) 
## Medoids:
##      Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero Cant.Febrero Cant.Marzo
## [1,]         -0.5610524          -0.5445912 -0.4289600   -0.5229526 -0.5677148
## [2,]          0.7012731           0.6872041  0.8085399    0.6925311  0.8897965
##      Cant.Abril  Cant.Mayo Cant.Junio Cant.Julio Cant.Agosto Cant.Septiembre
## [1,] -0.5685009 -0.5850879 -0.6073060 -0.5838387  -0.5467697      -0.5438156
## [2,]  0.6148813  0.9431370  0.3391594  0.6097145   0.7550510       0.9854805
##      Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## [1,]   -0.5764019     -0.5415456     -0.5918484     -0.3638133
## [2,]    0.4892714      0.5822306      0.6475518      0.6509284
##      Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## [1,]           -0.4127606  -0.5583279           -0.05607722    -0.2530908
## [2,]            0.8942394   0.6708890           -0.05607722    -0.2530908
##       Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles
## [1,] -0.5584784       -0.6973238 -0.5549208  -0.5558006     -0.5397426
## [2,]  0.6393198        0.1493695  0.6017786   0.5773269      0.8720147
##      Prom.Jueves Prom.Viernes Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto
## [1,]  -0.5723038   -0.6033696  -0.5423542   -0.5393488  -0.5782172  -0.5227079
## [2,]   0.8143260    0.6201278   0.5988393    0.7992617   0.7530700   1.0454157
##      Cant.Solodaños
## [1,]     -0.5149867
## [2,]      0.6147344
## Objective function:   2.662442
## Clustering vector:    int [1:318] 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 2 1 1 ...
## Cluster sizes:            221 97 
## Best sample:
##  [1]   5  12  19  22  30  36  43  52  65  69  84  87  88  91  98 100 111 126 138
## [20] 169 182 191 192 205 206 214 218 221 230 237 243 244 253 263 265 272 274 276
## [39] 280 284 287 290 293 317
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"      
## 
## $nc
## [1] 2
## 
## $crit
##  [1] 0.0000000 0.5371603 0.4369745 0.4143532 0.3892079 0.4112166 0.3394987
##  [8] 0.3889279 0.3330864 0.2770347

# Same evaluation over k = 2..10, this time with usepam = TRUE (PAM).
pamk(
  data = preprocessed,
  krange = 2:10,
  criterion = "multiasw",
  usepam = TRUE
)

## $pamobject
## Medoids:
##       ID Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero Cant.Febrero
## [1,]  98         -0.5760266          -0.6602527 -0.5460208   -0.5970674
## [2,] 132          0.5275723           0.4558810  0.6245872    0.5146554
##      Cant.Marzo Cant.Abril  Cant.Mayo Cant.Junio Cant.Julio Cant.Agosto
## [1,] -0.5960160 -0.6117954 -0.6539268 -0.6368830 -0.5557551  -0.5600536
## [2,]  0.5784835  0.4849979  0.7228524  0.4574676  0.6097145   0.4495217
##      Cant.Septiembre Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## [1,]      -0.6667054   -0.5900644     -0.5559530     -0.5361450     -0.5450172
## [2,]       0.3300679    0.4072965      0.5822306      0.4108124      0.6630087
##      Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## [1,]           -0.6518460  -0.5535819           -0.05607722    -0.2530908
## [2,]            0.4160687   0.4683925           -0.05607722    -0.2530908
##       Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles
## [1,] -0.6743943       -0.4433158 -0.5879694  -0.5055987     -0.6210480
## [2,]  0.4718857        0.7420547  0.5026329   0.5701553      0.4876619
##      Prom.Jueves Prom.Viernes Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto
## [1,]  -0.5342096   -0.6531631  -0.6633898   -0.6385051  -0.6484170  -0.5227079
## [2,]   0.4714779    0.5774476   0.4778036    0.3695843   0.5349495   0.6969438
##      Cant.Solodaños
## [1,]     -0.5193654
## [2,]      0.4680458
## Clustering vector:
##   [1] 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 2 2 2 1 1 1 1 2 1 1 2 1 1
##  [38] 1 2 2 1 1 2 1 1 2 1 2 2 2 1 2 1 2 2 2 2 2 1 2 2 1 1 1 2 2 1 1 2 2 2 1 1 2
##  [75] 1 1 1 2 1 1 1 1 2 1 2 1 1 1 1 1 2 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1 2
## [112] 1 1 2 1 1 1 2 1 1 2 2 2 1 1 2 1 1 2 2 2 2 1 2 2 1 1 2 1 1 2 2 1 1 1 2 1 1
## [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 1 1 1 1 1 1 1 1 2 1 1 2 2 2
## [186] 1 1 2 1 2 1 2 2 1 1 2 2 2 2 2 1 1 1 1 1 2 1 1 2 1 1 2 1 1 1 1 1 1 2 1 1 1
## [223] 1 1 2 1 2 2 1 1 2 1 2 1 1 1 1 1 1 1 1 2 2 2 1 2 2 2 2 2 1 2 1 1 1 1 1 1 2
## [260] 2 1 1 2 1 2 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2
## [297] 1 2 2 1 1 2 2 1 1 1 2 1 2 1 1 1 1 2 1 1 1 1
## Objective function:
##    build     swap 
## 2.737260 2.632817 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"      
## 
## $nc
## [1] 2
## 
## $crit
##  [1] 0.0000000 0.5125797 0.4889046 0.3855134 0.3662393 0.3360861 0.3473702
##  [8] 0.3456886 0.3467271 0.3395320

# De este quinto  análisis se recomienda 2 para el número de agrupaciones


# Another way to choose the number of clusters: internal validation with
# clValid() for the "kmeans", "pam" and "clara" algorithms over k = 2..10
# (the original note also lists "som", which is not requested here).

intern <- clValid(
  obj = preprocessed,
  nClust = 2:10,
  clMethods = c("kmeans", "pam", "clara"),
  validation = "internal",
  maxitems = 600
)
# Summary of the validation measures
summary(intern)

## 
## Clustering Methods:
##  kmeans pam clara 
## 
## Cluster sizes:
##  2 3 4 5 6 7 8 9 10 
## 
## Validation Measures:
##                             2        3        4        5        6        7        8        9       10
##                                                                                                      
## kmeans Connectivity    9.2385  11.1274  44.6000  49.9012  52.6052  57.3143  63.4270  66.6226  69.1421
##        Dunn            0.0827   0.0968   0.0336   0.0421   0.0544   0.0544   0.0562   0.0607   0.0607
##        Silhouette      0.7317   0.7330   0.5460   0.5122   0.5103   0.5070   0.4896   0.4296   0.4279
## pam    Connectivity   27.3500  22.6881  58.9167  58.6798  75.5429  82.2984  83.9329 101.2171  94.0111
##        Dunn            0.0300   0.0311   0.0136   0.0170   0.0226   0.0228   0.0288   0.0288   0.0288
##        Silhouette      0.5122   0.5009   0.4006   0.3535   0.3553   0.3538   0.3549   0.3500   0.3656
## clara  Connectivity   27.2702  41.5433  60.9992  72.8956  69.2067  90.1837  76.1143  87.8540 122.7278
##        Dunn            0.0304   0.0189   0.0151   0.0180   0.0366   0.0291   0.0433   0.0326   0.0277
##        Silhouette      0.5455   0.4518   0.4084   0.4070   0.4164   0.3581   0.3956   0.3519   0.3106
## 
## Optimal Scores:
## 
##              Score  Method Clusters
## Connectivity 9.2385 kmeans 2       
## Dunn         0.0968 kmeans 3       
## Silhouette   0.7330 kmeans 3

# Best score, method and number of clusters for each validation measure.
optimalScores(intern)

##                   Score Method Clusters
## Connectivity 9.23849206 kmeans        2
## Dunn         0.09679341 kmeans        3
## Silhouette   0.73300797 kmeans        3

# Plot the validation results stored in `intern`.
plot(intern)

# De este otro análisis se sugiere 3 agrupaciones

# Luego de Analizar los resultados de las diferentes técnicas tomaremos  3 grupos ( k=3)

# First clustering approach: hierarchical clustering

## Hierarchical clustering with hclust()
# ++++++++++++++++++++++++

# Tree built from the Euclidean distance matrix with the "ward.D2" method.
clus_hc <- hclust(d = distancia, method = "ward.D2")
ggdendrogram(
  data = clus_hc,
  rotate = FALSE,
  labels = FALSE,
  theme_dendro = TRUE
) +
  labs(title = "Dendograma")

# Second tree on the same distances, this time with the "ward.D" method.
dendrogram <- hclust(d = distancia, method = "ward.D")
ggdendrogram(
  data = dendrogram,
  rotate = FALSE,
  labels = FALSE,
  theme_dendro = TRUE
) +
  labs(title = "Dendograma")

# En el eje horizontal del dendrograma tenemos cada uno de los datos que componen el conjunto de entrada, 
# mientras que en el eje vertical se representa la distancia euclídea que existe entre cada grupo a medida 
# que éstos se van jerarquizando. Cada línea vertical del diagrama representa un agrupamiento que coincide con los puntos 
# arropados por ésta, y como se ve en el dendrograma, estos van formándose progresivamente hasta tener un solo gran grupo 
# determinado por la línea horizontal superior.

# For our data, look at the results for k = 3.
# cutree() produces the cluster assignments; it takes the fitted hierarchical
# model and the number of groups k.

# Build three groups (k = 3)
grupos_hc3 <- cutree(tree = clus_hc, k = 3) # the labels are stored here


# Append a column with the labels produced by this method

accidentes <- cbind(accidentes, cluster_hc = grupos_hc3)

# Guardamos los resultados del cluster

# ++++++++++++++++++++++++
# Another way to run the hierarchical clustering: hcut(), with k = 3
res <- hcut(x = preprocessed, k = 3, stand = TRUE)
res[["size"]]

## [1] 174 116  28

# Dendrogram of the hcut() result
fviz_dend(
  x = res,
  rect = TRUE,
  cex = 0.5,
  k_colors = c("#00AFBB", "#2E9FDF", "#E7B800")
)

# Cluster plot
fviz_cluster(object = res, ellipse.type = "convex")

# Silhouette plot
fviz_silhouette(sil.obj = res)

##   cluster size ave.sil.width
## 1       1  174          0.69
## 2       2  116          0.25
## 3       3   28          0.29

# Append a column with the labels produced by hcut()
accidentes <- cbind(accidentes, cluster_hcut3 = res$cluster)

# Enhanced hierarchical clustering with three groups, using eclust()
# +++++++++++++++++++++
res.hc <- eclust(
  x = preprocessed,
  FUNcluster = "hclust",
  k = 3,
  graph = FALSE
)
print(res.hc$size) # cluster sizes

## [1] 174 116  28

# Append a column with the labels produced by eclust()
accidentes <- cbind(accidentes, cluster_enhHcut3 = res.hc$cluster)

# Plots of the eclust() result

fviz_dend(x = res.hc, rect = TRUE, show_labels = FALSE)

fviz_cluster(object = res.hc, ellipse.type = "convex")

fviz_cluster(
  object = res.hc,
  ellipse.type = "convex",
  palette = "jco",
  labelsize = 8
)

# Silhouette plot
fviz_silhouette(sil.obj = res.hc)

##   cluster size ave.sil.width
## 1       1  174          0.69
## 2       2  116          0.25
## 3       3   28          0.29

# Same silhouette plot, drawn with the "jco" palette.
fviz_silhouette(sil.obj = res.hc, palette = "jco")

##   cluster size ave.sil.width
## 1       1  174          0.69
## 2       2  116          0.25
## 3       3   28          0.29

## K-means clustering
# K-means groups the input data into k sets, each defined by a centroid, so
# that the distance between each point and the centroid of its set is as small
# as possible.
# In general terms the algorithm is:
#   1. Pick k centroids at random.
#   2. Compute the distance from every input point to the k centroids and
#      assign each point to its nearest centroid.
#   3. Move each centroid to the mean position of the points assigned to it.
#   4. Repeat steps 2 and 3 until the centroids stop moving and the
#      assignments therefore no longer change.
# The best number of centroids k is not necessarily known beforehand, which is
# why a technique such as the elbow method is applied to choose it. That method
# picks the number of groups from the behaviour of the WCSS (Within Clusters
# Summed Squares).

# kmeans() is given the data (numeric variables only) and k, the number of
# groups to build.

set.seed(1234)
kmeansb <- kmeans(
  x = preprocessed,
  centers = 3,
  iter.max = 1000,
  nstart = 25
)
# iter.max is the maximum number of iterations allowed; nstart is how many
# random sets of starting centroids are tried.

kmeansb

## K-means clustering with 3 clusters of sizes 100, 190, 28
## 
## Cluster means:
##   Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero Cant.Febrero Cant.Marzo
## 1          0.2757212           0.4178284  0.3178879    0.3121736  0.3284424
## 2         -0.5399939          -0.5959389 -0.5535021   -0.5483857 -0.5563943
## 3          2.6795261           2.5516266  2.6205933    2.6062826  2.6025239
##   Cant.Abril  Cant.Mayo Cant.Junio Cant.Julio Cant.Agosto Cant.Septiembre
## 1  0.2856990  0.3380426  0.3123922  0.2999523   0.3067199       0.3145019
## 2 -0.5495121 -0.5628420 -0.5572585 -0.5515426  -0.5544604      -0.5529425
## 3  2.7084783  2.6119901  2.6657108  2.6713520   2.6669816       2.6288887
##   Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## 1     0.297177      0.2651240      0.2783776      0.3340632
## 2    -0.550587     -0.5354035     -0.5380507     -0.4640159
## 3     2.674780      2.6862237      2.6568523      1.9555964
##   Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## 1            0.5191942   0.2382901           -0.05607722   -0.01834908
## 2           -0.5932910  -0.5197936           -0.05607722   -0.16484205
## 3            2.1716384   2.6761351            0.58079973    1.18410347
##    Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles Prom.Jueves
## 1  0.4684082        0.3881370  0.2869085   0.2528078      0.2651069   0.2771974
## 2 -0.5923719       -0.5680917 -0.5455607  -0.5291238     -0.5335053  -0.5390950
## 3  2.3467803        2.4687042  2.6773458   2.6875977      2.6734043   2.6681542
##   Prom.Viernes Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto Cant.Solodaños
## 1    0.2771217   0.3281517    0.5469088   0.4337867   0.2247644      0.1796823
## 2   -0.5397831  -0.5605474   -0.6181230  -0.6005043  -0.4741052     -0.4847847
## 3    2.6730933   2.6317445    2.2411600   2.5256126   2.4144126      2.6478882
## 
## Clustering vector:
##   [1] 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 1 1 2 1 1 3 2 2 2 2 3 2 2 1 2 2
##  [38] 2 1 1 2 2 3 1 2 1 2 3 1 1 2 1 2 3 1 1 3 3 2 3 1 2 2 2 1 3 2 2 3 1 1 2 1 1
##  [75] 2 2 2 3 2 2 2 2 1 2 1 2 2 2 1 2 1 2 1 2 2 1 2 2 2 1 2 2 2 2 1 2 1 1 1 2 1
## [112] 2 2 1 1 2 2 1 2 2 3 3 1 2 2 3 2 2 1 3 1 1 2 3 1 1 2 1 2 2 1 1 2 2 2 1 2 2
## [149] 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 1 1
## [186] 2 2 1 2 1 2 3 3 2 2 1 1 1 1 1 2 2 2 2 1 1 2 2 1 2 2 3 2 2 2 2 2 2 1 2 2 2
## [223] 2 2 1 2 1 1 2 2 3 2 1 2 2 2 2 2 2 2 2 3 1 1 2 3 1 3 1 1 2 1 2 1 2 2 2 2 1
## [260] 1 2 2 1 2 3 1 2 2 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 3 1
## [297] 2 1 1 2 2 1 3 2 1 2 3 2 1 2 1 2 2 3 2 2 2 2
## 
## Within cluster sum of squares by cluster:
## [1]  623.9744  356.9394 1420.9900
##  (between_SS / total_SS =  75.6 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# Show the structure of the fitted k-means object.
str(kmeansb)

## List of 9
##  $ cluster     : int [1:318] 2 2 2 1 1 1 2 2 2 2 ...
##  $ centers     : num [1:3, 1:31] 0.276 -0.54 2.68 0.418 -0.596 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "1" "2" "3"
##   .. ..$ : chr [1:31] "Cant.LunesAViernes" "Cant.SabadoYDomingo" "Cant.Enero" "Cant.Febrero" ...
##  $ totss       : num 9827
##  $ withinss    : num [1:3] 624 357 1421
##  $ tot.withinss: num 2402
##  $ betweenss   : num 7425
##  $ size        : int [1:3] 100 190 28
##  $ iter        : int 2
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"

# Cluster assigned by kmeans() to each record (row) of the dataset.
kmeansb$cluster

##   [1] 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 1 1 2 1 1 3 2 2 2 2 3 2 2 1 2 2
##  [38] 2 1 1 2 2 3 1 2 1 2 3 1 1 2 1 2 3 1 1 3 3 2 3 1 2 2 2 1 3 2 2 3 1 1 2 1 1
##  [75] 2 2 2 3 2 2 2 2 1 2 1 2 2 2 1 2 1 2 1 2 2 1 2 2 2 1 2 2 2 2 1 2 1 1 1 2 1
## [112] 2 2 1 1 2 2 1 2 2 3 3 1 2 2 3 2 2 1 3 1 1 2 3 1 1 2 1 2 2 1 1 2 2 2 1 2 2
## [149] 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 1 1
## [186] 2 2 1 2 1 2 3 3 2 2 1 1 1 1 1 2 2 2 2 1 1 2 2 1 2 2 3 2 2 2 2 2 2 1 2 2 2
## [223] 2 2 1 2 1 1 2 2 3 2 1 2 2 2 2 2 2 2 2 3 1 1 2 3 1 3 1 1 2 1 2 1 2 2 2 2 1
## [260] 1 2 2 1 2 3 1 2 2 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 3 1
## [297] 2 1 1 2 2 1 3 2 1 2 3 2 1 2 1 2 2 3 2 2 2 2

# Class of the fitted object.
class(kmeansb)

## [1] "kmeans"

kmeansb$size # number of observations per cluster

## [1] 100 190  28

print(kmeansb$centers) # centroids

##   Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero Cant.Febrero Cant.Marzo
## 1          0.2757212           0.4178284  0.3178879    0.3121736  0.3284424
## 2         -0.5399939          -0.5959389 -0.5535021   -0.5483857 -0.5563943
## 3          2.6795261           2.5516266  2.6205933    2.6062826  2.6025239
##   Cant.Abril  Cant.Mayo Cant.Junio Cant.Julio Cant.Agosto Cant.Septiembre
## 1  0.2856990  0.3380426  0.3123922  0.2999523   0.3067199       0.3145019
## 2 -0.5495121 -0.5628420 -0.5572585 -0.5515426  -0.5544604      -0.5529425
## 3  2.7084783  2.6119901  2.6657108  2.6713520   2.6669816       2.6288887
##   Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## 1     0.297177      0.2651240      0.2783776      0.3340632
## 2    -0.550587     -0.5354035     -0.5380507     -0.4640159
## 3     2.674780      2.6862237      2.6568523      1.9555964
##   Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## 1            0.5191942   0.2382901           -0.05607722   -0.01834908
## 2           -0.5932910  -0.5197936           -0.05607722   -0.16484205
## 3            2.1716384   2.6761351            0.58079973    1.18410347
##    Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles Prom.Jueves
## 1  0.4684082        0.3881370  0.2869085   0.2528078      0.2651069   0.2771974
## 2 -0.5923719       -0.5680917 -0.5455607  -0.5291238     -0.5335053  -0.5390950
## 3  2.3467803        2.4687042  2.6773458   2.6875977      2.6734043   2.6681542
##   Prom.Viernes Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto Cant.Solodaños
## 1    0.2771217   0.3281517    0.5469088   0.4337867   0.2247644      0.1796823
## 2   -0.5397831  -0.5605474   -0.6181230  -0.6005043  -0.4741052     -0.4847847
## 3    2.6730933   2.6317445    2.2411600   2.5256126   2.4144126      2.6478882

# Plot the k-means clustering with convex hulls around each cluster
fviz_cluster(
  object = kmeansb,
  data = preprocessed,
  ellipse = TRUE,
  ellipse.type = "convex"
)

# Variation of ellipse.type: "norm" instead of "convex"
fviz_cluster(
  object = kmeansb,
  data = preprocessed,
  ellipse = TRUE,
  ellipse.type = "norm"
)

# Show points only
fviz_cluster(object = kmeansb, data = preprocessed, geom = "point")

# Show text only
fviz_cluster(object = kmeansb, data = preprocessed, geom = "text")

# Append a column with the labels produced by k-means
accidentes <- cbind(accidentes, cluster_KM3 = kmeansb$cluster)

# PAM clustering with three groups
# +++++++++++++++++++++

pam.res <- pam(x = preprocessed, k = 3)
print(pam.res$medoids) # medoids

##      Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero Cant.Febrero Cant.Marzo
## [1,]         -0.6688667          -0.7238666 -0.6296357   -0.6415364 -0.7516725
## [2,]          0.1382430           0.2534733 -0.0777776    0.2626649  0.1115138
## [3,]          2.4427732           2.3295978  2.5142830    2.5750485  1.8944888
##      Cant.Abril  Cant.Mayo  Cant.Junio Cant.Julio Cant.Agosto Cant.Septiembre
## [1,] -0.6983843 -0.6814624 -0.68124862 -0.6540477  -0.6796086      -0.7622864
## [2,]  0.1530736  0.1721407  0.07296601  0.2586694   0.2236956       0.1389059
## [3,]  2.5487011  2.6778788  2.49828363  2.7581103   2.5882271       2.3645779
##      Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## [1,]   -0.6447143     -0.6856195     -0.6336260   -0.448375161
## [2,]    0.2570093      0.1500090      0.1740731   -0.001405565
## [3,]    2.2927186      2.0373767      2.2629498    0.965015183
##      Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## [1,]           -0.6677850  -0.6643222           -0.05607722    -0.2530908
## [2,]            0.2885565   0.1440817           -0.05607722    -0.2530908
## [3,]            1.9302760   2.5265793           -0.05607722    -0.2530908
##      Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles
## [1,] -0.713033       -0.7396584 -0.7201636 -0.61317413    -0.68757059
## [2,]  0.175656        0.5727161  0.1721474  0.06813672     0.03678655
## [3,]  2.519734        1.7157520  2.5103326  2.08338253     2.51290534
##      Prom.Jueves Prom.Viernes Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto
## [1,]  -0.5875415   -0.6673898  -0.7152623   -0.7211354  -0.7286452  -0.5227079
## [2,]   0.2733880    0.1293061   0.0887604    0.5348448   0.4998497   0.0000000
## [3,]   2.4828530    2.6189810   2.4576013    1.9395595   1.9991148   1.5681236
##      Cant.Solodaños
## [1,]     -0.6091301
## [2,]     -0.1384130
## [3,]      2.6880790

# Per-cluster information from the PAM fit (see the recorded columns below).
print(pam.res$clusinfo)

##      size  max_diss  av_diss  diameter separation
## [1,]  173  3.753545 1.071213  4.446303  0.8021127
## [2,]  115  6.144416 2.354976  8.065085  0.8021127
## [3,]   30 18.161751 6.424774 25.784204  2.1316785

# Append a column with the labels produced by PAM
accidentes <- cbind(accidentes, cluster_pam3 = pam.res$clustering)

# Plots of the PAM clustering
fviz_cluster(object = pam.res, geom = "point", ellipse.type = "norm")

fviz_cluster(object = pam.res, geom = "point", ellipse.type = "convex")

clusplot(pam.res)

fviz_silhouette(sil.obj = pam.res)

##   cluster size ave.sil.width
## 1       1  173          0.69
## 2       2  115          0.28
## 3       3   30          0.26

# Se observa en la gráfica de Silhouette que algunos barrios quedaron mal agrupados (valores negativos en los grupos 2 y 3)

## CLARA clustering with 3 groups
# +++++++++++++++++++++
clarax <- clara(x = preprocessed, k = 3)

# Per-cluster information
clarax[["clusinfo"]]

##      size  max_diss   av_diss isolation
## [1,]  146  3.798411 0.8761162  1.104351
## [2,]  143  7.289565 2.4177546  2.119370
## [3,]   29 18.392023 6.4999886  1.305015

# Medoids of the CLARA fit, on the scaled variables
clarax$medoids

##      Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero Cant.Febrero  Cant.Marzo
## [1,]        -0.66736926         -0.77013120 -0.7132505   -0.7156512 -0.62431714
## [2,]        -0.08038041          0.05106566 -0.2115614    0.0106744  0.08321262
## [3,]         2.75273928          2.46839162  2.8989114    2.5157566  2.44636202
##      Cant.Abril   Cant.Mayo    Cant.Junio  Cant.Julio Cant.Agosto
## [1,] -0.6983843 -0.61262344 -0.7256141821 -0.66808954 -0.67960857
## [2,] -0.2365766 -0.02060839 -0.0009766005 -0.05025022  0.03772122
## [3,]  2.7218790  2.67787881  2.8236311154  2.96873735  2.82733707
##      Cant.Septiembre Cant.Octubre Cant.Noviembre Cant.Diciembre Cant.Atropello
## [1,]     -0.72132315  -0.69936424     -0.7288416   -0.675403486    -0.53293698
## [2,]     -0.07956496  -0.02990278     -0.1669535   -0.006962923    -0.03764634
## [3,]      2.69228423   2.44300587      2.3687466    2.931390386     1.04957700
##      Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello Cant.Incendio
## [1,]          -0.74748010 -0.66432215           -0.05607722    -0.2530908
## [2,]          -0.04616302 -0.05841474           -0.05607722    -0.2530908
## [3,]           1.93027602  2.88569413           -0.05607722    -0.2530908
##       Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes Prom.Miercoles
## [1,] -0.6872739      -0.61265446 -0.6953772 -0.64186091    -0.67278780
## [2,] -0.0304168       0.02236548 -0.1335518 -0.06812545    -0.04451885
## [3,]  2.2492637       2.47777592  2.7168860  2.86509709     2.76421292
##      Prom.Jueves Prom.Viernes  Prom.Sabado Prom.Domingo Cant.Herido Cant.Muerto
## [1,]  -0.6923007  -0.64604975 -0.672035236   -0.8202917 -0.73115232  -0.6969438
## [2,]  -0.0923166  -0.09120792 -0.006339057    0.1382195 -0.03667667  -0.3484719
## [3,]   2.8028445   2.59764094  2.595927824    2.0717679  2.04424317   1.3938877
##      Cant.Solodaños
## [1,]    -0.62007701
## [2,]    -0.06178463
## [3,]     3.15660675

# Cluster plot of the CLARA partition
fviz_cluster(clarax, stand = TRUE, geom = "point", pointsize = 1)

# Silhouette plot with one colour per cluster: the fit has 3 clusters, so 3
# colours are supplied (the previous 2:3 gave only two, which were recycled
# across individual bars instead of identifying clusters).
plot(silhouette(clarax), col = 2:4, main = "Silhouette plot")

fviz_cluster(clarax, ellipse.type = "convex")

fviz_cluster(clarax)

fviz_silhouette(clarax)

##   cluster size ave.sil.width
## 1       1   20          0.64
## 2       2   21          0.19
## 3       3    5          0.35

# Append the CLARA cluster label of each row as a new column
accidentes <- cbind(accidentes, cluster_clara3 = clarax[["clustering"]])

# Se observa en la gráfica de Silhouette que algunos barrios quedaron mal agrupados (valores negativos en el grupo 2)

##  EM clustering - another clustering technique
# +++++++++++++++++++++

# Gaussian mixture model with 3 components
em.res <- Mclust(data = preprocessed, G = 3)
summary(em.res)

## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VEI (diagonal, equal shape) model with 3 components: 
## 
##  log-likelihood   n  df       BIC       ICL
##       -1703.023 318 128 -4143.589 -4143.663
## 
## Clustering table:
##   1   2   3 
##  87 130 101

# BIC of the candidate mixture models. The misspelled `ellipse.tye` argument was
# dropped: it matched no parameter, and ellipses do not apply to a BIC plot.
fviz_mclust(em.res, "BIC", palette = "jco")

# Observations drawn as points, coloured by their EM classification
fviz_mclust(em.res, "classification", geom = "point", palette = "jco")

# Append the EM cluster label of each row as a new column
accidentes <- cbind(accidentes, cluster_em3 = em.res$classification)

#+++++++++++++++++++++
##  Fuzzy clustering
# +++++++++++++++++++++

# Fuzzy c-means with 3 centers and 5 starts; then show the first rows of the
# `u` component (one column per cluster), rounded to 3 decimals.
res_FCM <- fcm(preprocessed, centers = 3, nstart = 5)
round(head(res_FCM[["u"]]), digits = 3)

##   Cluster 1 Cluster 2 Cluster 3
## 1     0.002     0.979     0.020
## 2     0.002     0.978     0.020
## 3     0.015     0.669     0.317
## 4     0.011     0.096     0.894
## 5     0.031     0.094     0.875
## 6     0.014     0.182     0.804

# Convert the fuzzy result with ppclust2(..., "kmeans") so it can be plotted
# and queried for centers, sizes and cluster labels below.
res_FCM <- ppclust2(res_FCM, "kmeans")

fviz_cluster(
  res_FCM,
  data = preprocessed,
  geom = "point",
  ellipse = TRUE,
  palette = "jco"
)

print(res_FCM[["centers"]])

##           Cant.LunesAViernes Cant.SabadoYDomingo Cant.Enero Cant.Febrero
## Cluster 1          2.9561122           2.7751964  2.8910788    2.8698207
## Cluster 2         -0.5408116          -0.5967405 -0.5544166   -0.5505752
## Cluster 3          0.3465163           0.4827040  0.3898488    0.3862509
##           Cant.Marzo Cant.Abril  Cant.Mayo Cant.Junio Cant.Julio Cant.Agosto
## Cluster 1  2.8884771  2.9643251  2.8859308  2.9092804  2.9271220   2.9226262
## Cluster 2 -0.5588042 -0.5491592 -0.5609136 -0.5556365 -0.5536051  -0.5549653
## Cluster 3  0.3972716  0.3547924  0.4124841  0.3751505  0.3758290   0.3765390
##           Cant.Septiembre Cant.Octubre Cant.Noviembre Cant.Diciembre
## Cluster 1       2.8985954    2.9248262      2.9472039      2.9560161
## Cluster 2      -0.5553525   -0.5500755     -0.5381139     -0.5390821
## Cluster 3       0.3820101    0.3656105      0.3359483      0.3391632
##           Cant.Atropello Cant.CaidadeOcupante Cant.Choque Cant.ChoqueyAtropello
## Cluster 1      2.0831174            2.3488022   2.9613348            0.12102718
## Cluster 2     -0.4590509           -0.5933615  -0.5214924           -0.04839200
## Cluster 3      0.3549332            0.5683959   0.3119756           -0.02338138
##           Cant.Incendio  Cant.Otro Cant.Volcamiento Prom.Lunes Prom.Martes
## Cluster 1   0.940872518  2.5827118        2.6041310  2.9409172   2.9613383
## Cluster 2  -0.198058808 -0.5915561       -0.5711099 -0.5460295  -0.5302200
## Cluster 3   0.001700511  0.5362145        0.4656899  0.3577663   0.3241188
##           Prom.Miercoles Prom.Jueves Prom.Viernes Prom.Sabado Prom.Domingo
## Cluster 1      2.9630493   2.9480059    2.9485308   2.8990749    2.3690470
## Cluster 2     -0.5350260  -0.5399722   -0.5393795  -0.5602725   -0.6208296
## Cluster 3      0.3340993   0.3476416    0.3500036   0.3922335    0.6103348
##           Cant.Herido Cant.Muerto Cant.Solodaños
## Cluster 1   2.6950283   2.5102618      2.9877768
## Cluster 2  -0.5998527  -0.4719835     -0.4868794
## Cluster 3   0.5090057   0.2943235      0.2411929

# Number of observations assigned to each fuzzy c-means cluster
print(res_FCM[["size"]])

##   1   2   3 
##  27 195  96

# Append the fuzzy c-means cluster label of each row as a new column
accidentes <- cbind(accidentes, cluster_FCM3 = res_FCM[["cluster"]])

# DBSCAN clustering: k-nearest-neighbour distance plot (k = 5) with a dashed
# reference line at height 3, the value passed as eps below.
dbscan::kNNdistplot(preprocessed, k = 5)
abline(h = 3, lty = 2)

# MinPts = 5 is the number of points required for a cluster
res_DBSCAN <- fpc::dbscan(
  preprocessed,
  eps = 3,
  MinPts = 5
)
print(res_DBSCAN)

## dbscan Pts=318 MinPts=5 eps=3
##         0   1  2 3
## border 20   4  1 4
## seed    0 278 10 1
## total  20 282 11 5

# Three views of the DBSCAN partition.
# NOTE(review): the first call originally passed ellipse.type = "True", which is
# not a recognised ellipse type. "norm" is used here so that this view differs
# from the two convex-hull views below -- confirm the intended type.
fviz_cluster(res_DBSCAN, data = preprocessed, ellipse = TRUE, ellipse.type = "norm", geom = "point", palette = "jco")

fviz_cluster(res_DBSCAN, data = preprocessed, ellipse = TRUE, geom = "point", palette = "jco")

fviz_cluster(res_DBSCAN, data = preprocessed, ellipse = TRUE, geom = "point", palette = "jco", show.clust.cent = TRUE, ellipse.type = "convex")

# This clustering model would suggest three groups;
# the black points are treated as outliers by this method.

# Append the DBSCAN cluster label of each row as a new column
accidentes <- cbind(accidentes, cluster_DBSCAN3 = res_DBSCAN[["cluster"]])

# PCA keeping 3 components, followed by HCPC on the PCA result; both run
# without drawing their own graphs.
res_PCA <- PCA(preprocessed, ncp = 3, graph = FALSE)
res_HCPC <- HCPC(res_PCA, graph = FALSE)
plot(res_HCPC, choice = "3D.map")

fviz_cluster(
  res_HCPC,
  ellipse.type = "convex",
  palette = "jco",
  labelsize = 8
)

# Validation of the clusterings

# Store the validation metrics of the partition obtained with K-means

res_statsKmeans <- cluster.stats(
  d = distancia,
  clustering = kmeansb[["cluster"]]
)
res_statsKmeans

## $n
## [1] 318
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1] 100 190  28
## 
## $min.cluster.size
## [1] 28
## 
## $noisen
## [1] 0
## 
## $diameter
## [1]  8.333660  5.043008 25.784204
## 
## $average.distance
## [1] 3.252148 1.654911 9.074453
## 
## $median.distance
## [1] 3.053169 1.476254 7.941882
## 
## $separation
## [1] 0.539555 0.539555 2.974417
## 
## $average.toother
## [1]  6.196561  7.872913 16.170422
## 
## $separation.matrix
##          [,1]     [,2]     [,3]
## [1,] 0.000000 0.539555 2.974417
## [2,] 0.539555 0.000000 8.889425
## [3,] 2.974417 8.889425 0.000000
## 
## $ave.between.matrix
##           [,1]      [,2]     [,3]
## [1,]  0.000000  5.138169 13.37850
## [2,]  5.138169  0.000000 17.63985
## [3,] 13.378502 17.639853  0.00000
## 
## $average.between
## [1] 8.441336
## 
## $average.within
## [1] 2.81048
## 
## $n.between
## [1] 27120
## 
## $n.within
## [1] 23283
## 
## $max.diameter
## [1] 25.7842
## 
## $min.separation
## [1] 0.539555
## 
## $within.cluster.ss
## [1] 2401.904
## 
## $clus.avg.silwidths
##         1         2         3 
## 0.3173740 0.6527521 0.2722472 
## 
## $avg.silwidth
## [1] 0.5137837
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.5615967
## 
## $dunn
## [1] 0.0209258
## 
## $dunn2
## [1] 0.5662236
## 
## $entropy
## [1] 0.8854686
## 
## $wb.ratio
## [1] 0.3329425
## 
## $ch
## [1] 486.8857
## 
## $cwidegap
## [1]  3.420281  3.359577 17.971735
## 
## $widestgap
## [1] 17.97174
## 
## $sindex
## [1] 0.8757724
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Store the validation metrics of the partition obtained with fuzzy c-means

res_statsFCM <- cluster.stats(
  d = distancia,
  clustering = res_FCM[["cluster"]]
)
res_statsFCM

## $n
## [1] 318
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1]  27 195  96
## 
## $min.cluster.size
## [1] 27
## 
## $noisen
## [1] 0
## 
## $diameter
## [1] 25.784204  5.046271  9.267822
## 
## $average.distance
## [1] 9.143035 1.714259 3.333081
## 
## $median.distance
## [1] 7.957277 1.535483 3.102970
## 
## $separation
## [1] 1.9889781 0.7158319 0.7158319
## 
## $average.toother
## [1] 16.379180  8.024550  6.264823
## 
## $separation.matrix
##          [,1]      [,2]      [,3]
## [1,] 0.000000 9.5371612 1.9889781
## [2,] 9.537161 0.0000000 0.7158319
## [3,] 1.988978 0.7158319 0.0000000
## 
## $ave.between.matrix
##          [,1]      [,2]      [,3]
## [1,]  0.00000 17.819958 13.452601
## [2,] 17.81996  0.000000  5.269592
## [3,] 13.45260  5.269592  0.000000
## 
## $average.between
## [1] 8.553937
## 
## $average.within
## [1] 2.833705
## 
## $n.between
## [1] 26577
## 
## $n.within
## [1] 23826
## 
## $max.diameter
## [1] 25.7842
## 
## $min.separation
## [1] 0.7158319
## 
## $within.cluster.ss
## [1] 2407.426
## 
## $clus.avg.silwidths
##         1         2         3 
## 0.2722845 0.6478035 0.3175746 
## 
## $avg.silwidth
## [1] 0.5162281
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.5707497
## 
## $dunn
## [1] 0.02776242
## 
## $dunn2
## [1] 0.5763504
## 
## $entropy
## [1] 0.8708566
## 
## $wb.ratio
## [1] 0.331275
## 
## $ch
## [1] 485.4077
## 
## $cwidegap
## [1] 17.971735  3.359577  3.420281
## 
## $widestgap
## [1] 17.97174
## 
## $sindex
## [1] 0.9135188
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Store the validation metrics of the partition obtained with EM
res_statsem <- cluster.stats(
  d = distancia,
  clustering = em.res[["classification"]]
)
res_statsem

## $n
## [1] 318
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1]  87 130 101
## 
## $min.cluster.size
## [1] 87
## 
## $noisen
## [1] 0
## 
## $diameter
## [1]  1.382457  4.554847 32.194573
## 
## $average.distance
## [1] 0.519622 1.800516 7.779795
## 
## $median.distance
## [1] 0.4909074 1.6786867 5.6826626
## 
## $separation
## [1] 0.3602228 0.3602228 0.9577323
## 
## $average.toother
## [1] 5.829219 5.471969 8.796795
## 
## $separation.matrix
##           [,1]      [,2]      [,3]
## [1,] 0.0000000 0.3602228 3.3595765
## [2,] 0.3602228 0.0000000 0.9577323
## [3,] 3.3595765 0.9577323 0.0000000
## 
## $ave.between.matrix
##           [,1]     [,2]      [,3]
## [1,]  0.000000 2.567877 10.026986
## [2,]  2.567877 0.000000  7.973514
## [3,] 10.026986 7.973514  0.000000
## 
## $average.between
## [1] 6.67656
## 
## $average.within
## [1] 3.349162
## 
## $n.between
## [1] 33227
## 
## $n.within
## [1] 17176
## 
## $max.diameter
## [1] 32.19457
## 
## $min.separation
## [1] 0.3602228
## 
## $within.cluster.ss
## [1] 4995.646
## 
## $clus.avg.silwidths
##           1           2           3 
##  0.79116536  0.20655270 -0.08763429 
## 
## $avg.silwidth
## [1] 0.2730571
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.2866883
## 
## $dunn
## [1] 0.01118893
## 
## $dunn2
## [1] 0.3300701
## 
## $entropy
## [1] 1.084565
## 
## $wb.ratio
## [1] 0.5016299
## 
## $ch
## [1] 152.3203
## 
## $cwidegap
## [1]  0.4910698  1.2835604 17.9717351
## 
## $widestgap
## [1] 17.97174
## 
## $sindex
## [1] 0.4759967
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Store the validation metrics of the partition obtained with CLARA
res_statsClara <- cluster.stats(
  d = distancia,
  clustering = clarax[["clustering"]]
)
res_statsClara

## $n
## [1] 318
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1] 146 143  29
## 
## $min.cluster.size
## [1] 29
## 
## $noisen
## [1] 0
## 
## $diameter
## [1]  4.431639  9.033757 25.784204
## 
## $average.distance
## [1] 1.178484 3.267940 9.085147
## 
## $median.distance
## [1] 1.005725 3.050881 7.941882
## 
## $separation
## [1] 0.4870405 0.4870405 2.1316785
## 
## $average.toother
## [1]  6.799619  6.127583 15.936436
## 
## $separation.matrix
##           [,1]      [,2]     [,3]
## [1,] 0.0000000 0.4870405 8.895361
## [2,] 0.4870405 0.0000000 2.131678
## [3,] 8.8953614 2.1316785 0.000000
## 
## $ave.between.matrix
##           [,1]      [,2]     [,3]
## [1,]  0.000000  4.562974 17.82860
## [2,]  4.562974  0.000000 14.00458
## [3,] 17.828596 14.004580  0.00000
## 
## $average.between
## [1] 7.820809
## 
## $average.within
## [1] 2.83913
## 
## $n.between
## [1] 29259
## 
## $n.within
## [1] 21144
## 
## $max.diameter
## [1] 25.7842
## 
## $min.separation
## [1] 0.4870405
## 
## $within.cluster.ss
## [1] 2563.173
## 
## $clus.avg.silwidths
##         1         2         3 
## 0.7296311 0.1975532 0.3065426 
## 
## $avg.silwidth
## [1] 0.4517798
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.4821438
## 
## $dunn
## [1] 0.0188891
## 
## $dunn2
## [1] 0.5022455
## 
## $entropy
## [1] 0.9351805
## 
## $wb.ratio
## [1] 0.3630225
## 
## $ch
## [1] 446.3425
## 
## $cwidegap
## [1]  3.359577  3.406564 17.971735
## 
## $widestgap
## [1] 17.97174
## 
## $sindex
## [1] 0.6443619
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Store the validation metrics of the partition obtained with PAM
res_statspam <- cluster.stats(
  d = distancia,
  clustering = pam.res[["clustering"]]
)
res_statspam

## $n
## [1] 318
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1] 173 115  30
## 
## $min.cluster.size
## [1] 30
## 
## $noisen
## [1] 0
## 
## $diameter
## [1]  4.446303  8.065085 25.784204
## 
## $average.distance
## [1] 1.423992 3.189211 9.080042
## 
## $median.distance
## [1] 1.270701 3.036192 7.919501
## 
## $separation
## [1] 0.8021127 0.8021127 2.1316785
## 
## $average.toother
## [1]  7.425009  6.092475 15.714680
## 
## $separation.matrix
##           [,1]      [,2]     [,3]
## [1,] 0.0000000 0.8021127 8.332451
## [2,] 0.8021127 0.0000000 2.131678
## [3,] 8.3324515 2.1316785 0.000000
## 
## $ave.between.matrix
##           [,1]      [,2]     [,3]
## [1,]  0.000000  4.843185 17.32200
## [2,]  4.843185  0.000000 13.29672
## [3,] 17.321997 13.296715  0.00000
## 
## $average.between
## [1] 8.134922
## 
## $average.within
## [1] 2.784626
## 
## $n.between
## [1] 28535
## 
## $n.within
## [1] 21868
## 
## $max.diameter
## [1] 25.7842
## 
## $min.separation
## [1] 0.8021127
## 
## $within.cluster.ss
## [1] 2458.725
## 
## $clus.avg.silwidths
##         1         2         3 
## 0.6872067 0.2828942 0.2619744 
## 
## $avg.silwidth
## [1] 0.5008768
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.5320828
## 
## $dunn
## [1] 0.03110868
## 
## $dunn2
## [1] 0.533388
## 
## $entropy
## [1] 0.9217289
## 
## $wb.ratio
## [1] 0.3423052
## 
## $ch
## [1] 471.9939
## 
## $cwidegap
## [1]  3.359577  3.406564 17.971735
## 
## $widestgap
## [1] 17.97174
## 
## $sindex
## [1] 0.9326932
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Store the validation metrics of the hierarchical clustering
res_statshc <- cluster.stats(
  d = distancia,
  clustering = res.hc[["cluster"]]
)
res_statshc

## $n
## [1] 318
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1] 174 116  28
## 
## $min.cluster.size
## [1] 28
## 
## $noisen
## [1] 0
## 
## $diameter
## [1]  3.915476 11.455567 25.784204
## 
## $average.distance
## [1] 1.407984 3.407333 9.074453
## 
## $median.distance
## [1] 1.283931 3.209226 7.941882
## 
## $separation
## [1] 0.7254758 0.7254758 2.9744173
## 
## $average.toother
## [1]  7.430223  6.152686 16.170422
## 
## $separation.matrix
##           [,1]      [,2]     [,3]
## [1,] 0.0000000 0.7254758 8.889425
## [2,] 0.7254758 0.0000000 2.974417
## [3,] 8.8894249 2.9744173 0.000000
## 
## $ave.between.matrix
##           [,1]      [,2]     [,3]
## [1,]  0.000000  4.930579 17.78589
## [2,]  4.930579  0.000000 13.74721
## [3,] 17.785894 13.747214  0.00000
## 
## $average.between
## [1] 8.155124
## 
## $average.within
## [1] 2.812341
## 
## $n.between
## [1] 28304
## 
## $n.within
## [1] 22099
## 
## $max.diameter
## [1] 25.7842
## 
## $min.separation
## [1] 0.7254758
## 
## $within.cluster.ss
## [1] 2464.298
## 
## $clus.avg.silwidths
##         1         2         3 
## 0.6937665 0.2489556 0.2949115 
## 
## $avg.silwidth
## [1] 0.4963891
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.5312186
## 
## $dunn
## [1] 0.02813644
## 
## $dunn2
## [1] 0.5433472
## 
## $entropy
## [1] 0.9117564
## 
## $wb.ratio
## [1] 0.3448558
## 
## $ch
## [1] 470.5703
## 
## $cwidegap
## [1]  1.283560  3.406564 17.971735
## 
## $widestgap
## [1] 17.97174
## 
## $sindex
## [1] 0.8854602
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Store the validation metrics of the partition obtained with DBSCAN.
# NOTE(review): the labels include 0 (the points DBSCAN left unassigned); the
# printed result reports 4 groups, so they appear to be counted as a cluster.
res_statsDBSCAN <- cluster.stats(
  d = distancia,
  clustering = res_DBSCAN[["cluster"]]
)
res_statsDBSCAN

## $n
## [1] 318
## 
## $cluster.number
## [1] 4
## 
## $cluster.size
## [1]  20 282  11   5
## 
## $min.cluster.size
## [1] 5
## 
## $noisen
## [1] 0
## 
## $diameter
## [1] 25.784204 13.815379  6.949548  3.968731
## 
## $average.distance
## [1] 10.462179  3.462018  3.329740  2.793879
## 
## $median.distance
## [1] 9.385089 2.816029 3.256945 2.671013
## 
## $separation
## [1] 2.791048 2.901163 3.100635 2.791048
## 
## $average.toother
## [1] 17.204364 13.175217  5.958859 13.540982
## 
## $separation.matrix
##          [,1]     [,2]     [,3]     [,4]
## [1,] 0.000000 2.974390 3.100635 2.791048
## [2,] 2.974390 0.000000 3.359577 2.901163
## [3,] 3.100635 3.359577 0.000000 8.816628
## [4,] 2.791048 2.901163 8.816628 0.000000
## 
## $ave.between.matrix
##           [,1]      [,2]      [,3]      [,4]
## [1,]  0.000000 17.412691 15.912107  8.297707
## [2,] 17.412691  0.000000  5.126976 13.931449
## [3,] 15.912107  5.126976  0.000000 13.064063
## [4,]  8.297707 13.931449 13.064063  0.000000
## 
## $average.between
## [1] 13.1855
## 
## $average.within
## [1] 3.887198
## 
## $n.between
## [1] 10527
## 
## $n.within
## [1] 39876
## 
## $max.diameter
## [1] 25.7842
## 
## $min.separation
## [1] 2.791048
## 
## $within.cluster.ss
## [1] 3976.586
## 
## $clus.avg.silwidths
##          1          2          3          4 
## -0.2357448  0.3211812  0.3478799  0.6638160 
## 
## $avg.silwidth
## [1] 0.2924652
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.7013863
## 
## $dunn
## [1] 0.1082464
## 
## $dunn2
## [1] 0.4900486
## 
## $entropy
## [1] 0.4621882
## 
## $wb.ratio
## [1] 0.2948086
## 
## $ch
## [1] 153.9872
## 
## $cwidegap
## [1] 18.151249  2.974417  2.434518  2.664541
## 
## $widestgap
## [1] 18.15125
## 
## $sindex
## [1] 3.229239
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Store the validation metrics of the partition obtained with HCPC; the cluster
# column of the HCPC data is converted to integer labels first.
res_statsHCPC <- cluster.stats(
  d = distancia,
  clustering = as.integer(res_HCPC$data.clust$clust)
)
res_statsHCPC

## $n
## [1] 318
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1] 194  96  28
## 
## $min.cluster.size
## [1] 28
## 
## $noisen
## [1] 0
## 
## $diameter
## [1]  5.046271  8.333660 25.784204
## 
## $average.distance
## [1] 1.703482 3.258378 9.074453
## 
## $median.distance
## [1] 1.524209 3.054849 7.941882
## 
## $separation
## [1] 0.6418116 0.6418116 2.9744173
## 
## $average.toother
## [1]  7.992519  6.213033 16.170422
## 
## $separation.matrix
##           [,1]      [,2]     [,3]
## [1,] 0.0000000 0.6418116 8.889425
## [2,] 0.6418116 0.0000000 2.974417
## [3,] 8.8894249 2.9744173 0.000000
## 
## $ave.between.matrix
##           [,1]      [,2]     [,3]
## [1,]  0.000000  5.191591 17.59570
## [2,]  5.191591  0.000000 13.29017
## [3,] 17.595702 13.290168  0.00000
## 
## $average.between
## [1] 8.524978
## 
## $average.within
## [1] 2.821901
## 
## $n.between
## [1] 26744
## 
## $n.within
## [1] 23659
## 
## $max.diameter
## [1] 25.7842
## 
## $min.separation
## [1] 0.6418116
## 
## $within.cluster.ss
## [1] 2403.17
## 
## $clus.avg.silwidths
##         1         2         3 
## 0.6445399 0.3250922 0.2666298 
## 
## $avg.silwidth
## [1] 0.5148277
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.569075
## 
## $dunn
## [1] 0.02489166
## 
## $dunn2
## [1] 0.5721106
## 
## $entropy
## [1] 0.8770085
## 
## $wb.ratio
## [1] 0.3310156
## 
## $ch
## [1] 486.5462
## 
## $cwidegap
## [1]  3.359577  3.420281 17.971735
## 
## $widestgap
## [1] 17.97174
## 
## $sindex
## [1] 0.8801449
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

# Collect the validation statistics of every technique in one table, one column
# per technique, and export it together with the labelled data. The HCPC
# statistics are included so the export covers the same techniques as the
# comparison table built afterwards.
clustervalidacion <- as.data.frame(cbind(
  res_statsKmeans, res_statsFCM, res_statsem, res_statsClara,
  res_statspam, res_statshc, res_statsDBSCAN, res_statsHCPC
))


write_xlsx(clustervalidacion, "C:\\RDATA\\ClusterValidacionbarriosR.xlsx")
write_xlsx(accidentes, "C:\\RDATA\\BarriosyAccidentesEtiquetados.xlsx")

# Start the comparison table with the K-means row. Columns, in order: average
# silhouette width (SIL), Dunn index (DUNN), separation index (SEP),
# within/between ratio (WB) and the `ch` statistic (CH).
COMPARA_CLUS <- data.frame(
  SIL = res_statsKmeans$avg.silwidth,
  DUNN = res_statsKmeans$dunn,
  SEP = res_statsKmeans$sindex,
  WB = res_statsKmeans$wb.ratio,
  CH = res_statsKmeans$ch,
  row.names = "K-MEANS"
)

# Append one row of cluster-validation metrics to the comparison table.
#
# Args:
#   dfcompara:     data frame with one row per clustering technique and the
#                  five metric columns used in this script, in this order:
#                  avg.silwidth, dunn, sindex, wb.ratio, ch.
#   vector_clases: cluster label of every observation, passed to
#                  cluster.stats() together with the global `distancia`.
#   nombre:        row name given to the appended row.
#
# Returns:
#   `dfcompara` with one extra row named `nombre`.
#
# The previous version read `avg.sildwitdth` and `index` (misspelled, so both
# were NULL) and omitted `ch`, so every appended row carried 2 values instead
# of 5.
add_resultado <- function(dfcompara, vector_clases, nombre) {
  res_stats <- cluster.stats(distancia, vector_clases)

  # `[[` avoids partial matching and returns NULL for a missing component,
  # which the length check below turns into an explicit error.
  metricas <- c(
    res_stats[["avg.silwidth"]],
    res_stats[["dunn"]],
    res_stats[["sindex"]],
    res_stats[["wb.ratio"]],
    res_stats[["ch"]]
  )
  stopifnot(length(metricas) == ncol(dfcompara))

  dfcompara <- rbind(dfcompara, metricas)
  row.names(dfcompara)[nrow(dfcompara)] <- nombre
  dfcompara
}
# Add one row per remaining technique, in this order: FCM, EM, CLARA, PAM, HC,
# DBSCAN, HCPC. The labels are gathered in a named list and folded into the
# table through add_resultado().
COMPARA_CLUS <- local({
  particiones <- list(
    FCM = res_FCM$cluster,
    EM = em.res$classification,
    CLARA = clarax$clustering,
    PAM = pam.res$clustering,
    HC = res.hc$cluster,
    DBSCAN = res_DBSCAN$cluster,
    HCPC = as.integer(res_HCPC$data.clust$clust)
  )
  Reduce(
    function(tabla, nombre) add_resultado(tabla, particiones[[nombre]], nombre),
    names(particiones),
    COMPARA_CLUS
  )
})

# Comparison table of the clusterings, sorted by the first column (SIL) in
# decreasing order. Tiles run from red (worse) to green (better). DUNN and CH
# are higher-is-better indices like SIL and SEP, so their gradients now go
# red -> green (they were inverted); WB is lower-is-better and stays
# green -> red.
print(formattable(
  COMPARA_CLUS[order(COMPARA_CLUS[, 1], decreasing = TRUE), ],
  digits = 2, format = "f", row.names = TRUE,
  list(
    SIL = color_tile("red", "green"),
    DUNN = color_tile("red", "green"),
    SEP = color_tile("red", "green"),
    WB = color_tile("green", "red"),
    CH = color_tile("red", "green")
  )
))

# Según las métricas de Silhouette y Dunn, el mejor agrupamiento se obtuvo con K-means

# All columns, including the cluster-membership labels added by each technique
names(accidentes)

##  [1] "Barrio"                "Comuna"                "LONGITUD_BARRIO"      
##  [4] "LATITUD_BARRIO"        "Cant.LunesAViernes"    "Cant.SabadoYDomingo"  
##  [7] "Cant.Enero"            "Cant.Febrero"          "Cant.Marzo"           
## [10] "Cant.Abril"            "Cant.Mayo"             "Cant.Junio"           
## [13] "Cant.Julio"            "Cant.Agosto"           "Cant.Septiembre"      
## [16] "Cant.Octubre"          "Cant.Noviembre"        "Cant.Diciembre"       
## [19] "Cant.Atropello"        "Cant.CaidadeOcupante"  "Cant.Choque"          
## [22] "Cant.ChoqueyAtropello" "Cant.Incendio"         "Cant.Otro"            
## [25] "Cant.Volcamiento"      "Prom.Lunes"            "Prom.Martes"          
## [28] "Prom.Miercoles"        "Prom.Jueves"           "Prom.Viernes"         
## [31] "Prom.Sabado"           "Prom.Domingo"          "Cant.Herido"          
## [34] "Cant.Muerto"           "Cant.Solodaños"        "cluster_hc"           
## [37] "cluster_hcut3"         "cluster_enhHcut3"      "cluster_KM3"          
## [40] "cluster_pam3"          "cluster_clara3"        "cluster_em3"          
## [43] "cluster_FCM3"          "cluster_DBSCAN3"

# Drop columns 1-4 (neighbourhood, commune, coordinates) and every cluster-label
# column except cluster_KM3 (column 39), leaving the accident variables plus
# the K-means label.
BarriosYAccidentes <- accidentes[, -c(1:4, 36:38, 40:44)]
print(names(BarriosYAccidentes))

##  [1] "Cant.LunesAViernes"    "Cant.SabadoYDomingo"   "Cant.Enero"           
##  [4] "Cant.Febrero"          "Cant.Marzo"            "Cant.Abril"           
##  [7] "Cant.Mayo"             "Cant.Junio"            "Cant.Julio"           
## [10] "Cant.Agosto"           "Cant.Septiembre"       "Cant.Octubre"         
## [13] "Cant.Noviembre"        "Cant.Diciembre"        "Cant.Atropello"       
## [16] "Cant.CaidadeOcupante"  "Cant.Choque"           "Cant.ChoqueyAtropello"
## [19] "Cant.Incendio"         "Cant.Otro"             "Cant.Volcamiento"     
## [22] "Prom.Lunes"            "Prom.Martes"           "Prom.Miercoles"       
## [25] "Prom.Jueves"           "Prom.Viernes"          "Prom.Sabado"          
## [28] "Prom.Domingo"          "Cant.Herido"           "Cant.Muerto"          
## [31] "Cant.Solodaños"        "cluster_KM3"

# The K-means label is the class to predict, so it is stored as a factor
BarriosYAccidentes[["cluster_KM3"]] <- as.factor(BarriosYAccidentes[["cluster_KM3"]])

# Analyse the clustering with a classification tree:
# this helps identify which variables drove the formation of the groups.

modelo.rpart <- rpart(
  formula = cluster_KM3 ~ .,
  data = BarriosYAccidentes,
  method = "class"
)
# Zero margins on all four sides before drawing the tree and its labels
par(mar = rep(0, 4))
plot(modelo.rpart)
text(modelo.rpart, cex = 0.75)

summary(modelo.rpart)

## Call:
## rpart(formula = cluster_KM3 ~ ., data = BarriosYAccidentes, method = "class")
##   n= 318 
## 
##         CP nsplit rel error    xerror       xstd
## 1 0.734375      0  1.000000 1.0000000 0.06832164
## 2 0.218750      1  0.265625 0.3046875 0.04569944
## 3 0.015625      2  0.046875 0.1015625 0.02758659
## 4 0.010000      3  0.031250 0.1171875 0.02953544
## 
## Variable importance
##          Cant.Abril Cant.SabadoYDomingo         Prom.Sabado         Cant.Herido 
##                  16                  13                  12                  12 
##         Cant.Agosto           Cant.Mayo          Cant.Junio  Cant.LunesAViernes 
##                  12                  11                   5                   5 
##          Cant.Enero          Cant.Julio        Cant.Octubre         Prom.Jueves 
##                   5                   5                   5                   1 
## 
## Node number 1: 318 observations,    complexity param=0.734375
##   predicted class=2  expected loss=0.4025157  P(node) =1
##     class counts:   100   190    28
##    probabilities: 0.314 0.597 0.088 
##   left son=2 (132 obs) right son=3 (186 obs)
##   Primary splits:
##       Cant.SabadoYDomingo < 147.5 to the right, improve=116.9556, (0 missing)
##       Prom.Sabado         < 15.5  to the right, improve=113.9351, (0 missing)
##       Cant.Herido         < 331.5 to the right, improve=109.9363, (0 missing)
##       Cant.Mayo           < 45.5  to the right, improve=106.1399, (0 missing)
##       Cant.Junio          < 42.5  to the right, improve=105.5360, (0 missing)
##   Surrogate splits:
##       Cant.Herido < 331.5 to the right, agree=0.975, adj=0.939, (0 split)
##       Prom.Sabado < 15.5  to the right, agree=0.962, adj=0.909, (0 split)
##       Cant.Agosto < 48.5  to the right, agree=0.953, adj=0.886, (0 split)
##       Cant.Mayo   < 44.5  to the right, agree=0.950, adj=0.879, (0 split)
##       Cant.Abril  < 40.5  to the right, agree=0.943, adj=0.864, (0 split)
## 
## Node number 2: 132 observations,    complexity param=0.21875
##   predicted class=1  expected loss=0.25  P(node) =0.4150943
##     class counts:    99     5    28
##    probabilities: 0.750 0.038 0.212 
##   left son=4 (104 obs) right son=5 (28 obs)
##   Primary splits:
##       Cant.Abril   < 149.5 to the left,  improve=42.10198, (0 missing)
##       Cant.Junio   < 144   to the left,  improve=42.10198, (0 missing)
##       Cant.Julio   < 147.5 to the left,  improve=42.10198, (0 missing)
##       Cant.Octubre < 163.5 to the left,  improve=42.10198, (0 missing)
##       Cant.Enero   < 134   to the left,  improve=40.21169, (0 missing)
##   Surrogate splits:
##       Cant.Junio         < 144   to the left,  agree=1.000, adj=1.000, (0 split)
##       Cant.Julio         < 147.5 to the left,  agree=1.000, adj=1.000, (0 split)
##       Cant.Octubre       < 163.5 to the left,  agree=1.000, adj=1.000, (0 split)
##       Cant.LunesAViernes < 1410  to the left,  agree=0.992, adj=0.964, (0 split)
##       Cant.Enero         < 126   to the left,  agree=0.992, adj=0.964, (0 split)
## 
## Node number 3: 186 observations
##   predicted class=2  expected loss=0.005376344  P(node) =0.5849057
##     class counts:     1   185     0
##    probabilities: 0.005 0.995 0.000 
## 
## Node number 4: 104 observations,    complexity param=0.015625
##   predicted class=1  expected loss=0.04807692  P(node) =0.327044
##     class counts:    99     5     0
##    probabilities: 0.952 0.048 0.000 
##   left son=8 (96 obs) right son=9 (8 obs)
##   Primary splits:
##       Prom.Sabado        < 16.1  to the right, improve=5.769231, (0 missing)
##       Cant.Junio         < 42    to the right, improve=5.074786, (0 missing)
##       Prom.Jueves        < 13.5  to the right, improve=5.074786, (0 missing)
##       Cant.LunesAViernes < 365   to the right, improve=4.519231, (0 missing)
##       Prom.Martes        < 15.3  to the right, improve=4.519231, (0 missing)
##   Surrogate splits:
##       Prom.Jueves        < 13.5  to the right, agree=0.990, adj=0.875, (0 split)
##       Cant.LunesAViernes < 365   to the right, agree=0.981, adj=0.750, (0 split)
##       Cant.Enero         < 34.5  to the right, agree=0.971, adj=0.625, (0 split)
##       Cant.Junio         < 39.5  to the right, agree=0.962, adj=0.500, (0 split)
##       Cant.Agosto        < 42    to the right, agree=0.962, adj=0.500, (0 split)
## 
## Node number 5: 28 observations
##   predicted class=3  expected loss=0  P(node) =0.08805031
##     class counts:     0     0    28
##    probabilities: 0.000 0.000 1.000 
## 
## Node number 8: 96 observations
##   predicted class=1  expected loss=0  P(node) =0.3018868
##     class counts:    96     0     0
##    probabilities: 1.000 0.000 0.000 
## 
## Node number 9: 8 observations
##   predicted class=2  expected loss=0.375  P(node) =0.02515723
##     class counts:     3     5     0
##    probabilities: 0.375 0.625 0.000

# Variable importance stored in the rpart fit
modelo.rpart[["variable.importance"]]

##          Cant.Abril Cant.SabadoYDomingo         Prom.Sabado         Cant.Herido 
##          143.109072          116.955578          112.092484          109.867361 
##         Cant.Agosto           Cant.Mayo          Cant.Junio  Cant.LunesAViernes 
##          106.549787          102.779145           44.986597           44.925262 
##          Cant.Enero          Cant.Julio        Cant.Octubre         Prom.Jueves 
##           44.204108           42.101981           42.101981            5.048077

# Node-by-node table of the fitted tree
modelo.rpart[["frame"]]

##                   var   n  wt dev yval complexity ncompete nsurrogate
## 1 Cant.SabadoYDomingo 318 318 128    2   0.734375        4          5
## 2          Cant.Abril 132 132  33    1   0.218750        4          5
## 4         Prom.Sabado 104 104   5    1   0.015625        4          5
## 8              <leaf>  96  96   0    1   0.010000        0          0
## 9              <leaf>   8   8   3    2   0.010000        0          0
## 5              <leaf>  28  28   0    3   0.010000        0          0
## 3              <leaf> 186 186   1    2   0.010000        0          0
##       yval2.V1     yval2.V2     yval2.V3     yval2.V4     yval2.V5     yval2.V6
## 1 2.000000e+00 1.000000e+02 1.900000e+02 2.800000e+01 3.144654e-01 5.974843e-01
## 2 1.000000e+00 9.900000e+01 5.000000e+00 2.800000e+01 7.500000e-01 3.787879e-02
## 4 1.000000e+00 9.900000e+01 5.000000e+00 0.000000e+00 9.519231e-01 4.807692e-02
## 8 1.000000e+00 9.600000e+01 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
## 9 2.000000e+00 3.000000e+00 5.000000e+00 0.000000e+00 3.750000e-01 6.250000e-01
## 5 3.000000e+00 0.000000e+00 0.000000e+00 2.800000e+01 0.000000e+00 0.000000e+00
## 3 2.000000e+00 1.000000e+00 1.850000e+02 0.000000e+00 5.376344e-03 9.946237e-01
##       yval2.V7 yval2.nodeprob
## 1 8.805031e-02   1.000000e+00
## 2 2.121212e-01   4.150943e-01
## 4 0.000000e+00   3.270440e-01
## 8 0.000000e+00   3.018868e-01
## 9 0.000000e+00   2.515723e-02
## 5 1.000000e+00   8.805031e-02
## 3 0.000000e+00   5.849057e-01

# Complexity-parameter plot of the rpart fit
plotcp(x = modelo.rpart)

# C5.0 decision tree on the same data and class label
modelo.c50 <- C5.0(
  cluster_KM3 ~ .,
  data = BarriosYAccidentes
)
par(mar = rep(0, 4))

plot(modelo.c50)

summary(modelo.c50)

## 
## Call:
## C5.0.formula(formula = cluster_KM3 ~ ., data = BarriosYAccidentes)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Wed Sep 09 19:51:27 2020
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 318 cases (32 attributes) from undefined.data
## 
## Decision tree:
## 
## Cant.SabadoYDomingo <= 147: 2 (186/1)
## Cant.SabadoYDomingo > 147:
## :...Cant.Abril > 143: 3 (28)
##     Cant.Abril <= 143:
##     :...Cant.Enero <= 34: 2 (5/1)
##         Cant.Enero > 34: 1 (99/1)
## 
## 
## Evaluation on training data (318 cases):
## 
##      Decision Tree   
##    ----------------  
##    Size      Errors  
## 
##       4    3( 0.9%)   <<
## 
## 
##     (a)   (b)   (c)    <-classified as
##    ----  ----  ----
##      98     2          (a): class 1
##       1   189          (b): class 2
##                  28    (c): class 3
## 
## 
##  Attribute usage:
## 
##  100.00% Cant.SabadoYDomingo
##   41.51% Cant.Abril
##   32.70% Cant.Enero
## 
## 
## Time: 0.0 secs

# Same C5.0 fit expressed as a rule set (rules = TRUE)
summary(
  C5.0(
    cluster_KM3 ~ .,
    data = BarriosYAccidentes,
    rules = TRUE
  )
)

## 
## Call:
## C5.0.formula(formula = cluster_KM3 ~ ., data = BarriosYAccidentes, rules = TRUE)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Wed Sep 09 19:51:27 2020
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 318 cases (32 attributes) from undefined.data
## 
## Rules:
## 
## Rule 1: (99/1, lift 3.1)
##  Cant.SabadoYDomingo > 147
##  Cant.Enero > 34
##  Cant.Abril <= 143
##  ->  class 1  [0.980]
## 
## Rule 2: (175/1, lift 1.7)
##  Cant.Enero <= 34
##  ->  class 2  [0.989]
## 
## Rule 3: (186/1, lift 1.7)
##  Cant.SabadoYDomingo <= 147
##  ->  class 2  [0.989]
## 
## Rule 4: (28, lift 11.0)
##  Cant.Abril > 143
##  ->  class 3  [0.967]
## 
## Default class: 2
## 
## 
## Evaluation on training data (318 cases):
## 
##          Rules     
##    ----------------
##      No      Errors
## 
##       4    3( 0.9%)   <<
## 
## 
##     (a)   (b)   (c)    <-classified as
##    ----  ----  ----
##      98     2          (a): class 1
##       1   189          (b): class 2
##                  28    (c): class 3
## 
## 
##  Attribute usage:
## 
##   89.62% Cant.SabadoYDomingo
##   86.16% Cant.Enero
##   39.94% Cant.Abril
## 
## 
## Time: 0.0 secs

# Boosting ----
# Fit C5.0 on cluster_KM3 requesting 100 boosting iterations (trials = 100),
# keep the fitted model in modelo.c50boost and plot it.
modelo.c50boost <- C5.0(
  cluster_KM3 ~ .,
  data = BarriosYAccidentes,
  trials = 100
)
plot(modelo.c50boost)

# Analyse the clustering obtained by treating cluster_KM3 as a classification
# target and fitting a random forest on all other columns.

# Fix the RNG seed before fitting so the forest can be reproduced.
set.seed(111)

# importance = TRUE is spelled out instead of `T`: `T` is an ordinary variable
# that can be reassigned, whereas `TRUE` is a reserved constant. The flag asks
# randomForest to compute the importance measures printed further down.
modelo.RF <- randomForest(
  cluster_KM3 ~ .,
  data = BarriosYAccidentes,
  ntree = 100,
  importance = TRUE
)
plot(modelo.RF)

# Raw importance matrix stored on the fitted object.
print(modelo.RF$importance)

##                                  1             2           3
## Cant.LunesAViernes    0.0193983786  0.0111279944 0.077526779
## Cant.SabadoYDomingo   0.0918622898  0.0621053625 0.117683261
## Cant.Enero            0.0318714742  0.0044507901 0.063422799
## Cant.Febrero          0.0144694414  0.0025386901 0.005000000
## Cant.Marzo            0.0095256516  0.0053040233 0.022136364
## Cant.Abril            0.0607064449  0.0314221728 0.153435398
## Cant.Mayo             0.0060859205  0.0003203090 0.046696970
## Cant.Junio            0.0394501680  0.0240452614 0.144774309
## Cant.Julio            0.0096751254  0.0009059172 0.092345960
## Cant.Agosto           0.0309229479  0.0042246013 0.079374736
## Cant.Septiembre       0.0228890134  0.0193276096 0.045908702
## Cant.Octubre          0.0222178517  0.0105226633 0.081907925
## Cant.Noviembre        0.0103212768  0.0035101518 0.012272727
## Cant.Diciembre        0.0112340797  0.0064698459 0.017888112
## Cant.Atropello        0.0175140590  0.0079777269 0.002222222
## Cant.CaidadeOcupante  0.0068513901  0.0046262764 0.000000000
## Cant.Choque           0.0038130140  0.0055602971 0.007500000
## Cant.ChoqueyAtropello 0.0000000000  0.0000000000 0.000000000
## Cant.Incendio         0.0000000000  0.0000000000 0.000000000
## Cant.Otro             0.0153855908  0.0025704448 0.011444444
## Cant.Volcamiento      0.0019359039 -0.0001296705 0.018257576
## Prom.Lunes            0.0297167054  0.0104303666 0.066374237
## Prom.Martes           0.0096628205  0.0166812150 0.053829004
## Prom.Miercoles        0.0094324950  0.0073382928 0.066358586
## Prom.Jueves           0.0192202874  0.0009860188 0.007142857
## Prom.Viernes          0.0278831928  0.0043307807 0.062590909
## Prom.Sabado           0.0749554813  0.0444597891 0.094082001
## Prom.Domingo          0.0171086222  0.0095689437 0.008888889
## Cant.Herido           0.0248196152  0.0189669795 0.061684149
## Cant.Muerto           0.0005733006 -0.0001550713 0.006363636
## Cant.Solodaños        0.0006861322  0.0041824699 0.001250000
##                       MeanDecreaseAccuracy MeanDecreaseGini
## Cant.LunesAViernes             0.020043339        6.4419968
## Cant.SabadoYDomingo            0.077512888       29.5250836
## Cant.Enero                     0.018618382        5.8555404
## Cant.Febrero                   0.006688890        0.5808970
## Cant.Marzo                     0.007767399        3.1929940
## Cant.Abril                     0.051549244       14.9371034
## Cant.Mayo                      0.006126934        4.0860543
## Cant.Junio                     0.039434492       12.7900091
## Cant.Julio                     0.011266903        6.8052435
## Cant.Agosto                    0.018741933        7.0545303
## Cant.Septiembre                0.022937547        5.7598355
## Cant.Octubre                   0.020705479        4.7614581
## Cant.Noviembre                 0.006495920        2.3708172
## Cant.Diciembre                 0.009058918        3.5176547
## Cant.Atropello                 0.010334778        0.7819900
## Cant.CaidadeOcupante           0.004773217        0.8339558
## Cant.Choque                    0.005293528        0.7728750
## Cant.ChoqueyAtropello          0.000000000        0.0000000
## Cant.Incendio                  0.000000000        0.0000000
## Cant.Otro                      0.007387652        2.6558982
## Cant.Volcamiento               0.002303266        0.2995172
## Prom.Lunes                     0.021257216        8.1936747
## Prom.Martes                    0.017494566        3.2504045
## Prom.Miercoles                 0.013213047        4.5120648
## Prom.Jueves                    0.007881923        1.4888083
## Prom.Viernes                   0.017415297        4.3218956
## Prom.Sabado                    0.058471927       22.0205698
## Prom.Domingo                   0.011769134        0.8905496
## Cant.Herido                    0.024625827       11.2870618
## Cant.Muerto                    0.000634663        0.1050367
## Cant.Solodaños                 0.002739340        0.1901909

# Plot the variable-importance measures of the fitted random forest.
varImpPlot(x = modelo.RF)

# fin

# Interpretación de los agrupamientos obtenidos:

# Cluster 1: Barrios con alta accidentalidad los fines de semana y con mayor número de accidentes en enero.
# Es el segundo grupo donde más muertos por accidente se presentan.
# La consideramos como Zona de riesgo Medio.

# Cluster 2 : Barrios con alto nivel de accidentalidad durante toda la semana, pero con menor número de accidentes en enero y abril.
# Son los barrios con menor promedio de muertos por accidente
# La consideramos como Zona de riesgo Bajo.

# Cluster 3: Son barrios con la mayor accidentalidad los fines de semana, con mayor proporción de muertes, mayor proporción de heridos,
# mayor proporción de accidentes con solo daños y con mayor número de accidentes en abril.
# La consideramos como Zona de riesgo Alto.

# Con los agrupamientos obtenidos se procede a presentarlos en un mapa, el cual se publica en la dirección:
# https://william-jovel.shinyapps.io/Accidentes/

# Bibliografia
# An Introduction to Statistical Learning: with Applications in R (Springer Texts in Statistics)
# https://www.r-graph-gallery.com/dendrogram/
# https://www.r-graph-gallery.com/336-interactive-dendrogram-with-collapsibletree/
# https://www.analyticsvidhya.com/blog/2016/11/an-introduction-to-clustering-and-different-methods-of-clustering/

# Clustering Accidentalidad en Medellín años 2014-2018

# Manuela Londono Ocampo, Alexis Arenas Bustamante, Juan Esteban Arroyave, William Jovel Tamayo

# 9/9/2020

# Se observa de la gráfica de Silhouette que algunos barrios quedaron mal agrupados (valores negativos en el grupo 2).