Contexto

La base de datos USArrests contiene estadísticas en arrestos por cada 100,000 residentes por agresión, asesinato y violación en cada uno de los 50 estados de EE.UU. En 1973.

Instalar paquetesy llamar librerias

library(cluster)
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(data.table)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(rnaturalearth)
library(rnaturalearthdata)
## 
## Adjuntando el paquete: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
library(devtools)
## Cargando paquete requerido: usethis
library(caret)
## Cargando paquete requerido: lattice
library(datasets)
library(ggplot2)
library(lattice)
library(DataExplorer)

Importar base de datos

df <- USArrests
summary(df)
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00

Clusters

# Scale the data
scaled_df <- scale(df)
# Generate the clusters
n_clusters = 4
kmean <- kmeans(scaled_df, centers = n_clusters)
# Assign the labels
labels <- cbind(df, cluster = kmean$cluster)
# Plot the clusters
fviz_cluster(kmean, data = df)

# Optimize the clusters
set.seed(123)
optimal <- clusGap(scaled_df, FUN = kmeans, nstart = 1, K.max = 10)
plot(optimal, xlab = 'Number of clusters')

# Metodo de la silueta
# fviz_nbclust(df, kmeans, method = 'wss') +
#   ggtitle('Elbow method')
# Compare the clusters
cluster_mean <- aggregate(labels, by = list(labels$cluster), FUN = mean)
cluster_mean
##   Group.1 Murder Assault UrbanPop   Rape cluster
## 1       1  5.050  136.70     79.3 17.590       1
## 2       2  6.880  136.50     60.6 19.330       2
## 3       3  2.680   70.10     51.0 10.910       3
## 4       4 12.165  255.25     68.4 29.165       4
table(labels$cluster)
## 
##  1  2  3  4 
## 10 10 10 20
new_cluster_names <- c("Safe State", "High Crime State", "Dangerous State", "Low Crime State")

labels <- labels %>%
  mutate(cluster_name = case_when(
    cluster == 1 ~ new_cluster_names[1],
    cluster == 2 ~ new_cluster_names[2],
    cluster == 3 ~ new_cluster_names[3],
    cluster == 4 ~ new_cluster_names[4]
  ))
print(labels)
##                Murder Assault UrbanPop Rape cluster     cluster_name
## Alabama          13.2     236       58 21.2       4  Low Crime State
## Alaska           10.0     263       48 44.5       4  Low Crime State
## Arizona           8.1     294       80 31.0       4  Low Crime State
## Arkansas          8.8     190       50 19.5       2 High Crime State
## California        9.0     276       91 40.6       4  Low Crime State
## Colorado          7.9     204       78 38.7       4  Low Crime State
## Connecticut       3.3     110       77 11.1       1       Safe State
## Delaware          5.9     238       72 15.8       1       Safe State
## Florida          15.4     335       80 31.9       4  Low Crime State
## Georgia          17.4     211       60 25.8       4  Low Crime State
## Hawaii            5.3      46       83 20.2       1       Safe State
## Idaho             2.6     120       54 14.2       3  Dangerous State
## Illinois         10.4     249       83 24.0       4  Low Crime State
## Indiana           7.2     113       65 21.0       2 High Crime State
## Iowa              2.2      56       57 11.3       3  Dangerous State
## Kansas            6.0     115       66 18.0       2 High Crime State
## Kentucky          9.7     109       52 16.3       2 High Crime State
## Louisiana        15.4     249       66 22.2       4  Low Crime State
## Maine             2.1      83       51  7.8       3  Dangerous State
## Maryland         11.3     300       67 27.8       4  Low Crime State
## Massachusetts     4.4     149       85 16.3       1       Safe State
## Michigan         12.1     255       74 35.1       4  Low Crime State
## Minnesota         2.7      72       66 14.9       3  Dangerous State
## Mississippi      16.1     259       44 17.1       4  Low Crime State
## Missouri          9.0     178       70 28.2       4  Low Crime State
## Montana           6.0     109       53 16.4       2 High Crime State
## Nebraska          4.3     102       62 16.5       2 High Crime State
## Nevada           12.2     252       81 46.0       4  Low Crime State
## New Hampshire     2.1      57       56  9.5       3  Dangerous State
## New Jersey        7.4     159       89 18.8       1       Safe State
## New Mexico       11.4     285       70 32.1       4  Low Crime State
## New York         11.1     254       86 26.1       4  Low Crime State
## North Carolina   13.0     337       45 16.1       4  Low Crime State
## North Dakota      0.8      45       44  7.3       3  Dangerous State
## Ohio              7.3     120       75 21.4       1       Safe State
## Oklahoma          6.6     151       68 20.0       2 High Crime State
## Oregon            4.9     159       67 29.3       2 High Crime State
## Pennsylvania      6.3     106       72 14.9       1       Safe State
## Rhode Island      3.4     174       87  8.3       1       Safe State
## South Carolina   14.4     279       48 22.5       4  Low Crime State
## South Dakota      3.8      86       45 12.8       3  Dangerous State
## Tennessee        13.2     188       59 26.9       4  Low Crime State
## Texas            12.7     201       80 25.5       4  Low Crime State
## Utah              3.2     120       80 22.9       1       Safe State
## Vermont           2.2      48       32 11.2       3  Dangerous State
## Virginia          8.5     156       63 20.7       2 High Crime State
## Washington        4.0     145       73 26.2       1       Safe State
## West Virginia     5.7      81       39  9.3       3  Dangerous State
## Wisconsin         2.6      53       66 10.8       3  Dangerous State
## Wyoming           6.8     161       60 15.6       2 High Crime State

Modelo de Random Forest

labels <- labels %>%
  mutate(cluster_name = as.factor(cluster_name)) %>%
  select(-cluster)
print(labels)
##                Murder Assault UrbanPop Rape     cluster_name
## Alabama          13.2     236       58 21.2  Low Crime State
## Alaska           10.0     263       48 44.5  Low Crime State
## Arizona           8.1     294       80 31.0  Low Crime State
## Arkansas          8.8     190       50 19.5 High Crime State
## California        9.0     276       91 40.6  Low Crime State
## Colorado          7.9     204       78 38.7  Low Crime State
## Connecticut       3.3     110       77 11.1       Safe State
## Delaware          5.9     238       72 15.8       Safe State
## Florida          15.4     335       80 31.9  Low Crime State
## Georgia          17.4     211       60 25.8  Low Crime State
## Hawaii            5.3      46       83 20.2       Safe State
## Idaho             2.6     120       54 14.2  Dangerous State
## Illinois         10.4     249       83 24.0  Low Crime State
## Indiana           7.2     113       65 21.0 High Crime State
## Iowa              2.2      56       57 11.3  Dangerous State
## Kansas            6.0     115       66 18.0 High Crime State
## Kentucky          9.7     109       52 16.3 High Crime State
## Louisiana        15.4     249       66 22.2  Low Crime State
## Maine             2.1      83       51  7.8  Dangerous State
## Maryland         11.3     300       67 27.8  Low Crime State
## Massachusetts     4.4     149       85 16.3       Safe State
## Michigan         12.1     255       74 35.1  Low Crime State
## Minnesota         2.7      72       66 14.9  Dangerous State
## Mississippi      16.1     259       44 17.1  Low Crime State
## Missouri          9.0     178       70 28.2  Low Crime State
## Montana           6.0     109       53 16.4 High Crime State
## Nebraska          4.3     102       62 16.5 High Crime State
## Nevada           12.2     252       81 46.0  Low Crime State
## New Hampshire     2.1      57       56  9.5  Dangerous State
## New Jersey        7.4     159       89 18.8       Safe State
## New Mexico       11.4     285       70 32.1  Low Crime State
## New York         11.1     254       86 26.1  Low Crime State
## North Carolina   13.0     337       45 16.1  Low Crime State
## North Dakota      0.8      45       44  7.3  Dangerous State
## Ohio              7.3     120       75 21.4       Safe State
## Oklahoma          6.6     151       68 20.0 High Crime State
## Oregon            4.9     159       67 29.3 High Crime State
## Pennsylvania      6.3     106       72 14.9       Safe State
## Rhode Island      3.4     174       87  8.3       Safe State
## South Carolina   14.4     279       48 22.5  Low Crime State
## South Dakota      3.8      86       45 12.8  Dangerous State
## Tennessee        13.2     188       59 26.9  Low Crime State
## Texas            12.7     201       80 25.5  Low Crime State
## Utah              3.2     120       80 22.9       Safe State
## Vermont           2.2      48       32 11.2  Dangerous State
## Virginia          8.5     156       63 20.7 High Crime State
## Washington        4.0     145       73 26.2       Safe State
## West Virginia     5.7      81       39  9.3  Dangerous State
## Wisconsin         2.6      53       66 10.8  Dangerous State
## Wyoming           6.8     161       60 15.6 High Crime State
# split the data 
set.seed(123)
training <- createDataPartition(labels$cluster_name, p = 0.8, list = FALSE)
training_data <- labels[training, ]
test_data <- labels[-training, ]
model <- train(
  cluster_name ~ ., 
  data = training_data,
  method = 'nnet',
  preProcess = c('scale', 'center'),
  trControl = trainControl(method = 'cv', number = 10)
)
## # weights:  13
## initial  value 52.116221 
## iter  10 value 22.979921
## iter  20 value 21.617726
## iter  30 value 21.611760
## iter  40 value 21.610775
## iter  50 value 21.610593
## iter  50 value 21.610593
## iter  50 value 21.610593
## final  value 21.610593 
## converged
## # weights:  31
## initial  value 54.099065 
## iter  10 value 4.995589
## iter  20 value 0.009486
## final  value 0.000054 
## converged
## # weights:  49
## initial  value 52.120979 
## iter  10 value 9.977961
## iter  20 value 0.036501
## final  value 0.000090 
## converged
## # weights:  13
## initial  value 51.411675 
## iter  10 value 32.892405
## iter  20 value 30.207433
## final  value 30.207382 
## converged
## # weights:  31
## initial  value 64.458832 
## iter  10 value 20.208545
## iter  20 value 19.008376
## iter  30 value 18.851746
## iter  40 value 18.693471
## iter  50 value 18.683157
## iter  50 value 18.683157
## iter  50 value 18.683157
## final  value 18.683157 
## converged
## # weights:  49
## initial  value 56.581586 
## iter  10 value 18.197468
## iter  20 value 16.426749
## iter  30 value 16.239299
## iter  40 value 16.238199
## final  value 16.238198 
## converged
## # weights:  13
## initial  value 51.716739 
## iter  10 value 23.147390
## iter  20 value 17.631757
## iter  30 value 16.282374
## iter  40 value 11.912752
## iter  50 value 9.980716
## iter  60 value 9.826101
## iter  70 value 9.695089
## iter  80 value 9.197012
## iter  90 value 9.185900
## iter 100 value 9.175619
## final  value 9.175619 
## stopped after 100 iterations
## # weights:  31
## initial  value 49.520737 
## iter  10 value 9.795186
## iter  20 value 3.739103
## iter  30 value 3.382503
## iter  40 value 3.127599
## iter  50 value 2.325352
## iter  60 value 0.536733
## iter  70 value 0.342782
## iter  80 value 0.320620
## iter  90 value 0.284845
## iter 100 value 0.202199
## final  value 0.202199 
## stopped after 100 iterations
## # weights:  49
## initial  value 59.413279 
## iter  10 value 3.579442
## iter  20 value 0.177062
## iter  30 value 0.148812
## iter  40 value 0.129814
## iter  50 value 0.120345
## iter  60 value 0.116334
## iter  70 value 0.109206
## iter  80 value 0.103204
## iter  90 value 0.100500
## iter 100 value 0.097175
## final  value 0.097175 
## stopped after 100 iterations
## # weights:  13
## initial  value 49.316975 
## iter  10 value 22.618526
## iter  20 value 18.284076
## iter  30 value 15.304232
## iter  40 value 15.046297
## iter  50 value 14.298623
## iter  60 value 8.992678
## iter  70 value 7.586123
## iter  80 value 6.909568
## iter  90 value 6.856355
## iter 100 value 6.725461
## final  value 6.725461 
## stopped after 100 iterations
## # weights:  31
## initial  value 52.449550 
## iter  10 value 13.182943
## iter  20 value 0.264222
## iter  30 value 0.009333
## iter  40 value 0.002762
## final  value 0.000073 
## converged
## # weights:  49
## initial  value 55.403389 
## iter  10 value 5.876817
## iter  20 value 0.080420
## iter  30 value 0.000923
## final  value 0.000073 
## converged
## # weights:  13
## initial  value 56.021994 
## iter  10 value 31.178608
## iter  20 value 29.929985
## final  value 29.929457 
## converged
## # weights:  31
## initial  value 55.157000 
## iter  10 value 19.657673
## iter  20 value 18.415518
## iter  30 value 18.414324
## final  value 18.414323 
## converged
## # weights:  49
## initial  value 54.316000 
## iter  10 value 21.729740
## iter  20 value 16.319620
## iter  30 value 16.115091
## iter  40 value 16.113531
## final  value 16.113528 
## converged
## # weights:  13
## initial  value 50.498615 
## iter  10 value 30.035011
## iter  20 value 22.867407
## iter  30 value 22.848975
## iter  40 value 22.772774
## iter  50 value 22.416569
## iter  60 value 22.244525
## iter  70 value 22.201195
## iter  80 value 22.159986
## iter  90 value 22.147912
## iter 100 value 22.144711
## final  value 22.144711 
## stopped after 100 iterations
## # weights:  31
## initial  value 65.534658 
## iter  10 value 6.565414
## iter  20 value 0.192389
## iter  30 value 0.164371
## iter  40 value 0.129280
## iter  50 value 0.120841
## iter  60 value 0.114705
## iter  70 value 0.110305
## iter  80 value 0.107986
## iter  90 value 0.106719
## iter 100 value 0.105150
## final  value 0.105150 
## stopped after 100 iterations
## # weights:  49
## initial  value 60.188524 
## iter  10 value 2.873263
## iter  20 value 0.172720
## iter  30 value 0.150986
## iter  40 value 0.126915
## iter  50 value 0.121971
## iter  60 value 0.112863
## iter  70 value 0.105100
## iter  80 value 0.100685
## iter  90 value 0.095399
## iter 100 value 0.092388
## final  value 0.092388 
## stopped after 100 iterations
## # weights:  13
## initial  value 51.075551 
## iter  10 value 23.059438
## iter  20 value 18.798521
## iter  30 value 17.720069
## iter  40 value 17.711311
## iter  50 value 17.697112
## iter  60 value 17.617307
## iter  70 value 17.598133
## iter  80 value 17.593245
## iter  90 value 17.549367
## iter 100 value 17.546575
## final  value 17.546575 
## stopped after 100 iterations
## # weights:  31
## initial  value 50.189915 
## iter  10 value 5.702968
## iter  20 value 0.041499
## iter  30 value 0.000178
## iter  30 value 0.000100
## iter  30 value 0.000100
## final  value 0.000100 
## converged
## # weights:  49
## initial  value 48.945589 
## iter  10 value 3.399431
## iter  20 value 0.164070
## iter  30 value 0.004924
## iter  40 value 0.002668
## final  value 0.000096 
## converged
## # weights:  13
## initial  value 57.508878 
## iter  10 value 29.950906
## iter  20 value 29.687476
## final  value 29.687410 
## converged
## # weights:  31
## initial  value 56.922655 
## iter  10 value 20.450118
## iter  20 value 17.956605
## iter  30 value 17.903606
## iter  40 value 17.858701
## final  value 17.853085 
## converged
## # weights:  49
## initial  value 56.117893 
## iter  10 value 18.393147
## iter  20 value 15.977309
## iter  30 value 15.644858
## iter  40 value 15.639555
## iter  50 value 15.606812
## iter  60 value 15.486600
## iter  70 value 15.485916
## final  value 15.485916 
## converged
## # weights:  13
## initial  value 51.542641 
## iter  10 value 24.275276
## iter  20 value 21.484878
## iter  30 value 21.108215
## iter  40 value 21.016805
## iter  50 value 20.995176
## iter  60 value 20.983845
## iter  70 value 20.963701
## iter  80 value 20.860117
## iter  90 value 20.839022
## iter 100 value 19.583835
## final  value 19.583835 
## stopped after 100 iterations
## # weights:  31
## initial  value 55.587875 
## iter  10 value 14.733351
## iter  20 value 11.481733
## iter  30 value 11.034365
## iter  40 value 10.795005
## iter  50 value 10.629401
## iter  60 value 6.261132
## iter  70 value 0.489179
## iter  80 value 0.387801
## iter  90 value 0.358990
## iter 100 value 0.315604
## final  value 0.315604 
## stopped after 100 iterations
## # weights:  49
## initial  value 46.432638 
## iter  10 value 5.217843
## iter  20 value 0.416876
## iter  30 value 0.271250
## iter  40 value 0.226659
## iter  50 value 0.189875
## iter  60 value 0.148685
## iter  70 value 0.124372
## iter  80 value 0.111494
## iter  90 value 0.104530
## iter 100 value 0.101146
## final  value 0.101146 
## stopped after 100 iterations
## # weights:  13
## initial  value 59.465164 
## iter  10 value 23.837208
## iter  20 value 21.972129
## iter  30 value 21.926459
## iter  40 value 21.921458
## final  value 21.921457 
## converged
## # weights:  31
## initial  value 55.518870 
## iter  10 value 6.115283
## iter  20 value 2.803667
## iter  30 value 0.638613
## iter  40 value 0.101044
## iter  50 value 0.029303
## iter  60 value 0.010704
## iter  70 value 0.005386
## iter  80 value 0.002542
## iter  90 value 0.001554
## iter 100 value 0.001350
## final  value 0.001350 
## stopped after 100 iterations
## # weights:  49
## initial  value 46.680663 
## iter  10 value 1.013195
## iter  20 value 0.002391
## final  value 0.000063 
## converged
## # weights:  13
## initial  value 49.751622 
## iter  10 value 35.664867
## iter  20 value 30.893747
## iter  30 value 30.842907
## iter  40 value 30.841431
## final  value 30.841410 
## converged
## # weights:  31
## initial  value 52.598224 
## iter  10 value 20.560187
## iter  20 value 18.909314
## iter  30 value 18.821225
## final  value 18.821219 
## converged
## # weights:  49
## initial  value 50.872298 
## iter  10 value 18.422817
## iter  20 value 16.655003
## iter  30 value 16.585178
## iter  40 value 16.413468
## iter  50 value 16.412497
## final  value 16.412496 
## converged
## # weights:  13
## initial  value 57.217155 
## iter  10 value 24.902720
## iter  20 value 24.178928
## iter  30 value 24.176246
## iter  40 value 24.173377
## iter  50 value 24.169297
## iter  60 value 24.113955
## iter  70 value 23.892903
## iter  80 value 22.431672
## iter  90 value 19.942788
## iter 100 value 14.776878
## final  value 14.776878 
## stopped after 100 iterations
## # weights:  31
## initial  value 57.149411 
## iter  10 value 10.113007
## iter  20 value 6.650184
## iter  30 value 6.305432
## iter  40 value 5.139439
## iter  50 value 4.324924
## iter  60 value 4.012281
## iter  70 value 3.981113
## iter  80 value 3.817072
## iter  90 value 3.743945
## iter 100 value 3.690590
## final  value 3.690590 
## stopped after 100 iterations
## # weights:  49
## initial  value 52.042206 
## iter  10 value 3.678868
## iter  20 value 0.285262
## iter  30 value 0.248392
## iter  40 value 0.204152
## iter  50 value 0.160979
## iter  60 value 0.141652
## iter  70 value 0.134840
## iter  80 value 0.122514
## iter  90 value 0.115861
## iter 100 value 0.106892
## final  value 0.106892 
## stopped after 100 iterations
## # weights:  13
## initial  value 56.375956 
## iter  10 value 24.765656
## iter  20 value 24.125931
## final  value 24.124665 
## converged
## # weights:  31
## initial  value 52.052573 
## iter  10 value 17.165314
## iter  20 value 6.655472
## iter  30 value 2.841610
## iter  40 value 2.533361
## iter  50 value 2.507732
## iter  60 value 2.502126
## iter  70 value 2.493355
## iter  80 value 2.450227
## iter  90 value 2.333923
## iter 100 value 2.060373
## final  value 2.060373 
## stopped after 100 iterations
## # weights:  49
## initial  value 59.165306 
## iter  10 value 6.559046
## iter  20 value 0.015291
## final  value 0.000079 
## converged
## # weights:  13
## initial  value 49.949119 
## iter  10 value 31.200649
## iter  20 value 29.803881
## final  value 29.780093 
## converged
## # weights:  31
## initial  value 54.060467 
## iter  10 value 23.385542
## iter  20 value 18.646394
## iter  30 value 18.528779
## final  value 18.528627 
## converged
## # weights:  49
## initial  value 54.799240 
## iter  10 value 18.988505
## iter  20 value 17.381968
## iter  30 value 17.147115
## iter  40 value 17.007255
## iter  50 value 16.986250
## iter  60 value 16.986155
## final  value 16.986152 
## converged
## # weights:  13
## initial  value 51.481757 
## iter  10 value 23.379796
## iter  20 value 21.499050
## iter  30 value 20.573314
## iter  40 value 19.976979
## iter  50 value 16.006588
## iter  60 value 10.840780
## iter  70 value 10.436921
## iter  80 value 9.971905
## iter  90 value 9.960733
## iter 100 value 9.959128
## final  value 9.959128 
## stopped after 100 iterations
## # weights:  31
## initial  value 54.058446 
## iter  10 value 17.630572
## iter  20 value 3.047147
## iter  30 value 0.157937
## iter  40 value 0.142202
## iter  50 value 0.131654
## iter  60 value 0.117311
## iter  70 value 0.113007
## iter  80 value 0.110438
## iter  90 value 0.109481
## iter 100 value 0.107759
## final  value 0.107759 
## stopped after 100 iterations
## # weights:  49
## initial  value 56.749507 
## iter  10 value 4.085875
## iter  20 value 0.245387
## iter  30 value 0.183887
## iter  40 value 0.146488
## iter  50 value 0.129128
## iter  60 value 0.119442
## iter  70 value 0.107908
## iter  80 value 0.100516
## iter  90 value 0.096734
## iter 100 value 0.094596
## final  value 0.094596 
## stopped after 100 iterations
## # weights:  13
## initial  value 51.545718 
## iter  10 value 23.065084
## iter  20 value 22.180652
## iter  30 value 20.761207
## iter  40 value 20.133806
## iter  50 value 19.840173
## iter  60 value 19.721684
## iter  70 value 19.423660
## iter  80 value 19.300545
## iter  90 value 19.256965
## iter 100 value 19.233888
## final  value 19.233888 
## stopped after 100 iterations
## # weights:  31
## initial  value 51.166218 
## iter  10 value 8.854028
## iter  20 value 5.598661
## iter  30 value 3.748796
## iter  40 value 3.177309
## iter  50 value 3.019161
## iter  60 value 3.014512
## iter  70 value 3.014449
## iter  80 value 3.014398
## iter  90 value 3.014347
## final  value 3.014346 
## converged
## # weights:  49
## initial  value 51.385594 
## iter  10 value 3.864726
## iter  20 value 0.023973
## iter  30 value 0.000550
## final  value 0.000082 
## converged
## # weights:  13
## initial  value 58.498018 
## iter  10 value 29.814176
## iter  20 value 29.618570
## final  value 29.618484 
## converged
## # weights:  31
## initial  value 53.253606 
## iter  10 value 21.683680
## iter  20 value 18.745500
## iter  30 value 18.734290
## final  value 18.733684 
## converged
## # weights:  49
## initial  value 52.963155 
## iter  10 value 17.326626
## iter  20 value 15.840120
## iter  30 value 15.525514
## iter  40 value 15.507347
## iter  50 value 15.507236
## final  value 15.507236 
## converged
## # weights:  13
## initial  value 48.202119 
## iter  10 value 23.594806
## iter  20 value 22.345142
## iter  30 value 22.273959
## iter  40 value 22.192792
## iter  50 value 22.172968
## iter  60 value 22.111989
## iter  70 value 21.380158
## iter  80 value 20.845710
## iter  90 value 20.806068
## iter 100 value 19.980391
## final  value 19.980391 
## stopped after 100 iterations
## # weights:  31
## initial  value 51.630029 
## iter  10 value 9.836435
## iter  20 value 0.531847
## iter  30 value 0.371597
## iter  40 value 0.321157
## iter  50 value 0.280553
## iter  60 value 0.259706
## iter  70 value 0.242709
## iter  80 value 0.231569
## iter  90 value 0.221853
## iter 100 value 0.215153
## final  value 0.215153 
## stopped after 100 iterations
## # weights:  49
## initial  value 52.307769 
## iter  10 value 4.045411
## iter  20 value 0.204075
## iter  30 value 0.176379
## iter  40 value 0.156087
## iter  50 value 0.143211
## iter  60 value 0.126212
## iter  70 value 0.117100
## iter  80 value 0.110159
## iter  90 value 0.107309
## iter 100 value 0.103566
## final  value 0.103566 
## stopped after 100 iterations
## # weights:  13
## initial  value 50.458978 
## iter  10 value 23.181762
## iter  20 value 23.057675
## iter  30 value 23.037111
## iter  40 value 22.999354
## iter  50 value 22.798011
## iter  60 value 21.998726
## iter  70 value 21.945189
## iter  80 value 21.918283
## iter  90 value 21.133511
## iter 100 value 19.272280
## final  value 19.272280 
## stopped after 100 iterations
## # weights:  31
## initial  value 55.515363 
## iter  10 value 11.673394
## iter  20 value 2.871300
## iter  30 value 0.003099
## final  value 0.000091 
## converged
## # weights:  49
## initial  value 49.858083 
## iter  10 value 1.775548
## iter  20 value 0.000401
## final  value 0.000071 
## converged
## # weights:  13
## initial  value 50.349839 
## iter  10 value 29.287836
## iter  20 value 28.948251
## final  value 28.948219 
## converged
## # weights:  31
## initial  value 50.477634 
## iter  10 value 26.096557
## iter  20 value 18.366878
## iter  30 value 18.146232
## iter  40 value 18.142578
## final  value 18.142578 
## converged
## # weights:  49
## initial  value 70.266878 
## iter  10 value 18.199665
## iter  20 value 16.175907
## iter  30 value 16.148424
## iter  40 value 16.143947
## final  value 16.143919 
## converged
## # weights:  13
## initial  value 49.686192 
## iter  10 value 23.234192
## iter  20 value 20.062437
## iter  30 value 18.663716
## iter  40 value 17.359470
## iter  50 value 16.048058
## iter  60 value 10.719124
## iter  70 value 9.664989
## iter  80 value 9.266331
## iter  90 value 8.988284
## iter 100 value 8.559374
## final  value 8.559374 
## stopped after 100 iterations
## # weights:  31
## initial  value 54.171408 
## iter  10 value 13.656563
## iter  20 value 6.386345
## iter  30 value 5.460039
## iter  40 value 3.709205
## iter  50 value 2.130715
## iter  60 value 1.492281
## iter  70 value 1.347943
## iter  80 value 1.210905
## iter  90 value 1.003687
## iter 100 value 0.971483
## final  value 0.971483 
## stopped after 100 iterations
## # weights:  49
## initial  value 52.069662 
## iter  10 value 0.655843
## iter  20 value 0.354074
## iter  30 value 0.240498
## iter  40 value 0.169816
## iter  50 value 0.147729
## iter  60 value 0.124697
## iter  70 value 0.119114
## iter  80 value 0.114547
## iter  90 value 0.110633
## iter 100 value 0.108335
## final  value 0.108335 
## stopped after 100 iterations
## # weights:  13
## initial  value 57.589874 
## iter  10 value 33.376929
## iter  20 value 29.104331
## iter  30 value 28.537574
## iter  40 value 28.499295
## iter  50 value 28.435001
## iter  60 value 28.080424
## iter  70 value 27.972488
## iter  80 value 27.838201
## iter  90 value 27.809540
## iter 100 value 27.765989
## final  value 27.765989 
## stopped after 100 iterations
## # weights:  31
## initial  value 50.413803 
## iter  10 value 15.685293
## iter  20 value 0.110239
## iter  30 value 0.000248
## final  value 0.000067 
## converged
## # weights:  49
## initial  value 50.658089 
## iter  10 value 2.318984
## iter  20 value 0.005450
## final  value 0.000055 
## converged
## # weights:  13
## initial  value 56.924422 
## iter  10 value 30.831132
## iter  20 value 30.250194
## final  value 30.250189 
## converged
## # weights:  31
## initial  value 57.771461 
## iter  10 value 21.235261
## iter  20 value 18.872212
## iter  30 value 18.705789
## iter  40 value 18.700743
## final  value 18.700742 
## converged
## # weights:  49
## initial  value 55.297053 
## iter  10 value 18.061170
## iter  20 value 16.466768
## iter  30 value 16.399129
## iter  40 value 16.335166
## iter  50 value 16.271119
## iter  60 value 16.270876
## final  value 16.270875 
## converged
## # weights:  13
## initial  value 58.712642 
## iter  10 value 23.873448
## iter  20 value 17.735656
## iter  30 value 12.071993
## iter  40 value 9.619071
## iter  50 value 9.131800
## iter  60 value 9.078221
## iter  70 value 9.069428
## iter  80 value 9.063154
## iter  90 value 9.055510
## iter 100 value 9.044493
## final  value 9.044493 
## stopped after 100 iterations
## # weights:  31
## initial  value 49.320478 
## iter  10 value 9.065647
## iter  20 value 3.261085
## iter  30 value 1.782577
## iter  40 value 0.695241
## iter  50 value 0.594693
## iter  60 value 0.477195
## iter  70 value 0.458350
## iter  80 value 0.393243
## iter  90 value 0.338720
## iter 100 value 0.305230
## final  value 0.305230 
## stopped after 100 iterations
## # weights:  49
## initial  value 50.093008 
## iter  10 value 4.725724
## iter  20 value 0.330928
## iter  30 value 0.304852
## iter  40 value 0.243937
## iter  50 value 0.184527
## iter  60 value 0.149030
## iter  70 value 0.138952
## iter  80 value 0.131202
## iter  90 value 0.124699
## iter 100 value 0.119725
## final  value 0.119725 
## stopped after 100 iterations
## # weights:  13
## initial  value 48.808703 
## iter  10 value 23.436771
## iter  20 value 20.178103
## iter  30 value 15.622456
## iter  40 value 11.444742
## iter  50 value 10.149950
## iter  60 value 9.242817
## iter  70 value 8.674975
## iter  80 value 8.243298
## iter  90 value 7.999979
## iter 100 value 7.791685
## final  value 7.791685 
## stopped after 100 iterations
## # weights:  31
## initial  value 44.808279 
## iter  10 value 3.908462
## iter  20 value 0.020993
## final  value 0.000064 
## converged
## # weights:  49
## initial  value 53.921382 
## iter  10 value 4.221689
## iter  20 value 0.014770
## iter  30 value 0.002208
## final  value 0.000054 
## converged
## # weights:  13
## initial  value 52.344561 
## iter  10 value 31.908573
## iter  20 value 29.417846
## final  value 29.387985 
## converged
## # weights:  31
## initial  value 60.535716 
## iter  10 value 23.492196
## iter  20 value 17.473166
## iter  30 value 17.297074
## final  value 17.296388 
## converged
## # weights:  49
## initial  value 57.494544 
## iter  10 value 16.535061
## iter  20 value 15.272767
## iter  30 value 15.213974
## iter  40 value 15.209630
## final  value 15.209626 
## converged
## # weights:  13
## initial  value 56.045316 
## iter  10 value 24.561951
## iter  20 value 18.639107
## iter  30 value 15.418322
## iter  40 value 10.684061
## iter  50 value 9.830400
## iter  60 value 9.632803
## iter  70 value 9.533287
## iter  80 value 9.223434
## iter  90 value 9.203076
## iter 100 value 9.199413
## final  value 9.199413 
## stopped after 100 iterations
## # weights:  31
## initial  value 56.377866 
## iter  10 value 2.592824
## iter  20 value 0.420096
## iter  30 value 0.271593
## iter  40 value 0.221900
## iter  50 value 0.159712
## iter  60 value 0.137396
## iter  70 value 0.128155
## iter  80 value 0.121056
## iter  90 value 0.109916
## iter 100 value 0.105751
## final  value 0.105751 
## stopped after 100 iterations
## # weights:  49
## initial  value 47.569864 
## iter  10 value 3.318375
## iter  20 value 0.302141
## iter  30 value 0.266062
## iter  40 value 0.232458
## iter  50 value 0.197361
## iter  60 value 0.169139
## iter  70 value 0.146873
## iter  80 value 0.134597
## iter  90 value 0.126511
## iter 100 value 0.114335
## final  value 0.114335 
## stopped after 100 iterations
## # weights:  13
## initial  value 58.188528 
## iter  10 value 24.387616
## iter  20 value 24.126265
## iter  30 value 24.124680
## final  value 24.124660 
## converged
## # weights:  31
## initial  value 51.526223 
## iter  10 value 8.398731
## iter  20 value 4.963183
## iter  30 value 3.867587
## iter  40 value 2.925045
## iter  50 value 2.478015
## iter  60 value 2.381370
## iter  70 value 2.374012
## iter  80 value 2.371618
## iter  90 value 2.329656
## iter 100 value 2.288486
## final  value 2.288486 
## stopped after 100 iterations
## # weights:  49
## initial  value 53.611894 
## iter  10 value 6.933767
## iter  20 value 0.030238
## iter  30 value 0.000550
## final  value 0.000087 
## converged
## # weights:  13
## initial  value 50.463447 
## iter  10 value 32.362995
## iter  20 value 30.607819
## final  value 30.607712 
## converged
## # weights:  31
## initial  value 55.119665 
## iter  10 value 20.515404
## iter  20 value 18.947805
## iter  30 value 18.452973
## iter  40 value 18.407001
## final  value 18.406843 
## converged
## # weights:  49
## initial  value 53.051331 
## iter  10 value 18.548364
## iter  20 value 16.429410
## iter  30 value 15.976858
## iter  40 value 15.962644
## iter  50 value 15.962335
## final  value 15.962335 
## converged
## # weights:  13
## initial  value 49.753849 
## iter  10 value 24.053460
## iter  20 value 21.847185
## iter  30 value 12.885010
## iter  40 value 8.399066
## iter  50 value 7.972040
## iter  60 value 7.891578
## iter  70 value 7.843990
## iter  80 value 7.822940
## iter  90 value 7.818793
## iter 100 value 7.808103
## final  value 7.808103 
## stopped after 100 iterations
## # weights:  31
## initial  value 58.887796 
## iter  10 value 7.272140
## iter  20 value 0.202101
## iter  30 value 0.168480
## iter  40 value 0.149096
## iter  50 value 0.136488
## iter  60 value 0.128016
## iter  70 value 0.125784
## iter  80 value 0.123931
## iter  90 value 0.121345
## iter 100 value 0.119671
## final  value 0.119671 
## stopped after 100 iterations
## # weights:  49
## initial  value 51.880379 
## iter  10 value 3.656291
## iter  20 value 0.211031
## iter  30 value 0.182190
## iter  40 value 0.135686
## iter  50 value 0.115796
## iter  60 value 0.101491
## iter  70 value 0.098736
## iter  80 value 0.095314
## iter  90 value 0.093418
## iter 100 value 0.091782
## final  value 0.091782 
## stopped after 100 iterations
## # weights:  49
## initial  value 71.621878 
## iter  10 value 22.337349
## iter  20 value 18.616792
## iter  30 value 18.321087
## iter  40 value 18.303398
## iter  50 value 18.297871
## iter  60 value 18.297708
## final  value 18.297708 
## converged
training_results <- predict(model, training_data)
test_results <- predict(model, test_data)

# Confusion matrix for training data
cmtr <- confusionMatrix(training_results, training_data$cluster_name)
print(cmtr)
## Confusion Matrix and Statistics
## 
##                   Reference
## Prediction         Dangerous State High Crime State Low Crime State Safe State
##   Dangerous State                8                0               0          0
##   High Crime State               0                8               0          0
##   Low Crime State                0                0              16          0
##   Safe State                     0                0               0          8
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9119, 1)
##     No Information Rate : 0.4        
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: Dangerous State Class: High Crime State
## Sensitivity                             1.0                     1.0
## Specificity                             1.0                     1.0
## Pos Pred Value                          1.0                     1.0
## Neg Pred Value                          1.0                     1.0
## Prevalence                              0.2                     0.2
## Detection Rate                          0.2                     0.2
## Detection Prevalence                    0.2                     0.2
## Balanced Accuracy                       1.0                     1.0
##                      Class: Low Crime State Class: Safe State
## Sensitivity                             1.0               1.0
## Specificity                             1.0               1.0
## Pos Pred Value                          1.0               1.0
## Neg Pred Value                          1.0               1.0
## Prevalence                              0.4               0.2
## Detection Rate                          0.4               0.2
## Detection Prevalence                    0.4               0.2
## Balanced Accuracy                       1.0               1.0
# Confusion matrix for testing data
cmts <- confusionMatrix(test_results, test_data$cluster_name)
print(cmts)
## Confusion Matrix and Statistics
## 
##                   Reference
## Prediction         Dangerous State High Crime State Low Crime State Safe State
##   Dangerous State                2                0               0          0
##   High Crime State               0                2               0          0
##   Low Crime State                0                0               4          0
##   Safe State                     0                0               0          2
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.6915, 1)
##     No Information Rate : 0.4        
##     P-Value [Acc > NIR] : 0.0001049  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: Dangerous State Class: High Crime State
## Sensitivity                             1.0                     1.0
## Specificity                             1.0                     1.0
## Pos Pred Value                          1.0                     1.0
## Neg Pred Value                          1.0                     1.0
## Prevalence                              0.2                     0.2
## Detection Rate                          0.2                     0.2
## Detection Prevalence                    0.2                     0.2
## Balanced Accuracy                       1.0                     1.0
##                      Class: Low Crime State Class: Safe State
## Sensitivity                             1.0               1.0
## Specificity                             1.0               1.0
## Pos Pred Value                          1.0               1.0
## Neg Pred Value                          1.0               1.0
## Prevalence                              0.4               0.2
## Detection Rate                          0.4               0.2
## Detection Prevalence                    0.4               0.2
## Balanced Accuracy                       1.0               1.0
LS0tDQp0aXRsZTogIlVTQXJyZXN0cyINCmF1dGhvcjogJ092ZWQgUnVpeiAtIGEwMTE3NDQzNScNCmRhdGU6ICIyMDI1LTAyLTIxIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICAgIHRvYzogVFJVRQ0KICAgICAgdG9jX2Zsb2F0OiBUUlVFDQogICAgICBjb2RlX2Rvd25sb2FkOiBUUlVFDQogICAgICB0aGVtZTogJ2pvdXJuYWwnDQotLS0NCg0KIVtdKEM6XFxVc2Vyc1xcQUNFUlxcRG93bmxvYWRzXFxPSVAgKDEpLmpwZykNCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6IEJsdWU7Ij5Db250ZXh0bzwvc3Bhbj4NCg0KDQpMYSBiYXNlIGRlIGRhdG9zICoqVVNBcnJlc3RzKiogY29udGllbmUgZXN0YWTDrXN0aWNhcyBlbiBhcnJlc3RvcyBwb3IgY2FkYSAxMDAsMDAwIHJlc2lkZW50ZXMgcG9yIGFncmVzacOzbiwgYXNlc2luYXRvIHkgdmlvbGFjacOzbiBlbiBjYWRhIHVubyBkZSBsb3MgNTAgZXN0YWRvcyBkZSBFRS5VVS4gRW4gMTk3My4gIA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjogQmx1ZTsiPkluc3RhbGFyIHBhcXVldGVzeSBsbGFtYXIgbGlicmVyaWFzPC9zcGFuPg0KDQpgYGB7cn0NCmxpYnJhcnkoY2x1c3RlcikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZmFjdG9leHRyYSkNCmxpYnJhcnkoZGF0YS50YWJsZSkNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHNmKQ0KbGlicmFyeShybmF0dXJhbGVhcnRoKQ0KbGlicmFyeShybmF0dXJhbGVhcnRoZGF0YSkNCmxpYnJhcnkoZGV2dG9vbHMpDQpsaWJyYXJ5KGNhcmV0KQ0KbGlicmFyeShkYXRhc2V0cykNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkobGF0dGljZSkNCmxpYnJhcnkoRGF0YUV4cGxvcmVyKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBCbHVlOyI+SW1wb3J0YXIgYmFzZSBkZSBkYXRvczwvc3Bhbj4NCg0KYGBge3J9DQpkZiA8LSBVU0FycmVzdHMNCnN1bW1hcnkoZGYpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6IEJsdWU7Ij5DbHVzdGVyczwvc3Bhbj4NCg0KYGBge3J9DQojIFNjYWxlIHRoZSBkYXRhDQpzY2FsZWRfZGYgPC0gc2NhbGUoZGYpDQojIEdlbmVyYXRlIHRoZSBjbHVzdGVycw0Kbl9jbHVzdGVycyA9IDQNCmttZWFuIDwtIGttZWFucyhzY2FsZWRfZGYsIGNlbnRlcnMgPSBuX2NsdXN0ZXJzKQ0KIyBBc3NpZ24gdGhlIGxhYmVscw0KbGFiZWxzIDwtIGNiaW5kKGRmLCBjbHVzdGVyID0ga21lYW4kY2x1c3RlcikNCiMgUGxvdCB0aGUgY2x1c3RlcnMNCmZ2aXpfY2x1c3RlcihrbWVhbiwgZGF0YSA9IGRmKQ0KYGBgDQoNCmBgYHtyfQ0KIyBPcHRpbWl6ZSB0aGUgY2x1c3RlcnMNCnNldC5zZWVkKDEyMykNCm9wdGltYWwgPC0gY2x1c0dhcChzY2FsZWRfZGYsIEZVTiA9IGttZWFucywgbnN0YXJ0ID0gMSwgSy5tYXggPSAxMCkNCnBsb3Qob3B0aW1hbCwgeGxhYiA9ICdOdW1iZXIgb2YgY2x1c3RlcnMnKQ0KIyBNZXRvZG8gZGUgbGEgc2lsdWV0YQ0KIyBmdml6X25iY2x1c3QoZGYsIGttZWFucywgbWV0aG9kID0gJ3dzcycpICsNCiMgICBnZ3RpdGxlKCdFbGJvdyBtZXRob2QnKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDb21wYXJlIHRoZSBjbHVzdGVycw0KY2x1c3Rlcl9tZWFuIDwtIGFnZ3JlZ2F0ZShsYWJlbHMsIGJ5ID0gbGlzdChsYWJlbHMkY2x1c3RlciksIEZVTiA9IG1lYW4pDQpjbHVzdGVyX21lYW4NCnRhYmxlKGxhYmVscyRjbHVzdGVyKQ0KYGBgDQoNCmBgYHtyfQ0KbmV3X2NsdXN0ZXJfbmFtZXMgPC0gYygiU2FmZSBTdGF0ZSIsICJIaWdoIENyaW1lIFN0YXRlIiwgIkRhbmdlcm91cyBTdGF0ZSIsICJMb3cgQ3JpbWUgU3RhdGUiKQ0KDQpsYWJlbHMgPC0gbGFiZWxzICU+JQ0KICBtdXRhdGUoY2x1c3Rlcl9uYW1lID0gY2FzZV93aGVuKA0KICAgIGNsdXN0ZXIgPT0gMSB+IG5ld19jbHVzdGVyX25hbWVzWzFdLA0KICAgIGNsdXN0ZXIgPT0gMiB+IG5ld19jbHVzdGVyX25hbWVzWzJdLA0KICAgIGNsdXN0ZXIgPT0gMyB+IG5ld19jbHVzdGVyX25hbWVzWzNdLA0KICAgIGNsdXN0ZXIgPT0gNCB+IG5ld19jbHVzdGVyX25hbWVzWzRdDQogICkpDQpwcmludChsYWJlbHMpDQoNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjogQmx1ZTsiPk1vZGVsbyBkZSBSYW5kb20gRm9yZXN0PC9zcGFuPg0KDQpgYGB7cn0NCmxhYmVscyA8LSBsYWJlbHMgJT4lDQogIG11dGF0ZShjbHVzdGVyX25hbWUgPSBhcy5mYWN0b3IoY2x1c3Rlcl9uYW1lKSkgJT4lDQogIHNlbGVjdCgtY2x1c3RlcikNCnByaW50KGxhYmVscykNCmBgYA0KDQpgYGB7cn0NCiMgc3BsaXQgdGhlIGRhdGEgDQpzZXQuc2VlZCgxMjMpDQp0cmFpbmluZyA8LSBjcmVhdGVEYXRhUGFydGl0aW9uKGxhYmVscyRjbHVzdGVyX25hbWUsIHAgPSAwLjgsIGxpc3QgPSBGQUxTRSkNCnRyYWluaW5nX2RhdGEgPC0gbGFiZWxzW3RyYWluaW5nLCBdDQp0ZXN0X2RhdGEgPC0gbGFiZWxzWy10cmFpbmluZywgXQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWwgPC0gdHJhaW4oDQogIGNsdXN0ZXJfbmFtZSB+IC4sIA0KICBkYXRhID0gdHJhaW5pbmdfZGF0YSwNCiAgbWV0aG9kID0gJ25uZXQnLA0KICBwcmVQcm9jZXNzID0gYygnc2NhbGUnLCAnY2VudGVyJyksDQogIHRyQ29udHJvbCA9IHRyYWluQ29udHJvbChtZXRob2QgPSAnY3YnLCBudW1iZXIgPSAxMCkNCikNCg0KdHJhaW5pbmdfcmVzdWx0cyA8LSBwcmVkaWN0KG1vZGVsLCB0cmFpbmluZ19kYXRhKQ0KdGVzdF9yZXN1bHRzIDwtIHByZWRpY3QobW9kZWwsIHRlc3RfZGF0YSkNCg0KIyBDb25mdXNpb24gbWF0cml4IGZvciB0cmFpbmluZyBkYXRhDQpjbXRyIDwtIGNvbmZ1c2lvbk1hdHJpeCh0cmFpbmluZ19yZXN1bHRzLCB0cmFpbmluZ19kYXRhJGNsdXN0ZXJfbmFtZSkNCnByaW50KGNtdHIpDQoNCiMgQ29uZnVzaW9uIG1hdHJpeCBmb3IgdGVzdGluZyBkYXRhDQpjbXRzIDwtIGNvbmZ1c2lvbk1hdHJpeCh0ZXN0X3Jlc3VsdHMsIHRlc3RfZGF0YSRjbHVzdGVyX25hbWUpDQpwcmludChjbXRzKQ0KYGBgDQoNCg==