ETL

Aplicación de Clustering usando el Índice de Libertad Económica y el Índice de Democracia

library(pacman)

Warning: package 'pacman' was built under R version 4.5.2

p_load(umap, cluster, factoextra, tidyverse, skimr, naniar, 
       tictoc, DataExplorer, ggplot2, plotly, psych, NbClust, dbscan, ggrepel)

Importación y preparación de la data

Se carga el primer dataset que corresponde al Índice de Libertad Económica.

library(readr)
library(dplyr)

efi <- read_csv(
  "index_economic_freedom.csv",
  skip = 3,            # saltar las 3 líneas de texto
  na = c("N/A"),        # tratar "N/A" como NA
  col_select = -c('Index Year') # no se lee esa columna
)

Rows: 184 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (1): Country
dbl (13): Overall Score, Property Rights, Government Integrity, Judicial Eff...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

head(efi)

# A tibble: 6 × 14
  Country     `Overall Score` `Property Rights` `Government Integrity`
  <chr>                 <dbl>             <dbl>                  <dbl>
1 Afghanistan            NA                 7.4                   14.1
2 Albania                66.6              58.3                   39.6
3 Algeria                47.5              27.6                   29.5
4 Angola                 55                39.9                   28.2
5 Argentina              54.2              34.7                   39.7
6 Armenia                65.4              49.9                   50.4
# ℹ 10 more variables: `Judicial Effectiveness` <dbl>, `Tax Burden` <dbl>,
#   `Government Spending` <dbl>, `Fiscal Health` <dbl>,
#   `Business Freedom` <dbl>, `Labor Freedom` <dbl>, `Monetary Freedom` <dbl>,
#   `Trade Freedom` <dbl>, `Investment Freedom` <dbl>,
#   `Financial Freedom` <dbl>

Cargamos los dataset de civil liberties y electoral pluralism del Índice de Democracia.

pluralism <- read_csv("electoral_pluralism_index.csv")

Rows: 166 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Entity, World regions according to OWID
dbl (2): Year, Free and fair elections

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

liberties <- read_csv("civil_liberties_index.csv")

Rows: 166 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Entity, World regions according to OWID
dbl (2): Year, Civil liberties

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

head(pluralism)

# A tibble: 6 × 4
  Entity       Year `Free and fair elections` `World regions according to OWID`
  <chr>       <dbl>                     <dbl> <chr>                            
1 Afghanistan  2024                      0    Asia                             
2 Albania      2024                      7    Europe                           
3 Algeria      2024                      3.08 Africa                           
4 Angola       2024                      4.5  Africa                           
5 Argentina    2024                      9.17 South America                    
6 Armenia      2024                      7.92 Asia

head(liberties)

# A tibble: 6 × 4
  Entity       Year `Civil liberties` `World regions according to OWID`
  <chr>       <dbl>             <dbl> <chr>                            
1 Afghanistan  2024              0    Asia                             
2 Albania      2024              7.06 Europe                           
3 Algeria      2024              3.82 Africa                           
4 Angola       2024              2.35 Africa                           
5 Argentina    2024              8.53 South America                    
6 Armenia      2024              5.29 Asia

Nos quedamos solo con las columnas de interés, y estandarizamos el nombre de la columna que corresponde al país.

pluralism <- pluralism |> 
  select(-Year, -'World regions according to OWID') |> 
  rename(Country = Entity) 

liberties <- liberties |> 
  select(-Year) |> 
  rename(Country = Entity) |> 
  rename(`World regions` = `World regions according to OWID`) # más corto

head(liberties)

# A tibble: 6 × 3
  Country     `Civil liberties` `World regions`
  <chr>                   <dbl> <chr>          
1 Afghanistan              0    Asia           
2 Albania                  7.06 Europe         
3 Algeria                  3.82 Africa         
4 Angola                   2.35 Africa         
5 Argentina                8.53 South America  
6 Armenia                  5.29 Asia

You can add options to executable code like this

library(dplyr)

# 1. Vectores de paíse
c_efi  <- sort(unique(efi$Country))
c_pluralism <- sort(unique(pluralism$Country))
c_liberies     <- sort(unique(liberties$Country))

# 2. Todos los países que aparecen en al menos una de las bases
all_countries <- sort(unique(c(c_efi, c_pluralism, c_liberies)))

# 3. Tabla de presencia
presencia <- tibble(
  Country      = all_countries,
  in_cfi  = Country %in% c_efi,
  in_pluralism = Country %in% c_pluralism,
  in_liberties     = Country %in% c_liberies
)

# 4. Ver SOLO los que NO están presentes en las 3 a la vez
presencia_problema <- presencia |> 
  filter(!(in_cfi & in_pluralism & in_liberties)) |> 
  arrange(Country)

presencia_problema

# A tibble: 40 × 4
   Country           in_cfi in_pluralism in_liberties
   <chr>             <lgl>  <lgl>        <lgl>       
 1 Barbados          TRUE   FALSE        FALSE       
 2 Belize            TRUE   FALSE        FALSE       
 3 Brunei Darussalam TRUE   FALSE        FALSE       
 4 Burma             TRUE   FALSE        FALSE       
 5 Cabo Verde        TRUE   FALSE        FALSE       
 6 Cape Verde        FALSE  TRUE         TRUE        
 7 Congo             FALSE  TRUE         TRUE        
 8 Cote d'Ivoire     FALSE  TRUE         TRUE        
 9 Czech Republic    TRUE   FALSE        FALSE       
10 Czechia           FALSE  TRUE         TRUE        
# ℹ 30 more rows

View(presencia_problema)

#TRUE significa que ese nombre de país está presente en ese dataset, y FALSE que no está presente.

Podemos observar los nombres de países que están presente en el dataset del Ínidce de Libertad Econnómica pero no en los del ïndice de Democracia (Pluralism and Liberties), y viceversa también.

Sin embargo, podemos notar que algunos nombres de países diferentes hacen referencia al mismo país debido a que el nombre de algunos países son escritos de manera diferente en distintas bases de datos. Entonces, hay que identificarlos y luego elegir una sola forma de nombrarlos para poder hacer el merge.

Para estandarizar los nombres de los países se tomará como referencia al Índice de Libertad Económica.

pluralism <- pluralism |> 
  mutate(
    Country = recode(
      Country,
      "Cape Verde"  = "Cabo Verde",
      "Congo"       = "Republic of Congo",
      "Cote d'Ivoire" = "Côte d'Ivoire",
      "Czechia"     = "Czech Republic",
      "East Timor"  = "Timor-Leste",
      "Gambia"      = "The Gambia",
      "Kyrgyzstan"  = "Kyrgyz Republic",
      "Myanmar"     = "Burma",
      "Philippines" = "The Philippines",
      "Turkey"      = "Türkiye"
    )
  )

liberties <- liberties |> 
  mutate(
    Country = recode(
      Country,
      "Cape Verde"  = "Cabo Verde",
      "Congo"       = "Republic of Congo",
      "Cote d'Ivoire" = "Côte d'Ivoire",
      "Czechia"     = "Czech Republic",
      "East Timor"  = "Timor-Leste",
      "Gambia"      = "The Gambia",
      "Kyrgyzstan"  = "Kyrgyz Republic",
      "Myanmar"     = "Burma",
      "Philippines" = "The Philippines",
      "Turkey"      = "Türkiye"
    )
  )

Merge

Se procede a unir los dataset para tener la data con todas las variables necesarias para usarla en el algoritmo.

# Une datasets en base a la variable "Country"
data <- efi |> 
  inner_join(pluralism, by = "Country") |> 
  inner_join(liberties, by = "Country")
row.names(data) <- data$Country # Renombrar las filas como países

Warning: Setting row names on a tibble is deprecated.

head(data)

# A tibble: 6 × 17
  Country     `Overall Score` `Property Rights` `Government Integrity`
  <chr>                 <dbl>             <dbl>                  <dbl>
1 Afghanistan            NA                 7.4                   14.1
2 Albania                66.6              58.3                   39.6
3 Algeria                47.5              27.6                   29.5
4 Angola                 55                39.9                   28.2
5 Argentina              54.2              34.7                   39.7
6 Armenia                65.4              49.9                   50.4
# ℹ 13 more variables: `Judicial Effectiveness` <dbl>, `Tax Burden` <dbl>,
#   `Government Spending` <dbl>, `Fiscal Health` <dbl>,
#   `Business Freedom` <dbl>, `Labor Freedom` <dbl>, `Monetary Freedom` <dbl>,
#   `Trade Freedom` <dbl>, `Investment Freedom` <dbl>,
#   `Financial Freedom` <dbl>, `Free and fair elections` <dbl>,
#   `Civil liberties` <dbl>, `World regions` <chr>

Librerias:

library(pacman)
p_load(umap, cluster, factoextra, tidyverse, skimr, naniar, 
       tictoc, DataExplorer, ggplot2, plotly, psych, NbClust)

EDA

Estructura de datos

# resumen general 
skim(data)

Data summary
Name	data
Number of rows	165
Number of columns	17
_______________________
Column type frequency:
character	2
numeric	15
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
Country	0	1	4	28	0	165	0
World regions	0	1	4	13	0	6	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Overall Score	6	0.96	59.57	11.95	3.0	51.85	59.30	68.50	84.1	▁▁▃▇▅
Property Rights	0	1.00	52.23	26.60	0.7	30.80	47.40	74.10	100.0	▂▆▇▃▆
Government Integrity	0	1.00	42.92	22.69	3.3	25.10	39.30	57.20	98.0	▃▇▃▂▂
Judicial Effectiveness	0	1.00	46.50	28.07	2.7	26.50	43.10	68.10	99.3	▆▇▇▃▅
Tax Burden	6	0.96	77.65	12.99	0.0	71.50	78.30	86.45	99.9	▁▁▁▇▇
Government Spending	6	0.96	67.50	23.63	0.0	55.60	73.80	85.85	98.1	▁▂▃▆▇
Fiscal Health	6	0.96	63.13	30.30	0.0	44.60	70.80	89.55	100.0	▃▂▃▅▇
Business Freedom	6	0.96	63.39	17.80	5.0	50.45	67.10	77.90	93.0	▁▃▅▇▇
Labor Freedom	6	0.96	56.43	10.23	5.0	52.30	57.30	62.10	81.8	▁▁▂▇▂
Monetary Freedom	6	0.96	67.18	15.03	0.0	67.15	70.90	74.60	88.7	▁▁▁▇▇
Trade Freedom	5	0.97	70.71	11.90	0.0	64.85	72.20	79.60	95.0	▁▁▂▇▆
Investment Freedom	6	0.96	54.18	20.58	0.0	45.00	60.00	70.00	95.0	▁▂▆▇▂
Financial Freedom	6	0.96	48.77	18.55	0.0	40.00	50.00	60.00	80.0	▁▃▃▇▃
Free and fair elections	0	1.00	5.45	4.00	0.0	0.50	6.58	9.58	10.0	▆▁▂▂▇
Civil liberties	0	1.00	5.35	2.86	0.0	2.94	5.29	7.94	10.0	▅▇▇▆▇

Se observa presencia de missings

Missings

# visualización de patrón de NA
gg_miss_var(data)

Analizando la cantidad de missings.

sum(!complete.cases(data)) # mostrar observaciones con datos faltantes "NA"

[1] 6

sum(!complete.cases(data)) / nrow(data) * 100 # mostrar porcentaje % que representan estas observaciones

[1] 3.636364

La cantidad de observaciones con NA’s representan el 3.6% del total. Así que se puede optar por eliminarlas.

Visualización

Distribuciones

# Histogramas
data |> 
  pivot_longer(cols = -c(Country, `Overall Score`, `World regions`), names_to = "Variable", values_to = "Valor") |> 
  ggplot(aes(x = Valor)) +
  geom_histogram(fill = "#69b3a2", bins = 30) +
  facet_wrap(~Variable, scales = "free") +
  theme_minimal()

Warning: Removed 53 rows containing non-finite outside the scale range
(`stat_bin()`).

Boxplots

data |> 
  select(-Country, -`Overall Score`, -`World regions`) |> 
  pivot_longer(everything(), names_to = "Variable", values_to = "Valor") |>   ggplot(aes(x = Variable, y = Valor)) +
  geom_boxplot(fill = "orange") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Distribución de todas las variables", x = NULL, y = NULL)

Warning: Removed 53 rows containing non-finite outside the scale range
(`stat_boxplot()`).

# 1. Transformar a formato largo
top10_long <- data |> 
  pivot_longer(cols = -c(Country, `World regions`), names_to = "Variable", values_to = "Valor") |> 
  group_by(Variable) |> 
  slice_max(order_by = Valor, n = 10) |> 
  ungroup()

# 2. Visualizar todos los Top 10 con facet_wrap
ggplot(top10_long, aes(x = reorder(Country, Valor), y = Valor)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  facet_wrap(~ Variable, scales = "free_y") +
  labs(title = "Top 10 países por cada variable", x = NULL, y = NULL) +
  theme_minimal()

Preprocesamiento

Eliminación de missings

data <- na.omit(data) 
sum(!complete.cases(data))

[1] 0

Selección de variables

Crear una subdata con las variables numéricas que se van a utilizar

subdata<-data |> 
            select(3:16) # Solo las columnas que son nuestras variables a utilizar
row.names(subdata) <- data$Country # Nombrando las filas con los países

Warning: Setting row names on a tibble is deprecated.

head(subdata)

# A tibble: 6 × 14
  `Property Rights` `Government Integrity` `Judicial Effectiveness` `Tax Burden`
              <dbl>                  <dbl>                    <dbl>        <dbl>
1              58.3                   39.6                     59.2         88.8
2              27.6                   29.5                     29.8         80.3
3              39.9                   28.2                     25.7         86.9
4              34.7                   39.7                     56.5         70.8
5              49.9                   50.4                     31.3         88.1
6              90.4                   86.2                     95.3         62.1
# ℹ 10 more variables: `Government Spending` <dbl>, `Fiscal Health` <dbl>,
#   `Business Freedom` <dbl>, `Labor Freedom` <dbl>, `Monetary Freedom` <dbl>,
#   `Trade Freedom` <dbl>, `Investment Freedom` <dbl>,
#   `Financial Freedom` <dbl>, `Free and fair elections` <dbl>,
#   `Civil liberties` <dbl>

Análisis de Componentes Principales

Correlación

Relación de correlaciones

library(corrplot)

Warning: package 'corrplot' was built under R version 4.5.2

corrplot 0.95 loaded

corrplot(cor(subdata, use="complete.obs"),
         method="number", type="lower",
         tl.cex=0.7,          # etiquetas (nombres variables)
         number.cex=0.7,      # números dentro de celdas
         number.digits=1,
         mar=c(0,0,1,0))

Se observa claramente bloques de variables con correlaciones altas, lo que no solo da buen indicio para justificar la aplicación de ACP, sino también replantear la aplicación de Clustering.

Matriz de Correlación

# Matriz de correlación
cor_data <- subdata
cor_matrix <- cor(cor_data)

# Convertir a tidy para ggplot2
cor_long <- cor_matrix |> 
  as.data.frame() |> 
  rownames_to_column("Var1") |> 
  pivot_longer(-Var1, names_to = "Var2", values_to = "Correlation")

# Gráfico tipo heatmap
ggplot(cor_long, aes(Var1, Var2, fill = Correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
  theme_minimal() +
  coord_fixed() +
  labs(title = "Matriz de correlaciones") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Tests de Verificación

Kaiser-Meyer-Olkin (KMO)

psych::KMO(subdata)

Kaiser-Meyer-Olkin factor adequacy
Call: psych::KMO(r = subdata)
Overall MSA =  0.9
MSA for each item = 
        Property Rights    Government Integrity  Judicial Effectiveness 
                   0.93                    0.93                    0.91 
             Tax Burden     Government Spending           Fiscal Health 
                   0.63                    0.86                    0.68 
       Business Freedom           Labor Freedom        Monetary Freedom 
                   0.95                    0.95                    0.91 
          Trade Freedom      Investment Freedom       Financial Freedom 
                   0.93                    0.88                    0.91 
Free and fair elections         Civil liberties 
                   0.89                    0.88

# Interpretación: el conjunto de variables tiene correlaciones suficientes para reducción de dimensión mayor a 0.6

Mucho mayor a 0.6

Prueba de Esfericidad de Bartlett

# cortest.bartlett(matrixcor,
#                 n=dim(subdata)[1])

# Rechazo H0, la matriz de correlación no es una identidad, y por tanto es válida para PCA.

El p-valor obtenido es menor a 0.05 entonces RECHAZAMOS la hipótesis nula y concluimos que NO ES UNA MATRIZ DE IDENTIDAD. Ergo, hay correlación significativa entre los indicadores que conforman este dataset.

Optimización en la Determinación del Número de Componentes Principales

scree(cor_matrix, 
      pc=TRUE, 
      factors=FALSE)

O también el análisis paralelo (utiliza simulaciones con bootstrap). En este caso explícitamente nos recomienda un número de componentes.

fa.parallel(cor_matrix,fa="pc")

Warning in fa.parallel(cor_matrix, fa = "pc"): It seems as if you are using a
correlation matrix, but have not specified the number of cases. The number of
subjects is arbitrarily set to be 100

Parallel analysis suggests that the number of factors =  NA  and the number of components =  2

El número de componentes recomendados por el algoritmo es 2

Escalamiento

Estandarización robusta del Dataset

# centrar por mediana, escalar por IQR
subdata <- scale(
  subdata, 
  center = apply(subdata, 2, median),  # centrado por mediana
  scale = apply(subdata, 2, IQR)       # escala por IQR
)

Cálculo de los componentes principales

# PCA con prcomp sobre datos robustamente escalados
pc <- prcomp(subdata, center = FALSE, scale. = FALSE)

Análisis del PCA calculado

Varianza:

summary(pc)

Importance of components:
                          PC1    PC2    PC3     PC4     PC5     PC6     PC7
Standard deviation     2.5314 1.5382 0.9451 0.77191 0.64829 0.62707 0.49760
Proportion of Variance 0.5381 0.1987 0.0750 0.05003 0.03529 0.03302 0.02079
Cumulative Proportion  0.5381 0.7367 0.8117 0.86177 0.89705 0.93007 0.95086
                           PC8     PC9    PC10   PC11    PC12    PC13   PC14
Standard deviation     0.42610 0.39613 0.30846 0.2846 0.17791 0.14908 0.1293
Proportion of Variance 0.01524 0.01318 0.00799 0.0068 0.00266 0.00187 0.0014
Cumulative Proportion  0.96611 0.97928 0.98727 0.9941 0.99673 0.99860 1.0000

Visualización gráfica

fviz_eig(pc)

Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
Ignoring empty aesthetic: `width`.

Loadings o cargas de cada PC

pc$rotation

                                PC1         PC2         PC3           PC4
Property Rights         -0.16918054 -0.25553830  0.01782990  0.0003481359
Government Integrity    -0.18596478 -0.32624466  0.02456186 -0.0310945665
Judicial Effectiveness  -0.16394167 -0.28320213  0.04871601 -0.0995664198
Tax Burden              -0.01308185  0.31262578 -0.64734316 -0.0664125014
Government Spending      0.06406693  0.34839387 -0.36920735 -0.0306635255
Fiscal Health           -0.06563946  0.07216788 -0.25804784  0.3776839353
Business Freedom        -0.20528153 -0.17119720 -0.14615680  0.0693550373
Labor Freedom           -0.30196330 -0.13663665 -0.27449635 -0.7273478966
Monetary Freedom        -0.73283571  0.56632403  0.36164920 -0.0016684511
Trade Freedom           -0.23601626 -0.12162914 -0.30612434  0.0763720762
Investment Freedom      -0.25563386 -0.14183553 -0.14448551  0.3963758507
Financial Freedom       -0.28303691 -0.23188001 -0.17609297  0.3628177534
Free and fair elections -0.11063381 -0.14232899 -0.03064571 -0.0596707156
Civil liberties         -0.13251918 -0.21471369 -0.02670914 -0.0955605280
                                PC5         PC6         PC7          PC8
Property Rights         -0.10896271  0.07443083  0.25656632 -0.224496018
Government Integrity    -0.09549726  0.16422891  0.38090198 -0.367231713
Judicial Effectiveness   0.01762045  0.12381360  0.33161523 -0.032897560
Tax Burden              -0.51569586  0.10888463  0.02293432 -0.174029412
Government Spending      0.39944419 -0.32038390  0.62968359  0.094489996
Fiscal Health            0.59253612  0.58669959 -0.17473181 -0.189483087
Business Freedom        -0.01836113  0.11128857  0.10801869  0.142696693
Labor Freedom            0.31379949 -0.13548964 -0.36630580 -0.163151899
Monetary Freedom        -0.04919648  0.06551258  0.05191776 -0.006760781
Trade Freedom           -0.27036857  0.28128161 -0.10556792  0.468516795
Investment Freedom       0.07406099 -0.48923349 -0.21553703  0.142479671
Financial Freedom       -0.02902011 -0.36959578 -0.07230880 -0.309142209
Free and fair elections  0.11171839  0.02095913  0.09185119  0.506479159
Civil liberties          0.09468411  0.05382774  0.17818723  0.315621034
                                PC9        PC10         PC11         PC12
Property Rights          0.01149807 -0.17840794 -0.012296550 -0.627087269
Government Integrity    -0.07772265 -0.24065022  0.038567257  0.680342954
Judicial Effectiveness   0.07581866  0.01024401  0.303627125 -0.315961411
Tax Burden               0.39850185 -0.01028890  0.092401643  0.003854859
Government Spending     -0.25908961 -0.02039591 -0.014834550 -0.014230927
Fiscal Health            0.09971991 -0.01647018  0.086680479 -0.024103465
Business Freedom         0.09109417 -0.06273389 -0.877575879 -0.062107328
Labor Freedom           -0.07216014 -0.04018308 -0.022499524 -0.002653441
Monetary Freedom         0.02920561  0.02293477  0.016583595  0.013014772
Trade Freedom           -0.64671650 -0.01603066  0.163782747  0.002239028
Investment Freedom       0.17883511 -0.60350167  0.152322267  0.020863757
Financial Freedom       -0.10364172  0.67385905  0.001820971  0.018734321
Free and fair elections  0.39762965  0.23582124 -0.007407836  0.194447748
Civil liberties          0.34463013  0.17063080  0.262477764  0.025101730
                                PC13         PC14
Property Rights          0.560383199 -0.180947400
Government Integrity     0.111622701 -0.024660001
Judicial Effectiveness  -0.535926725  0.518507330
Tax Burden              -0.027012764  0.042751620
Government Spending      0.036326828 -0.018598151
Fiscal Health            0.033151309  0.009491218
Business Freedom        -0.265725749 -0.028032349
Labor Freedom            0.025976248  0.011381529
Monetary Freedom        -0.001657757 -0.012510445
Trade Freedom            0.036921605 -0.019863811
Investment Freedom      -0.057905789  0.042027632
Financial Freedom        0.005946078  0.004284610
Free and fair elections  0.503753626  0.423994437
Civil liberties         -0.230950659 -0.715900831

Otro gráfico interesante es el de cargas de las doce variables originales en dos dimensiones:

fviz_pca_var(pc, col.var = "contrib", gradient.cols = c("blue", "orange", "red"))

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
ℹ The deprecated feature was likely used in the ggpubr package.
  Please report the issue at <https://github.com/kassambara/ggpubr/issues>.

Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
ℹ The deprecated feature was likely used in the factoextra package.
  Please report the issue at <https://github.com/kassambara/factoextra/issues>.

pca_df <- as.data.frame(pc$x[, 1:2])  # PC1 y PC2
rownames(pca_df) <- rownames(subdata)

Creación de dataset con los PC’s

data_pcas <- as.data.frame(pc$x[, 1:14])

colnames(data_pcas) <- paste0("pc", 1:14)

head(data_pcas)

                 pc1         pc2         pc3         pc4         pc5        pc6
Albania   -0.8255053  0.19039181 -0.39994243  0.63923704 -0.74936976  0.4278672
Algeria    1.5730929  1.19715938  1.06931424 -0.91597235 -0.31554193  0.8797876
Angola     1.7900501  0.88299451 -0.40587299 -0.00836497 -0.07523845  0.6447173
Argentina  4.6479039 -4.10439415 -1.61500688  0.28340543  0.52267119 -0.7353509
Armenia   -0.5131841  0.08812675 -0.70114624  0.26975751 -0.11598293 -0.1836875
Australia -2.5868146 -2.42894264  0.06025699  0.32120226  0.20770736  0.1429676
                  pc7         pc8         pc9         pc10        pc11
Albania    0.35342161  0.21261147 -0.01033754  0.356594265  0.18135755
Algeria    0.09623137 -0.26367017  0.33267732  0.029646242 -0.34112572
Angola     0.20779448 -0.33273726 -0.33011540  0.378305376  0.27539475
Argentina -0.53626977  0.06932088  0.36451979  0.353637585  0.42207017
Armenia   -0.06232710 -0.33884228  0.26606182 -0.009472341 -0.14130835
Australia  0.38614570 -0.10429492 -0.59729526  0.133323764  0.04700138
                 pc12       pc13        pc14
Albania   -0.23526534 -0.2087663 -0.05304177
Algeria    0.07897747 -0.2094832 -0.04317676
Angola    -0.05984916  0.3374372  0.16368263
Argentina  0.15666941 -0.2821137  0.01757029
Armenia    0.34859675  0.1924481 -0.02981442
Australia  0.13150320 -0.1380388 -0.05629548

Clustering

subdata: variables originales seleccionadas y escaladas
data_pcas: data con los componentes PC1 y PC2

Análisis visual en 2 dimensiones

# Proyección de los países en las 2 primeras dimensiones
fviz_pca_ind(pc,
             geom.ind = "point",
             col.ind = "cos2",
             gradient.cols = c("blue", "orange", "red"),
             repel = TRUE,
             label = "none")

Clustering con variables Originales

Clúster Jerárquico

Cálculo de distancias

distancias <- dist(subdata, method = "euclidean")

Determinación del número de clusters

Métodos gráficos:

Utilizaremos el método del codo (elbow method) mediante la función fviz_nbclust() del paquete factoextra.

Primero usamos el criterio de Suma de Cuadrados dentro de clusters:

fviz_nbclust(subdata,                # Trabajar con todos los indicadores del índice
             hcut,                   # Método de clusterización elegido  
             diss = distancias,      # Euclidiana
             method = "wss",         # Método
             k.max = 15,             # Pruebo con 15 cluster
             verbose = F)

Usando el gráfico de silueta:

fviz_nbclust(subdata,                # Trabajar con todos los indicadores del índice
             hcut,                   # Método de clusterización elegido  
             diss = distancias,      # Euclidiana
             method = "silhouette",  # Método
             k.max = 15,             # Pruebo con 15 cluster
             verbose = F)

Métodos Robustos:

Complementaremos el análisis visual con la función NbClust() que aplica simultáneamente hasta 30 índices diferentes para determinar el número óptimo de clusters.

library(NbClust)
set.seed(2025)
res.nbclust1 <- NbClust(subdata,                 # Data
                       distance = "euclidean",  # Distancia utilizada
                       min.nc = 2,              # Mínimo
                       max.nc = 10,             # Máximo
                       method = "ward.D",       # Método
                       index ="all")            # Índices considerados

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 3 proposed 2 as the best number of clusters 
* 12 proposed 3 as the best number of clusters 
* 1 proposed 4 as the best number of clusters 
* 4 proposed 5 as the best number of clusters 
* 1 proposed 8 as the best number of clusters 
* 1 proposed 9 as the best number of clusters 
* 1 proposed 10 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  3 
 
 
*******************************************************************

La función nos indica que el mejor número de clusters es 3 deacuerdo a la regla de la mayoría.

Método Aglomerativo: Cálculo del clúster aglomerativo

Para k=3

aglomerativo1 = hcut(x = distancias,       # Matriz de distancias
                    k = 3,                # Se visualizarán 2
                    hc_func='agnes',      # Aglomerativo
                    hc_method = "ward.D") # Criterio para aglomerar

Primero, veamos la asignación de cluster:

fviz_dend(aglomerativo1, # Nuestro objeto cluster creado
          rect = TRUE,  # Resalta los cluster solicitados
          cex = 0.5)    # Tamaño de etiquetas

Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.
ℹ The deprecated feature was likely used in the factoextra package.
  Please report the issue at <https://github.com/kassambara/factoextra/issues>.

Validación de clústers:

fviz_silhouette(aglomerativo1, label=TRUE)

  cluster size ave.sil.width
1       1  107          0.27
2       2   14          0.16
3       3   38          0.44

Identificación de casos mal clasificados:

aglomerativo1$silinfo$widths |> data.frame() |> filter(sil_width<0)

             cluster neighbor    sil_width
Serbia             1        3 -0.006449935
Bulgaria           1        3 -0.012346104
Romania            1        3 -0.043132571
Cabo Verde         1        3 -0.070608276
Jamaica            1        3 -0.083313477
Botswana           1        3 -0.085875734
Uruguay            1        3 -0.102904653
Hungary            1        3 -0.170913621
Mauritius          1        3 -0.251016040
Türkiye            2        1 -0.027284535
Sierra Leone       2        1 -0.089941334
Ethiopia           2        1 -0.104669698
Suriname           2        1 -0.147607920
Laos               2        1 -0.239642155

14 observaciones (países) mal clasificados

# Crear tabla País - Clúster
cluster_aglo1 <- data.frame(
  Country = data$Country,
  Cluster = aglomerativo1$cluster
)

# Ver la lista de países por clúster
cluster_aglo1 |> 
  arrange(Cluster)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                     Azerbaijan       1
6                        Bahrain       1
7                     Bangladesh       1
8                        Belarus       1
9                          Benin       1
10                        Bhutan       1
11                       Bolivia       1
12        Bosnia and Herzegovina       1
13                      Botswana       1
14                        Brazil       1
15                      Bulgaria       1
16                  Burkina Faso       1
17                         Burma       1
18                       Burundi       1
19                    Cabo Verde       1
20                      Cambodia       1
21                      Cameroon       1
22      Central African Republic       1
23                          Chad       1
24                         China       1
25                      Colombia       1
26                       Comoros       1
27                    Costa Rica       1
28                 Côte d'Ivoire       1
29  Democratic Republic of Congo       1
30                      Djibouti       1
31            Dominican Republic       1
32                       Ecuador       1
33                         Egypt       1
34                   El Salvador       1
35             Equatorial Guinea       1
36                       Eritrea       1
37                      Eswatini       1
38                          Fiji       1
39                         Gabon       1
40                       Georgia       1
41                         Ghana       1
42                     Guatemala       1
43                        Guinea       1
44                 Guinea-Bissau       1
45                        Guyana       1
46                      Honduras       1
47                       Hungary       1
48                         India       1
49                     Indonesia       1
50                       Jamaica       1
51                        Jordan       1
52                    Kazakhstan       1
53                         Kenya       1
54                        Kuwait       1
55               Kyrgyz Republic       1
56                       Lesotho       1
57                       Liberia       1
58                    Madagascar       1
59                        Malawi       1
60                      Malaysia       1
61                          Mali       1
62                    Mauritania       1
63                     Mauritius       1
64                        Mexico       1
65                       Moldova       1
66                      Mongolia       1
67                    Montenegro       1
68                       Morocco       1
69                    Mozambique       1
70                       Namibia       1
71                         Nepal       1
72                     Nicaragua       1
73                         Niger       1
74                       Nigeria       1
75               North Macedonia       1
76                          Oman       1
77                      Pakistan       1
78                        Panama       1
79              Papua New Guinea       1
80                      Paraguay       1
81                          Peru       1
82                         Qatar       1
83             Republic of Congo       1
84                       Romania       1
85                        Russia       1
86                        Rwanda       1
87                  Saudi Arabia       1
88                       Senegal       1
89                        Serbia       1
90                  South Africa       1
91                     Sri Lanka       1
92                    Tajikistan       1
93                      Tanzania       1
94                      Thailand       1
95                    The Gambia       1
96               The Philippines       1
97                   Timor-Leste       1
98                          Togo       1
99           Trinidad and Tobago       1
100                      Tunisia       1
101                 Turkmenistan       1
102                       Uganda       1
103         United Arab Emirates       1
104                      Uruguay       1
105                   Uzbekistan       1
106                      Vietnam       1
107                       Zambia       1
108                    Argentina       2
109                         Cuba       2
110                     Ethiopia       2
111                        Haiti       2
112                         Iran       2
113                         Laos       2
114                      Lebanon       2
115                  North Korea       2
116                 Sierra Leone       2
117                        Sudan       2
118                     Suriname       2
119                      Türkiye       2
120                    Venezuela       2
121                     Zimbabwe       2
122                    Australia       3
123                      Austria       3
124                      Belgium       3
125                       Canada       3
126                        Chile       3
127                      Croatia       3
128                       Cyprus       3
129               Czech Republic       3
130                      Denmark       3
131                      Estonia       3
132                      Finland       3
133                       France       3
134                      Germany       3
135                       Greece       3
136                      Iceland       3
137                      Ireland       3
138                       Israel       3
139                        Italy       3
140                        Japan       3
141                       Latvia       3
142                    Lithuania       3
143                   Luxembourg       3
144                        Malta       3
145                  Netherlands       3
146                  New Zealand       3
147                       Norway       3
148                       Poland       3
149                     Portugal       3
150                    Singapore       3
151                     Slovakia       3
152                     Slovenia       3
153                  South Korea       3
154                        Spain       3
155                       Sweden       3
156                  Switzerland       3
157                       Taiwan       3
158               United Kingdom       3
159                United States       3

cluster_aglo1 |> filter(Cluster == 1)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                     Azerbaijan       1
6                        Bahrain       1
7                     Bangladesh       1
8                        Belarus       1
9                          Benin       1
10                        Bhutan       1
11                       Bolivia       1
12        Bosnia and Herzegovina       1
13                      Botswana       1
14                        Brazil       1
15                      Bulgaria       1
16                  Burkina Faso       1
17                         Burma       1
18                       Burundi       1
19                    Cabo Verde       1
20                      Cambodia       1
21                      Cameroon       1
22      Central African Republic       1
23                          Chad       1
24                         China       1
25                      Colombia       1
26                       Comoros       1
27                    Costa Rica       1
28                 Côte d'Ivoire       1
29  Democratic Republic of Congo       1
30                      Djibouti       1
31            Dominican Republic       1
32                       Ecuador       1
33                         Egypt       1
34                   El Salvador       1
35             Equatorial Guinea       1
36                       Eritrea       1
37                      Eswatini       1
38                          Fiji       1
39                         Gabon       1
40                       Georgia       1
41                         Ghana       1
42                     Guatemala       1
43                        Guinea       1
44                 Guinea-Bissau       1
45                        Guyana       1
46                      Honduras       1
47                       Hungary       1
48                         India       1
49                     Indonesia       1
50                       Jamaica       1
51                        Jordan       1
52                    Kazakhstan       1
53                         Kenya       1
54                        Kuwait       1
55               Kyrgyz Republic       1
56                       Lesotho       1
57                       Liberia       1
58                    Madagascar       1
59                        Malawi       1
60                      Malaysia       1
61                          Mali       1
62                    Mauritania       1
63                     Mauritius       1
64                        Mexico       1
65                       Moldova       1
66                      Mongolia       1
67                    Montenegro       1
68                       Morocco       1
69                    Mozambique       1
70                       Namibia       1
71                         Nepal       1
72                     Nicaragua       1
73                         Niger       1
74                       Nigeria       1
75               North Macedonia       1
76                          Oman       1
77                      Pakistan       1
78                        Panama       1
79              Papua New Guinea       1
80                      Paraguay       1
81                          Peru       1
82                         Qatar       1
83             Republic of Congo       1
84                       Romania       1
85                        Russia       1
86                        Rwanda       1
87                  Saudi Arabia       1
88                       Senegal       1
89                        Serbia       1
90                  South Africa       1
91                     Sri Lanka       1
92                    Tajikistan       1
93                      Tanzania       1
94                      Thailand       1
95                    The Gambia       1
96               The Philippines       1
97                   Timor-Leste       1
98                          Togo       1
99           Trinidad and Tobago       1
100                      Tunisia       1
101                 Turkmenistan       1
102                       Uganda       1
103         United Arab Emirates       1
104                      Uruguay       1
105                   Uzbekistan       1
106                      Vietnam       1
107                       Zambia       1

cluster_aglo1 |> filter(Cluster == 2)

        Country Cluster
1     Argentina       2
2          Cuba       2
3      Ethiopia       2
4         Haiti       2
5          Iran       2
6          Laos       2
7       Lebanon       2
8   North Korea       2
9  Sierra Leone       2
10        Sudan       2
11     Suriname       2
12      Türkiye       2
13    Venezuela       2
14     Zimbabwe       2

cluster_aglo1 |> filter(Cluster == 3)

          Country Cluster
1       Australia       3
2         Austria       3
3         Belgium       3
4          Canada       3
5           Chile       3
6         Croatia       3
7          Cyprus       3
8  Czech Republic       3
9         Denmark       3
10        Estonia       3
11        Finland       3
12         France       3
13        Germany       3
14         Greece       3
15        Iceland       3
16        Ireland       3
17         Israel       3
18          Italy       3
19          Japan       3
20         Latvia       3
21      Lithuania       3
22     Luxembourg       3
23          Malta       3
24    Netherlands       3
25    New Zealand       3
26         Norway       3
27         Poland       3
28       Portugal       3
29      Singapore       3
30       Slovakia       3
31       Slovenia       3
32    South Korea       3
33          Spain       3
34         Sweden       3
35    Switzerland       3
36         Taiwan       3
37 United Kingdom       3
38  United States       3

Método Divisivo: Cálculo del clúster divisivo

Para k=3

divisivo1 = hcut(distancias, 
                k = 3,             
                hc_func='diana') #Método divisivo

fviz_dend(divisivo1, # Nuestro objeto cluster creado
          rect = TRUE,  # Resalta los cluster solicitados
          cex = 0.5)    # Tamaño de etiquetas

Validación de clústers:

# Gráfico de silueta
fviz_silhouette(divisivo1, label=TRUE)

  cluster size ave.sil.width
1       1  151          0.59
2       2    7          0.37
3       3    1          0.00

Identificación de casos mal clasificados:

divisivo1$silinfo$widths |> data.frame() |> filter(sil_width<0)

      cluster neighbor   sil_width
Haiti       1        2 -0.09342856

Hay 1 observacion (paíse) mal clasificada

# Crear tabla País - Clúster
cluster_div1 <- data.frame(
  Country = data$Country,
  Cluster = divisivo1$cluster
)

# Ver la lista de países por clúster
cluster_div1 |> 
  arrange(Cluster)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                      Australia       1
6                        Austria       1
7                     Azerbaijan       1
8                        Bahrain       1
9                     Bangladesh       1
10                       Belarus       1
11                       Belgium       1
12                         Benin       1
13                        Bhutan       1
14                       Bolivia       1
15        Bosnia and Herzegovina       1
16                      Botswana       1
17                        Brazil       1
18                      Bulgaria       1
19                  Burkina Faso       1
20                         Burma       1
21                       Burundi       1
22                    Cabo Verde       1
23                      Cambodia       1
24                      Cameroon       1
25                        Canada       1
26      Central African Republic       1
27                          Chad       1
28                         Chile       1
29                         China       1
30                      Colombia       1
31                       Comoros       1
32                    Costa Rica       1
33                 Côte d'Ivoire       1
34                       Croatia       1
35                        Cyprus       1
36                Czech Republic       1
37  Democratic Republic of Congo       1
38                       Denmark       1
39                      Djibouti       1
40            Dominican Republic       1
41                       Ecuador       1
42                         Egypt       1
43                   El Salvador       1
44             Equatorial Guinea       1
45                       Eritrea       1
46                       Estonia       1
47                      Eswatini       1
48                      Ethiopia       1
49                          Fiji       1
50                       Finland       1
51                        France       1
52                         Gabon       1
53                       Georgia       1
54                       Germany       1
55                         Ghana       1
56                        Greece       1
57                     Guatemala       1
58                        Guinea       1
59                 Guinea-Bissau       1
60                        Guyana       1
61                         Haiti       1
62                      Honduras       1
63                       Hungary       1
64                       Iceland       1
65                         India       1
66                     Indonesia       1
67                       Ireland       1
68                        Israel       1
69                         Italy       1
70                       Jamaica       1
71                         Japan       1
72                        Jordan       1
73                    Kazakhstan       1
74                         Kenya       1
75                        Kuwait       1
76               Kyrgyz Republic       1
77                          Laos       1
78                        Latvia       1
79                       Lesotho       1
80                       Liberia       1
81                     Lithuania       1
82                    Luxembourg       1
83                    Madagascar       1
84                        Malawi       1
85                      Malaysia       1
86                          Mali       1
87                         Malta       1
88                    Mauritania       1
89                     Mauritius       1
90                        Mexico       1
91                       Moldova       1
92                      Mongolia       1
93                    Montenegro       1
94                       Morocco       1
95                    Mozambique       1
96                       Namibia       1
97                         Nepal       1
98                   Netherlands       1
99                   New Zealand       1
100                    Nicaragua       1
101                        Niger       1
102                      Nigeria       1
103              North Macedonia       1
104                       Norway       1
105                         Oman       1
106                     Pakistan       1
107                       Panama       1
108             Papua New Guinea       1
109                     Paraguay       1
110                         Peru       1
111                       Poland       1
112                     Portugal       1
113                        Qatar       1
114            Republic of Congo       1
115                      Romania       1
116                       Russia       1
117                       Rwanda       1
118                 Saudi Arabia       1
119                      Senegal       1
120                       Serbia       1
121                 Sierra Leone       1
122                    Singapore       1
123                     Slovakia       1
124                     Slovenia       1
125                 South Africa       1
126                  South Korea       1
127                        Spain       1
128                    Sri Lanka       1
129                     Suriname       1
130                       Sweden       1
131                  Switzerland       1
132                       Taiwan       1
133                   Tajikistan       1
134                     Tanzania       1
135                     Thailand       1
136                   The Gambia       1
137              The Philippines       1
138                  Timor-Leste       1
139                         Togo       1
140          Trinidad and Tobago       1
141                      Tunisia       1
142                      Türkiye       1
143                 Turkmenistan       1
144                       Uganda       1
145         United Arab Emirates       1
146               United Kingdom       1
147                United States       1
148                      Uruguay       1
149                   Uzbekistan       1
150                      Vietnam       1
151                       Zambia       1
152                    Argentina       2
153                         Cuba       2
154                         Iran       2
155                      Lebanon       2
156                        Sudan       2
157                    Venezuela       2
158                     Zimbabwe       2
159                  North Korea       3

cluster_div1 |> filter(Cluster == 1)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                      Australia       1
6                        Austria       1
7                     Azerbaijan       1
8                        Bahrain       1
9                     Bangladesh       1
10                       Belarus       1
11                       Belgium       1
12                         Benin       1
13                        Bhutan       1
14                       Bolivia       1
15        Bosnia and Herzegovina       1
16                      Botswana       1
17                        Brazil       1
18                      Bulgaria       1
19                  Burkina Faso       1
20                         Burma       1
21                       Burundi       1
22                    Cabo Verde       1
23                      Cambodia       1
24                      Cameroon       1
25                        Canada       1
26      Central African Republic       1
27                          Chad       1
28                         Chile       1
29                         China       1
30                      Colombia       1
31                       Comoros       1
32                    Costa Rica       1
33                 Côte d'Ivoire       1
34                       Croatia       1
35                        Cyprus       1
36                Czech Republic       1
37  Democratic Republic of Congo       1
38                       Denmark       1
39                      Djibouti       1
40            Dominican Republic       1
41                       Ecuador       1
42                         Egypt       1
43                   El Salvador       1
44             Equatorial Guinea       1
45                       Eritrea       1
46                       Estonia       1
47                      Eswatini       1
48                      Ethiopia       1
49                          Fiji       1
50                       Finland       1
51                        France       1
52                         Gabon       1
53                       Georgia       1
54                       Germany       1
55                         Ghana       1
56                        Greece       1
57                     Guatemala       1
58                        Guinea       1
59                 Guinea-Bissau       1
60                        Guyana       1
61                         Haiti       1
62                      Honduras       1
63                       Hungary       1
64                       Iceland       1
65                         India       1
66                     Indonesia       1
67                       Ireland       1
68                        Israel       1
69                         Italy       1
70                       Jamaica       1
71                         Japan       1
72                        Jordan       1
73                    Kazakhstan       1
74                         Kenya       1
75                        Kuwait       1
76               Kyrgyz Republic       1
77                          Laos       1
78                        Latvia       1
79                       Lesotho       1
80                       Liberia       1
81                     Lithuania       1
82                    Luxembourg       1
83                    Madagascar       1
84                        Malawi       1
85                      Malaysia       1
86                          Mali       1
87                         Malta       1
88                    Mauritania       1
89                     Mauritius       1
90                        Mexico       1
91                       Moldova       1
92                      Mongolia       1
93                    Montenegro       1
94                       Morocco       1
95                    Mozambique       1
96                       Namibia       1
97                         Nepal       1
98                   Netherlands       1
99                   New Zealand       1
100                    Nicaragua       1
101                        Niger       1
102                      Nigeria       1
103              North Macedonia       1
104                       Norway       1
105                         Oman       1
106                     Pakistan       1
107                       Panama       1
108             Papua New Guinea       1
109                     Paraguay       1
110                         Peru       1
111                       Poland       1
112                     Portugal       1
113                        Qatar       1
114            Republic of Congo       1
115                      Romania       1
116                       Russia       1
117                       Rwanda       1
118                 Saudi Arabia       1
119                      Senegal       1
120                       Serbia       1
121                 Sierra Leone       1
122                    Singapore       1
123                     Slovakia       1
124                     Slovenia       1
125                 South Africa       1
126                  South Korea       1
127                        Spain       1
128                    Sri Lanka       1
129                     Suriname       1
130                       Sweden       1
131                  Switzerland       1
132                       Taiwan       1
133                   Tajikistan       1
134                     Tanzania       1
135                     Thailand       1
136                   The Gambia       1
137              The Philippines       1
138                  Timor-Leste       1
139                         Togo       1
140          Trinidad and Tobago       1
141                      Tunisia       1
142                      Türkiye       1
143                 Turkmenistan       1
144                       Uganda       1
145         United Arab Emirates       1
146               United Kingdom       1
147                United States       1
148                      Uruguay       1
149                   Uzbekistan       1
150                      Vietnam       1
151                       Zambia       1

cluster_div1 |> filter(Cluster == 2)

    Country Cluster
1 Argentina       2
2      Cuba       2
3      Iran       2
4   Lebanon       2
5     Sudan       2
6 Venezuela       2
7  Zimbabwe       2

cluster_div1 |> filter(Cluster == 3)

      Country Cluster
1 North Korea       3

K-means - Métodos de partición

Determinación del número de clusters

Usando el criterio de Suma de Cuadrados

library(factoextra)

set.seed(2025) # Establece la semilla. 
fviz_nbclust(subdata, kmeans, method = "wss", k.max = 15) +
  geom_vline(xintercept = 3, linetype = 2) +
  labs(subtitle = "Método Elbow") + theme_bw()

Tenemos que identificar en qué punto comienza a haber estabilidad en la suma de cuadrados total dentro de los clúster

Usando el Gráfico de Silueta

library(factoextra)

set.seed(2025)
fviz_nbclust(subdata, kmeans, 
             method = "silhouette", k.max = 15) +
  labs(subtitle = "Silhouette method")

Usando el paquete NbClust

library(NbClust)
set.seed(2025)
res.nbclust2 <- NbClust(subdata, distance = "euclidean",
                       min.nc = 2, max.nc = 10, 
                       method = "kmeans", index ="all")

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 2 proposed 2 as the best number of clusters 
* 12 proposed 3 as the best number of clusters 
* 4 proposed 4 as the best number of clusters 
* 2 proposed 5 as the best number of clusters 
* 1 proposed 9 as the best number of clusters 
* 2 proposed 10 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  3 
 
 
*******************************************************************

La función nos indica que el mejor número de clusters es 3 deacuerdo a la regla de la mayoría.

Aplicación del K-means

set.seed(2025)
km1 <- kmeans(subdata, 
             centers = 3,     # Número de Cluster
             iter.max = 100,  # Número de iteraciones máxima
             nstart = 25,     # Número de puntos iniciales
             algorithm = "Lloyd")

Visualizando

library(factoextra)
fviz_cluster(km1, data = subdata, ellipse.type = "convex") +
  theme_classic()

Usando el índice de Silueta

km_clusters1 <- eclust(x = subdata, FUNcluster = "kmeans", 
                      k = 3, seed = 2025,
                      hc_metric = "euclidean",
                      graph = FALSE)

fviz_silhouette(sil.obj = km_clusters1, 
                print.summary = TRUE, 
                palette = "jco",
                ggtheme = theme_classic())

  cluster size ave.sil.width
1       1    8          0.32
2       2   80          0.26
3       3   71          0.31

km_clusters1$centers

  Property Rights Government Integrity Judicial Effectiveness Tax Burden
1      -0.7113993           -0.6463516             -0.6052632 -0.8185619
2      -0.2982207           -0.3203980             -0.3097076  0.1807692
3       0.6592365            0.7581342              0.6428466 -0.2090537
  Government Spending Fiscal Health Business Freedom Labor Freedom
1          -0.2752066    -0.5667408       -1.1279599    -2.1581633
2           0.1420248    -0.2154894       -0.5224499    -0.4616071
3          -0.5952741    -0.0755456        0.4127351     0.5635240
  Monetary Freedom Trade Freedom Investment Freedom Financial Freedom
1       -7.8624161    -1.6526846         -1.7500000        -1.6250000
2       -0.4739933    -0.4449664         -0.5650000        -0.5312500
3        0.3017298     0.4614803          0.3126761         0.6443662
  Free and fair elections Civil liberties
1              -0.6044588      -0.5356796
2              -0.3961161      -0.3466505
3               0.1784617       0.4073841

km_clusters1$silinfo$widths |> data.frame() |> filter(sil_width<0)

       cluster neighbor   sil_width
Iran         1        2 -0.12602429
Brazil       2        3 -0.01065274
Mexico       2        3 -0.03395911

3 observaciones (países) mal clasificados

# Crear tabla País - Clúster
cluster_km1 <- data.frame(
  Country = data$Country,
  Cluster = km_clusters1$cluster
)

# Ver la lista de países por clúster
cluster_km1 |> 
  arrange(Cluster)

                                                  Country Cluster
Argentina                                       Argentina       1
Cuba                                                 Cuba       1
Iran                                                 Iran       1
Lebanon                                           Lebanon       1
North Korea                                   North Korea       1
Sudan                                               Sudan       1
Venezuela                                       Venezuela       1
Zimbabwe                                         Zimbabwe       1
Algeria                                           Algeria       2
Angola                                             Angola       2
Azerbaijan                                     Azerbaijan       2
Bangladesh                                     Bangladesh       2
Belarus                                           Belarus       2
Benin                                               Benin       2
Bhutan                                             Bhutan       2
Bolivia                                           Bolivia       2
Brazil                                             Brazil       2
Burkina Faso                                 Burkina Faso       2
Burma                                               Burma       2
Burundi                                           Burundi       2
Cambodia                                         Cambodia       2
Cameroon                                         Cameroon       2
Central African Republic         Central African Republic       2
Chad                                                 Chad       2
China                                               China       2
Comoros                                           Comoros       2
Côte d'Ivoire                               Côte d'Ivoire       2
Democratic Republic of Congo Democratic Republic of Congo       2
Djibouti                                         Djibouti       2
Dominican Republic                     Dominican Republic       2
Ecuador                                           Ecuador       2
Egypt                                               Egypt       2
El Salvador                                   El Salvador       2
Equatorial Guinea                       Equatorial Guinea       2
Eritrea                                           Eritrea       2
Eswatini                                         Eswatini       2
Ethiopia                                         Ethiopia       2
Gabon                                               Gabon       2
Ghana                                               Ghana       2
Guatemala                                       Guatemala       2
Guinea                                             Guinea       2
Guinea-Bissau                               Guinea-Bissau       2
Guyana                                             Guyana       2
Haiti                                               Haiti       2
Honduras                                         Honduras       2
India                                               India       2
Kazakhstan                                     Kazakhstan       2
Kenya                                               Kenya       2
Kuwait                                             Kuwait       2
Kyrgyz Republic                           Kyrgyz Republic       2
Laos                                                 Laos       2
Lesotho                                           Lesotho       2
Liberia                                           Liberia       2
Madagascar                                     Madagascar       2
Malawi                                             Malawi       2
Mali                                                 Mali       2
Mauritania                                     Mauritania       2
Mexico                                             Mexico       2
Moldova                                           Moldova       2
Mozambique                                     Mozambique       2
Nepal                                               Nepal       2
Nicaragua                                       Nicaragua       2
Niger                                               Niger       2
Nigeria                                           Nigeria       2
Pakistan                                         Pakistan       2
Papua New Guinea                         Papua New Guinea       2
Paraguay                                         Paraguay       2
Republic of Congo                       Republic of Congo       2
Russia                                             Russia       2
Rwanda                                             Rwanda       2
Saudi Arabia                                 Saudi Arabia       2
Senegal                                           Senegal       2
Sierra Leone                                 Sierra Leone       2
Sri Lanka                                       Sri Lanka       2
Suriname                                         Suriname       2
Tajikistan                                     Tajikistan       2
Tanzania                                         Tanzania       2
Thailand                                         Thailand       2
The Gambia                                     The Gambia       2
Timor-Leste                                   Timor-Leste       2
Togo                                                 Togo       2
Tunisia                                           Tunisia       2
Türkiye                                           Türkiye       2
Turkmenistan                                 Turkmenistan       2
Uganda                                             Uganda       2
Uzbekistan                                     Uzbekistan       2
Vietnam                                           Vietnam       2
Zambia                                             Zambia       2
Albania                                           Albania       3
Armenia                                           Armenia       3
Australia                                       Australia       3
Austria                                           Austria       3
Bahrain                                           Bahrain       3
Belgium                                           Belgium       3
Bosnia and Herzegovina             Bosnia and Herzegovina       3
Botswana                                         Botswana       3
Bulgaria                                         Bulgaria       3
Cabo Verde                                     Cabo Verde       3
Canada                                             Canada       3
Chile                                               Chile       3
Colombia                                         Colombia       3
Costa Rica                                     Costa Rica       3
Croatia                                           Croatia       3
Cyprus                                             Cyprus       3
Czech Republic                             Czech Republic       3
Denmark                                           Denmark       3
Estonia                                           Estonia       3
Fiji                                                 Fiji       3
Finland                                           Finland       3
France                                             France       3
Georgia                                           Georgia       3
Germany                                           Germany       3
Greece                                             Greece       3
Hungary                                           Hungary       3
Iceland                                           Iceland       3
Indonesia                                       Indonesia       3
Ireland                                           Ireland       3
Israel                                             Israel       3
Italy                                               Italy       3
Jamaica                                           Jamaica       3
Japan                                               Japan       3
Jordan                                             Jordan       3
Latvia                                             Latvia       3
Lithuania                                       Lithuania       3
Luxembourg                                     Luxembourg       3
Malaysia                                         Malaysia       3
Malta                                               Malta       3
Mauritius                                       Mauritius       3
Mongolia                                         Mongolia       3
Montenegro                                     Montenegro       3
Morocco                                           Morocco       3
Namibia                                           Namibia       3
Netherlands                                   Netherlands       3
New Zealand                                   New Zealand       3
North Macedonia                           North Macedonia       3
Norway                                             Norway       3
Oman                                                 Oman       3
Panama                                             Panama       3
Peru                                                 Peru       3
Poland                                             Poland       3
Portugal                                         Portugal       3
Qatar                                               Qatar       3
Romania                                           Romania       3
Serbia                                             Serbia       3
Singapore                                       Singapore       3
Slovakia                                         Slovakia       3
Slovenia                                         Slovenia       3
South Africa                                 South Africa       3
South Korea                                   South Korea       3
Spain                                               Spain       3
Sweden                                             Sweden       3
Switzerland                                   Switzerland       3
Taiwan                                             Taiwan       3
The Philippines                           The Philippines       3
Trinidad and Tobago                   Trinidad and Tobago       3
United Arab Emirates                 United Arab Emirates       3
United Kingdom                             United Kingdom       3
United States                               United States       3
Uruguay                                           Uruguay       3

cluster_km1 |> filter(Cluster == 1)

                Country Cluster
Argentina     Argentina       1
Cuba               Cuba       1
Iran               Iran       1
Lebanon         Lebanon       1
North Korea North Korea       1
Sudan             Sudan       1
Venezuela     Venezuela       1
Zimbabwe       Zimbabwe       1

cluster_km1 |> filter(Cluster == 2)

                                                  Country Cluster
Algeria                                           Algeria       2
Angola                                             Angola       2
Azerbaijan                                     Azerbaijan       2
Bangladesh                                     Bangladesh       2
Belarus                                           Belarus       2
Benin                                               Benin       2
Bhutan                                             Bhutan       2
Bolivia                                           Bolivia       2
Brazil                                             Brazil       2
Burkina Faso                                 Burkina Faso       2
Burma                                               Burma       2
Burundi                                           Burundi       2
Cambodia                                         Cambodia       2
Cameroon                                         Cameroon       2
Central African Republic         Central African Republic       2
Chad                                                 Chad       2
China                                               China       2
Comoros                                           Comoros       2
Côte d'Ivoire                               Côte d'Ivoire       2
Democratic Republic of Congo Democratic Republic of Congo       2
Djibouti                                         Djibouti       2
Dominican Republic                     Dominican Republic       2
Ecuador                                           Ecuador       2
Egypt                                               Egypt       2
El Salvador                                   El Salvador       2
Equatorial Guinea                       Equatorial Guinea       2
Eritrea                                           Eritrea       2
Eswatini                                         Eswatini       2
Ethiopia                                         Ethiopia       2
Gabon                                               Gabon       2
Ghana                                               Ghana       2
Guatemala                                       Guatemala       2
Guinea                                             Guinea       2
Guinea-Bissau                               Guinea-Bissau       2
Guyana                                             Guyana       2
Haiti                                               Haiti       2
Honduras                                         Honduras       2
India                                               India       2
Kazakhstan                                     Kazakhstan       2
Kenya                                               Kenya       2
Kuwait                                             Kuwait       2
Kyrgyz Republic                           Kyrgyz Republic       2
Laos                                                 Laos       2
Lesotho                                           Lesotho       2
Liberia                                           Liberia       2
Madagascar                                     Madagascar       2
Malawi                                             Malawi       2
Mali                                                 Mali       2
Mauritania                                     Mauritania       2
Mexico                                             Mexico       2
Moldova                                           Moldova       2
Mozambique                                     Mozambique       2
Nepal                                               Nepal       2
Nicaragua                                       Nicaragua       2
Niger                                               Niger       2
Nigeria                                           Nigeria       2
Pakistan                                         Pakistan       2
Papua New Guinea                         Papua New Guinea       2
Paraguay                                         Paraguay       2
Republic of Congo                       Republic of Congo       2
Russia                                             Russia       2
Rwanda                                             Rwanda       2
Saudi Arabia                                 Saudi Arabia       2
Senegal                                           Senegal       2
Sierra Leone                                 Sierra Leone       2
Sri Lanka                                       Sri Lanka       2
Suriname                                         Suriname       2
Tajikistan                                     Tajikistan       2
Tanzania                                         Tanzania       2
Thailand                                         Thailand       2
The Gambia                                     The Gambia       2
Timor-Leste                                   Timor-Leste       2
Togo                                                 Togo       2
Tunisia                                           Tunisia       2
Türkiye                                           Türkiye       2
Turkmenistan                                 Turkmenistan       2
Uganda                                             Uganda       2
Uzbekistan                                     Uzbekistan       2
Vietnam                                           Vietnam       2
Zambia                                             Zambia       2

cluster_km1 |> filter(Cluster == 3)

                                      Country Cluster
Albania                               Albania       3
Armenia                               Armenia       3
Australia                           Australia       3
Austria                               Austria       3
Bahrain                               Bahrain       3
Belgium                               Belgium       3
Bosnia and Herzegovina Bosnia and Herzegovina       3
Botswana                             Botswana       3
Bulgaria                             Bulgaria       3
Cabo Verde                         Cabo Verde       3
Canada                                 Canada       3
Chile                                   Chile       3
Colombia                             Colombia       3
Costa Rica                         Costa Rica       3
Croatia                               Croatia       3
Cyprus                                 Cyprus       3
Czech Republic                 Czech Republic       3
Denmark                               Denmark       3
Estonia                               Estonia       3
Fiji                                     Fiji       3
Finland                               Finland       3
France                                 France       3
Georgia                               Georgia       3
Germany                               Germany       3
Greece                                 Greece       3
Hungary                               Hungary       3
Iceland                               Iceland       3
Indonesia                           Indonesia       3
Ireland                               Ireland       3
Israel                                 Israel       3
Italy                                   Italy       3
Jamaica                               Jamaica       3
Japan                                   Japan       3
Jordan                                 Jordan       3
Latvia                                 Latvia       3
Lithuania                           Lithuania       3
Luxembourg                         Luxembourg       3
Malaysia                             Malaysia       3
Malta                                   Malta       3
Mauritius                           Mauritius       3
Mongolia                             Mongolia       3
Montenegro                         Montenegro       3
Morocco                               Morocco       3
Namibia                               Namibia       3
Netherlands                       Netherlands       3
New Zealand                       New Zealand       3
North Macedonia               North Macedonia       3
Norway                                 Norway       3
Oman                                     Oman       3
Panama                                 Panama       3
Peru                                     Peru       3
Poland                                 Poland       3
Portugal                             Portugal       3
Qatar                                   Qatar       3
Romania                               Romania       3
Serbia                                 Serbia       3
Singapore                           Singapore       3
Slovakia                             Slovakia       3
Slovenia                             Slovenia       3
South Africa                     South Africa       3
South Korea                       South Korea       3
Spain                                   Spain       3
Sweden                                 Sweden       3
Switzerland                       Switzerland       3
Taiwan                                 Taiwan       3
The Philippines               The Philippines       3
Trinidad and Tobago       Trinidad and Tobago       3
United Arab Emirates     United Arab Emirates       3
United Kingdom                 United Kingdom       3
United States                   United States       3
Uruguay                               Uruguay       3

Clustering con Componentes Principales (PC)

Clúster Jerárquico

Cálculo de distancias

distancias_pca <- dist(data_pcas[, 1:2], method = "euclidean")

Determinación del número de clusters

Métodos gráficos:

Utilizaremos el método del codo (elbow method) mediante la función fviz_nbclust() del paquete factoextra.

Primero usamos el criterio de Suma de Cuadrados dentro de clusters:

fviz_nbclust(data_pcas[, 1:2],       # Solo PC1 y PC2        
             hcut,                   # Método de clusterización elegido  
             diss = distancias_pca,  # Euclidiana
             method = "wss",         # Método
             k.max = 15,             # Pruebo con 15 cluster
             verbose = F)

Usando el gráfico de silueta:

fviz_nbclust(data_pcas[, 1:2],       # Solo PC1 y PC2
             hcut,                   # Método de clusterización elegido  
             diss = distancias_pca,  # Euclidiana
             method = "silhouette",  # Método
             k.max = 15,             # Pruebo con 15 cluster
             verbose = F)

Métodos Robustos:

Complementaremos el análisis visual con la función NbClust() que aplica simultáneamente hasta 30 índices diferentes para determinar el número óptimo de clusters.

library(NbClust)
set.seed(2025)
res.nbclust3 <- NbClust(data_pcas[, 1:2],                 # Data
                       distance = "euclidean",  # Distancia utilizada
                       min.nc = 2,              # Mínimo
                       max.nc = 10,             # Máximo
                       method = "ward.D",       # Método
                       index ="all")            # Índices considerados

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 1 proposed 2 as the best number of clusters 
* 13 proposed 3 as the best number of clusters 
* 2 proposed 4 as the best number of clusters 
* 4 proposed 5 as the best number of clusters 
* 2 proposed 8 as the best number of clusters 
* 1 proposed 10 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  3 
 
 
*******************************************************************

La función nos indica que el mejor número de clusters es 3 deacuerdo a la regla de la mayoría.

Método Aglomerativo: Cálculo del clúster aglomerativo

Para k=3

aglomerativo2 = hcut(x = distancias_pca,  # Matriz de distancias
                    k = 3,                # Se visualizarán 2
                    hc_func='agnes',      # Aglomerativo
                    hc_method = "ward.D") # Criterio para aglomerar

Primero, veamos la asignación de cluster:

fviz_dend(aglomerativo2, # Nuestro objeto cluster creado
          rect = TRUE,  # Resalta los cluster solicitados
          cex = 0.5)    # Tamaño de etiquetas

Validación de clústers:

fviz_silhouette(aglomerativo2, label=TRUE)

  cluster size ave.sil.width
1       1  104          0.46
2       2   15          0.24
3       3   40          0.65

Identificación de casos mal clasificados:

aglomerativo2$silinfo$widths |> data.frame() |> filter(sil_width<0)

           cluster neighbor   sil_width
Bulgaria         1        3 -0.03251306
Jamaica          1        3 -0.04107258
Cabo Verde       1        3 -0.06670040
Botswana         1        3 -0.10893057
Uruguay          1        3 -0.16584989
Romania          1        3 -0.19343016
Suriname         2        1 -0.07007601
Laos             2        1 -0.20431921
Burundi          2        1 -0.21278928

1 observacion (países) mal clasificada

# Crear tabla País - Clúster
cluster_aglo2 <- data.frame(
  Country = data$Country,
  Cluster = aglomerativo2$cluster
)

# Ver la lista de países por clúster
cluster_aglo2 |> 
  arrange(Cluster)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                     Azerbaijan       1
6                        Bahrain       1
7                     Bangladesh       1
8                        Belarus       1
9                          Benin       1
10                        Bhutan       1
11                       Bolivia       1
12        Bosnia and Herzegovina       1
13                      Botswana       1
14                        Brazil       1
15                      Bulgaria       1
16                  Burkina Faso       1
17                         Burma       1
18                    Cabo Verde       1
19                      Cambodia       1
20                      Cameroon       1
21      Central African Republic       1
22                          Chad       1
23                         China       1
24                      Colombia       1
25                       Comoros       1
26                    Costa Rica       1
27                 Côte d'Ivoire       1
28  Democratic Republic of Congo       1
29                      Djibouti       1
30            Dominican Republic       1
31                       Ecuador       1
32                         Egypt       1
33                   El Salvador       1
34             Equatorial Guinea       1
35                       Eritrea       1
36                      Eswatini       1
37                          Fiji       1
38                         Gabon       1
39                       Georgia       1
40                         Ghana       1
41                     Guatemala       1
42                        Guinea       1
43                 Guinea-Bissau       1
44                        Guyana       1
45                      Honduras       1
46                         India       1
47                     Indonesia       1
48                       Jamaica       1
49                        Jordan       1
50                    Kazakhstan       1
51                         Kenya       1
52                        Kuwait       1
53               Kyrgyz Republic       1
54                       Lesotho       1
55                       Liberia       1
56                    Madagascar       1
57                        Malawi       1
58                      Malaysia       1
59                          Mali       1
60                    Mauritania       1
61                        Mexico       1
62                       Moldova       1
63                      Mongolia       1
64                    Montenegro       1
65                       Morocco       1
66                    Mozambique       1
67                       Namibia       1
68                         Nepal       1
69                     Nicaragua       1
70                         Niger       1
71                       Nigeria       1
72               North Macedonia       1
73                          Oman       1
74                      Pakistan       1
75                        Panama       1
76              Papua New Guinea       1
77                      Paraguay       1
78                          Peru       1
79                         Qatar       1
80             Republic of Congo       1
81                       Romania       1
82                        Russia       1
83                        Rwanda       1
84                  Saudi Arabia       1
85                       Senegal       1
86                        Serbia       1
87                  South Africa       1
88                     Sri Lanka       1
89                    Tajikistan       1
90                      Tanzania       1
91                      Thailand       1
92                    The Gambia       1
93               The Philippines       1
94                   Timor-Leste       1
95                          Togo       1
96           Trinidad and Tobago       1
97                       Tunisia       1
98                  Turkmenistan       1
99                        Uganda       1
100         United Arab Emirates       1
101                      Uruguay       1
102                   Uzbekistan       1
103                      Vietnam       1
104                       Zambia       1
105                    Argentina       2
106                      Burundi       2
107                         Cuba       2
108                     Ethiopia       2
109                        Haiti       2
110                         Iran       2
111                         Laos       2
112                      Lebanon       2
113                  North Korea       2
114                 Sierra Leone       2
115                        Sudan       2
116                     Suriname       2
117                      Türkiye       2
118                    Venezuela       2
119                     Zimbabwe       2
120                    Australia       3
121                      Austria       3
122                      Belgium       3
123                       Canada       3
124                        Chile       3
125                      Croatia       3
126                       Cyprus       3
127               Czech Republic       3
128                      Denmark       3
129                      Estonia       3
130                      Finland       3
131                       France       3
132                      Germany       3
133                       Greece       3
134                      Hungary       3
135                      Iceland       3
136                      Ireland       3
137                       Israel       3
138                        Italy       3
139                        Japan       3
140                       Latvia       3
141                    Lithuania       3
142                   Luxembourg       3
143                        Malta       3
144                    Mauritius       3
145                  Netherlands       3
146                  New Zealand       3
147                       Norway       3
148                       Poland       3
149                     Portugal       3
150                    Singapore       3
151                     Slovakia       3
152                     Slovenia       3
153                  South Korea       3
154                        Spain       3
155                       Sweden       3
156                  Switzerland       3
157                       Taiwan       3
158               United Kingdom       3
159                United States       3

cluster_aglo2 |> filter(Cluster == 1)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                     Azerbaijan       1
6                        Bahrain       1
7                     Bangladesh       1
8                        Belarus       1
9                          Benin       1
10                        Bhutan       1
11                       Bolivia       1
12        Bosnia and Herzegovina       1
13                      Botswana       1
14                        Brazil       1
15                      Bulgaria       1
16                  Burkina Faso       1
17                         Burma       1
18                    Cabo Verde       1
19                      Cambodia       1
20                      Cameroon       1
21      Central African Republic       1
22                          Chad       1
23                         China       1
24                      Colombia       1
25                       Comoros       1
26                    Costa Rica       1
27                 Côte d'Ivoire       1
28  Democratic Republic of Congo       1
29                      Djibouti       1
30            Dominican Republic       1
31                       Ecuador       1
32                         Egypt       1
33                   El Salvador       1
34             Equatorial Guinea       1
35                       Eritrea       1
36                      Eswatini       1
37                          Fiji       1
38                         Gabon       1
39                       Georgia       1
40                         Ghana       1
41                     Guatemala       1
42                        Guinea       1
43                 Guinea-Bissau       1
44                        Guyana       1
45                      Honduras       1
46                         India       1
47                     Indonesia       1
48                       Jamaica       1
49                        Jordan       1
50                    Kazakhstan       1
51                         Kenya       1
52                        Kuwait       1
53               Kyrgyz Republic       1
54                       Lesotho       1
55                       Liberia       1
56                    Madagascar       1
57                        Malawi       1
58                      Malaysia       1
59                          Mali       1
60                    Mauritania       1
61                        Mexico       1
62                       Moldova       1
63                      Mongolia       1
64                    Montenegro       1
65                       Morocco       1
66                    Mozambique       1
67                       Namibia       1
68                         Nepal       1
69                     Nicaragua       1
70                         Niger       1
71                       Nigeria       1
72               North Macedonia       1
73                          Oman       1
74                      Pakistan       1
75                        Panama       1
76              Papua New Guinea       1
77                      Paraguay       1
78                          Peru       1
79                         Qatar       1
80             Republic of Congo       1
81                       Romania       1
82                        Russia       1
83                        Rwanda       1
84                  Saudi Arabia       1
85                       Senegal       1
86                        Serbia       1
87                  South Africa       1
88                     Sri Lanka       1
89                    Tajikistan       1
90                      Tanzania       1
91                      Thailand       1
92                    The Gambia       1
93               The Philippines       1
94                   Timor-Leste       1
95                          Togo       1
96           Trinidad and Tobago       1
97                       Tunisia       1
98                  Turkmenistan       1
99                        Uganda       1
100         United Arab Emirates       1
101                      Uruguay       1
102                   Uzbekistan       1
103                      Vietnam       1
104                       Zambia       1

cluster_aglo2 |> filter(Cluster == 2)

        Country Cluster
1     Argentina       2
2       Burundi       2
3          Cuba       2
4      Ethiopia       2
5         Haiti       2
6          Iran       2
7          Laos       2
8       Lebanon       2
9   North Korea       2
10 Sierra Leone       2
11        Sudan       2
12     Suriname       2
13      Türkiye       2
14    Venezuela       2
15     Zimbabwe       2

cluster_aglo2 |> filter(Cluster == 3)

          Country Cluster
1       Australia       3
2         Austria       3
3         Belgium       3
4          Canada       3
5           Chile       3
6         Croatia       3
7          Cyprus       3
8  Czech Republic       3
9         Denmark       3
10        Estonia       3
11        Finland       3
12         France       3
13        Germany       3
14         Greece       3
15        Hungary       3
16        Iceland       3
17        Ireland       3
18         Israel       3
19          Italy       3
20          Japan       3
21         Latvia       3
22      Lithuania       3
23     Luxembourg       3
24          Malta       3
25      Mauritius       3
26    Netherlands       3
27    New Zealand       3
28         Norway       3
29         Poland       3
30       Portugal       3
31      Singapore       3
32       Slovakia       3
33       Slovenia       3
34    South Korea       3
35          Spain       3
36         Sweden       3
37    Switzerland       3
38         Taiwan       3
39 United Kingdom       3
40  United States       3

Método Divisivo: Cálculo del clúster divisivo

Para k=3

divisivo2 = hcut(distancias_pca, 
                k = 3,             
                hc_func='diana') #Método divisivo

fviz_dend(divisivo2, # Nuestro objeto cluster creado
          rect = TRUE,  # Resalta los cluster solicitados
          cex = 0.5)    # Tamaño de etiquetas

Validación de clústers:

# Gráfico de silueta
fviz_silhouette(divisivo2, label=TRUE)

  cluster size ave.sil.width
1       1  150          0.52
2       2    3          0.39
3       3    6          0.50

Identificación de casos mal clasificados:

divisivo2$silinfo$widths |> data.frame() |> filter(sil_width<0)

             cluster neighbor    sil_width
Sri Lanka          1        2 -0.005008857
Eritrea            1        2 -0.025144444
Burundi            1        2 -0.289728484
Laos               1        2 -0.292980732
Suriname           1        2 -0.342594237
Türkiye            1        2 -0.435756569
Ethiopia           1        2 -0.493263680
Sierra Leone       1        2 -0.528404815

1 observaciones (países) mal clasificadas

# Crear tabla País - Clúster
cluster_div2 <- data.frame(
  Country = data$Country,
  Cluster = divisivo2$cluster
)

# Ver la lista de países por clúster
cluster_div2 |> 
  arrange(Cluster)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                      Australia       1
6                        Austria       1
7                     Azerbaijan       1
8                        Bahrain       1
9                     Bangladesh       1
10                       Belarus       1
11                       Belgium       1
12                         Benin       1
13                        Bhutan       1
14                       Bolivia       1
15        Bosnia and Herzegovina       1
16                      Botswana       1
17                        Brazil       1
18                      Bulgaria       1
19                  Burkina Faso       1
20                         Burma       1
21                       Burundi       1
22                    Cabo Verde       1
23                      Cambodia       1
24                      Cameroon       1
25                        Canada       1
26      Central African Republic       1
27                          Chad       1
28                         Chile       1
29                         China       1
30                      Colombia       1
31                       Comoros       1
32                    Costa Rica       1
33                 Côte d'Ivoire       1
34                       Croatia       1
35                        Cyprus       1
36                Czech Republic       1
37  Democratic Republic of Congo       1
38                       Denmark       1
39                      Djibouti       1
40            Dominican Republic       1
41                       Ecuador       1
42                         Egypt       1
43                   El Salvador       1
44             Equatorial Guinea       1
45                       Eritrea       1
46                       Estonia       1
47                      Eswatini       1
48                      Ethiopia       1
49                          Fiji       1
50                       Finland       1
51                        France       1
52                         Gabon       1
53                       Georgia       1
54                       Germany       1
55                         Ghana       1
56                        Greece       1
57                     Guatemala       1
58                        Guinea       1
59                 Guinea-Bissau       1
60                        Guyana       1
61                      Honduras       1
62                       Hungary       1
63                       Iceland       1
64                         India       1
65                     Indonesia       1
66                       Ireland       1
67                        Israel       1
68                         Italy       1
69                       Jamaica       1
70                         Japan       1
71                        Jordan       1
72                    Kazakhstan       1
73                         Kenya       1
74                        Kuwait       1
75               Kyrgyz Republic       1
76                          Laos       1
77                        Latvia       1
78                       Lesotho       1
79                       Liberia       1
80                     Lithuania       1
81                    Luxembourg       1
82                    Madagascar       1
83                        Malawi       1
84                      Malaysia       1
85                          Mali       1
86                         Malta       1
87                    Mauritania       1
88                     Mauritius       1
89                        Mexico       1
90                       Moldova       1
91                      Mongolia       1
92                    Montenegro       1
93                       Morocco       1
94                    Mozambique       1
95                       Namibia       1
96                         Nepal       1
97                   Netherlands       1
98                   New Zealand       1
99                     Nicaragua       1
100                        Niger       1
101                      Nigeria       1
102              North Macedonia       1
103                       Norway       1
104                         Oman       1
105                     Pakistan       1
106                       Panama       1
107             Papua New Guinea       1
108                     Paraguay       1
109                         Peru       1
110                       Poland       1
111                     Portugal       1
112                        Qatar       1
113            Republic of Congo       1
114                      Romania       1
115                       Russia       1
116                       Rwanda       1
117                 Saudi Arabia       1
118                      Senegal       1
119                       Serbia       1
120                 Sierra Leone       1
121                    Singapore       1
122                     Slovakia       1
123                     Slovenia       1
124                 South Africa       1
125                  South Korea       1
126                        Spain       1
127                    Sri Lanka       1
128                     Suriname       1
129                       Sweden       1
130                  Switzerland       1
131                       Taiwan       1
132                   Tajikistan       1
133                     Tanzania       1
134                     Thailand       1
135                   The Gambia       1
136              The Philippines       1
137                  Timor-Leste       1
138                         Togo       1
139          Trinidad and Tobago       1
140                      Tunisia       1
141                      Türkiye       1
142                 Turkmenistan       1
143                       Uganda       1
144         United Arab Emirates       1
145               United Kingdom       1
146                United States       1
147                      Uruguay       1
148                   Uzbekistan       1
149                      Vietnam       1
150                       Zambia       1
151                    Argentina       2
152                        Haiti       2
153                         Iran       2
154                         Cuba       3
155                      Lebanon       3
156                  North Korea       3
157                        Sudan       3
158                    Venezuela       3
159                     Zimbabwe       3

cluster_div2 |> filter(Cluster == 1)

                         Country Cluster
1                        Albania       1
2                        Algeria       1
3                         Angola       1
4                        Armenia       1
5                      Australia       1
6                        Austria       1
7                     Azerbaijan       1
8                        Bahrain       1
9                     Bangladesh       1
10                       Belarus       1
11                       Belgium       1
12                         Benin       1
13                        Bhutan       1
14                       Bolivia       1
15        Bosnia and Herzegovina       1
16                      Botswana       1
17                        Brazil       1
18                      Bulgaria       1
19                  Burkina Faso       1
20                         Burma       1
21                       Burundi       1
22                    Cabo Verde       1
23                      Cambodia       1
24                      Cameroon       1
25                        Canada       1
26      Central African Republic       1
27                          Chad       1
28                         Chile       1
29                         China       1
30                      Colombia       1
31                       Comoros       1
32                    Costa Rica       1
33                 Côte d'Ivoire       1
34                       Croatia       1
35                        Cyprus       1
36                Czech Republic       1
37  Democratic Republic of Congo       1
38                       Denmark       1
39                      Djibouti       1
40            Dominican Republic       1
41                       Ecuador       1
42                         Egypt       1
43                   El Salvador       1
44             Equatorial Guinea       1
45                       Eritrea       1
46                       Estonia       1
47                      Eswatini       1
48                      Ethiopia       1
49                          Fiji       1
50                       Finland       1
51                        France       1
52                         Gabon       1
53                       Georgia       1
54                       Germany       1
55                         Ghana       1
56                        Greece       1
57                     Guatemala       1
58                        Guinea       1
59                 Guinea-Bissau       1
60                        Guyana       1
61                      Honduras       1
62                       Hungary       1
63                       Iceland       1
64                         India       1
65                     Indonesia       1
66                       Ireland       1
67                        Israel       1
68                         Italy       1
69                       Jamaica       1
70                         Japan       1
71                        Jordan       1
72                    Kazakhstan       1
73                         Kenya       1
74                        Kuwait       1
75               Kyrgyz Republic       1
76                          Laos       1
77                        Latvia       1
78                       Lesotho       1
79                       Liberia       1
80                     Lithuania       1
81                    Luxembourg       1
82                    Madagascar       1
83                        Malawi       1
84                      Malaysia       1
85                          Mali       1
86                         Malta       1
87                    Mauritania       1
88                     Mauritius       1
89                        Mexico       1
90                       Moldova       1
91                      Mongolia       1
92                    Montenegro       1
93                       Morocco       1
94                    Mozambique       1
95                       Namibia       1
96                         Nepal       1
97                   Netherlands       1
98                   New Zealand       1
99                     Nicaragua       1
100                        Niger       1
101                      Nigeria       1
102              North Macedonia       1
103                       Norway       1
104                         Oman       1
105                     Pakistan       1
106                       Panama       1
107             Papua New Guinea       1
108                     Paraguay       1
109                         Peru       1
110                       Poland       1
111                     Portugal       1
112                        Qatar       1
113            Republic of Congo       1
114                      Romania       1
115                       Russia       1
116                       Rwanda       1
117                 Saudi Arabia       1
118                      Senegal       1
119                       Serbia       1
120                 Sierra Leone       1
121                    Singapore       1
122                     Slovakia       1
123                     Slovenia       1
124                 South Africa       1
125                  South Korea       1
126                        Spain       1
127                    Sri Lanka       1
128                     Suriname       1
129                       Sweden       1
130                  Switzerland       1
131                       Taiwan       1
132                   Tajikistan       1
133                     Tanzania       1
134                     Thailand       1
135                   The Gambia       1
136              The Philippines       1
137                  Timor-Leste       1
138                         Togo       1
139          Trinidad and Tobago       1
140                      Tunisia       1
141                      Türkiye       1
142                 Turkmenistan       1
143                       Uganda       1
144         United Arab Emirates       1
145               United Kingdom       1
146                United States       1
147                      Uruguay       1
148                   Uzbekistan       1
149                      Vietnam       1
150                       Zambia       1

cluster_div2 |> filter(Cluster == 2)

    Country Cluster
1 Argentina       2
2     Haiti       2
3      Iran       2

cluster_div2 |> filter(Cluster == 3)

      Country Cluster
1        Cuba       3
2     Lebanon       3
3 North Korea       3
4       Sudan       3
5   Venezuela       3
6    Zimbabwe       3

K-means - Métodos de partición

Determinación del número de clusters

Usando el criterio de Suma de Cuadrados

library(factoextra)

set.seed(2025) # Establece la semilla. 
fviz_nbclust(data_pcas[, 1:2], kmeans, method = "wss", k.max = 15) +
  geom_vline(xintercept = 3, linetype = 2) +
  labs(subtitle = "Método Elbow") + theme_bw()

Tenemos que identificar en qué punto comienza a haber estabilidad en la suma de cuadrados total dentro de los clúster

Usando el Gráfico de Silueta

library(factoextra)

set.seed(2025)
fviz_nbclust(data_pcas[, 1:2], kmeans, 
             method = "silhouette", k.max = 15) +
  labs(subtitle = "Silhouette method")

Usando el paquete NbClust

library(NbClust)
set.seed(2025)
res.nbclust4 <- NbClust(data_pcas[, 1:2], distance = "euclidean",
                       min.nc = 2, max.nc = 10, 
                       method = "kmeans", index ="all")

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 4 proposed 2 as the best number of clusters 
* 11 proposed 3 as the best number of clusters 
* 6 proposed 5 as the best number of clusters 
* 1 proposed 6 as the best number of clusters 
* 1 proposed 10 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  3 
 
 
*******************************************************************

La función nos indica que el mejor número de clusters es 3 deacuerdo a la regla de la mayoría.

Aplicación del K-means

set.seed(2025)
km2 <- kmeans(data_pcas[, 1:2], 
             centers = 3,     # Número de Cluster
             iter.max = 100,  # Número de iteraciones máxima
             nstart = 25,     # Número de puntos iniciales
             algorithm = "Lloyd")

Visualizando

library(factoextra)
fviz_cluster(km2, data = data_pcas[, 1:2], ellipse.type = "convex") +
  theme_classic()

Usando el índice de Silueta

km_clusters2 <- eclust(x = data_pcas[, 1:2], FUNcluster = "kmeans", 
                      k = 3, seed = 2025,
                      hc_metric = "euclidean",
                      graph = FALSE)

fviz_silhouette(sil.obj = km_clusters2, 
                print.summary = TRUE, 
                palette = "jco",
                ggtheme = theme_classic())

  cluster size ave.sil.width
1       1    8          0.50
2       2   65          0.47
3       3   86          0.44

km_clusters2$centers

        pc1        pc2
1  8.450369 -2.7662199
2 -1.373519 -1.3604519
3  1.134167  0.6118582

km_clusters2$silinfo$widths |> data.frame() |> filter(sil_width<0)

                    cluster neighbor    sil_width
Iran                      1        3 -0.032604811
The Philippines           3        2 -0.001626911
Indonesia                 3        2 -0.004151305
Trinidad and Tobago       3        2 -0.006578924

4 obsv. mal clasificadas

# Crear tabla País - Clúster
cluster_km2 <- data.frame(
  Country = data$Country,
  Cluster = km_clusters2$cluster
)

# Ver la lista de países por clúster
cluster_km2 |> 
  arrange(Cluster)

                                                  Country Cluster
Argentina                                       Argentina       1
Cuba                                                 Cuba       1
Iran                                                 Iran       1
Lebanon                                           Lebanon       1
North Korea                                   North Korea       1
Sudan                                               Sudan       1
Venezuela                                       Venezuela       1
Zimbabwe                                         Zimbabwe       1
Albania                                           Albania       2
Armenia                                           Armenia       2
Australia                                       Australia       2
Austria                                           Austria       2
Bahrain                                           Bahrain       2
Belgium                                           Belgium       2
Bosnia and Herzegovina             Bosnia and Herzegovina       2
Botswana                                         Botswana       2
Bulgaria                                         Bulgaria       2
Cabo Verde                                     Cabo Verde       2
Canada                                             Canada       2
Chile                                               Chile       2
Colombia                                         Colombia       2
Costa Rica                                     Costa Rica       2
Croatia                                           Croatia       2
Cyprus                                             Cyprus       2
Czech Republic                             Czech Republic       2
Denmark                                           Denmark       2
Estonia                                           Estonia       2
Fiji                                                 Fiji       2
Finland                                           Finland       2
France                                             France       2
Georgia                                           Georgia       2
Germany                                           Germany       2
Greece                                             Greece       2
Hungary                                           Hungary       2
Iceland                                           Iceland       2
Ireland                                           Ireland       2
Israel                                             Israel       2
Italy                                               Italy       2
Jamaica                                           Jamaica       2
Japan                                               Japan       2
Jordan                                             Jordan       2
Latvia                                             Latvia       2
Lithuania                                       Lithuania       2
Luxembourg                                     Luxembourg       2
Malaysia                                         Malaysia       2
Malta                                               Malta       2
Mauritius                                       Mauritius       2
Montenegro                                     Montenegro       2
Morocco                                           Morocco       2
Namibia                                           Namibia       2
Netherlands                                   Netherlands       2
New Zealand                                   New Zealand       2
North Macedonia                           North Macedonia       2
Norway                                             Norway       2
Panama                                             Panama       2
Peru                                                 Peru       2
Poland                                             Poland       2
Portugal                                         Portugal       2
Romania                                           Romania       2
Serbia                                             Serbia       2
Singapore                                       Singapore       2
Slovakia                                         Slovakia       2
Slovenia                                         Slovenia       2
South Africa                                 South Africa       2
South Korea                                   South Korea       2
Spain                                               Spain       2
Sweden                                             Sweden       2
Switzerland                                   Switzerland       2
Taiwan                                             Taiwan       2
United Arab Emirates                 United Arab Emirates       2
United Kingdom                             United Kingdom       2
United States                               United States       2
Uruguay                                           Uruguay       2
Algeria                                           Algeria       3
Angola                                             Angola       3
Azerbaijan                                     Azerbaijan       3
Bangladesh                                     Bangladesh       3
Belarus                                           Belarus       3
Benin                                               Benin       3
Bhutan                                             Bhutan       3
Bolivia                                           Bolivia       3
Brazil                                             Brazil       3
Burkina Faso                                 Burkina Faso       3
Burma                                               Burma       3
Burundi                                           Burundi       3
Cambodia                                         Cambodia       3
Cameroon                                         Cameroon       3
Central African Republic         Central African Republic       3
Chad                                                 Chad       3
China                                               China       3
Comoros                                           Comoros       3
Côte d'Ivoire                               Côte d'Ivoire       3
Democratic Republic of Congo Democratic Republic of Congo       3
Djibouti                                         Djibouti       3
Dominican Republic                     Dominican Republic       3
Ecuador                                           Ecuador       3
Egypt                                               Egypt       3
El Salvador                                   El Salvador       3
Equatorial Guinea                       Equatorial Guinea       3
Eritrea                                           Eritrea       3
Eswatini                                         Eswatini       3
Ethiopia                                         Ethiopia       3
Gabon                                               Gabon       3
Ghana                                               Ghana       3
Guatemala                                       Guatemala       3
Guinea                                             Guinea       3
Guinea-Bissau                               Guinea-Bissau       3
Guyana                                             Guyana       3
Haiti                                               Haiti       3
Honduras                                         Honduras       3
India                                               India       3
Indonesia                                       Indonesia       3
Kazakhstan                                     Kazakhstan       3
Kenya                                               Kenya       3
Kuwait                                             Kuwait       3
Kyrgyz Republic                           Kyrgyz Republic       3
Laos                                                 Laos       3
Lesotho                                           Lesotho       3
Liberia                                           Liberia       3
Madagascar                                     Madagascar       3
Malawi                                             Malawi       3
Mali                                                 Mali       3
Mauritania                                     Mauritania       3
Mexico                                             Mexico       3
Moldova                                           Moldova       3
Mongolia                                         Mongolia       3
Mozambique                                     Mozambique       3
Nepal                                               Nepal       3
Nicaragua                                       Nicaragua       3
Niger                                               Niger       3
Nigeria                                           Nigeria       3
Oman                                                 Oman       3
Pakistan                                         Pakistan       3
Papua New Guinea                         Papua New Guinea       3
Paraguay                                         Paraguay       3
Qatar                                               Qatar       3
Republic of Congo                       Republic of Congo       3
Russia                                             Russia       3
Rwanda                                             Rwanda       3
Saudi Arabia                                 Saudi Arabia       3
Senegal                                           Senegal       3
Sierra Leone                                 Sierra Leone       3
Sri Lanka                                       Sri Lanka       3
Suriname                                         Suriname       3
Tajikistan                                     Tajikistan       3
Tanzania                                         Tanzania       3
Thailand                                         Thailand       3
The Gambia                                     The Gambia       3
The Philippines                           The Philippines       3
Timor-Leste                                   Timor-Leste       3
Togo                                                 Togo       3
Trinidad and Tobago                   Trinidad and Tobago       3
Tunisia                                           Tunisia       3
Türkiye                                           Türkiye       3
Turkmenistan                                 Turkmenistan       3
Uganda                                             Uganda       3
Uzbekistan                                     Uzbekistan       3
Vietnam                                           Vietnam       3
Zambia                                             Zambia       3

cluster_km2 |> filter(Cluster == 1)

                Country Cluster
Argentina     Argentina       1
Cuba               Cuba       1
Iran               Iran       1
Lebanon         Lebanon       1
North Korea North Korea       1
Sudan             Sudan       1
Venezuela     Venezuela       1
Zimbabwe       Zimbabwe       1

cluster_km2 |> filter(Cluster == 2)

                                      Country Cluster
Albania                               Albania       2
Armenia                               Armenia       2
Australia                           Australia       2
Austria                               Austria       2
Bahrain                               Bahrain       2
Belgium                               Belgium       2
Bosnia and Herzegovina Bosnia and Herzegovina       2
Botswana                             Botswana       2
Bulgaria                             Bulgaria       2
Cabo Verde                         Cabo Verde       2
Canada                                 Canada       2
Chile                                   Chile       2
Colombia                             Colombia       2
Costa Rica                         Costa Rica       2
Croatia                               Croatia       2
Cyprus                                 Cyprus       2
Czech Republic                 Czech Republic       2
Denmark                               Denmark       2
Estonia                               Estonia       2
Fiji                                     Fiji       2
Finland                               Finland       2
France                                 France       2
Georgia                               Georgia       2
Germany                               Germany       2
Greece                                 Greece       2
Hungary                               Hungary       2
Iceland                               Iceland       2
Ireland                               Ireland       2
Israel                                 Israel       2
Italy                                   Italy       2
Jamaica                               Jamaica       2
Japan                                   Japan       2
Jordan                                 Jordan       2
Latvia                                 Latvia       2
Lithuania                           Lithuania       2
Luxembourg                         Luxembourg       2
Malaysia                             Malaysia       2
Malta                                   Malta       2
Mauritius                           Mauritius       2
Montenegro                         Montenegro       2
Morocco                               Morocco       2
Namibia                               Namibia       2
Netherlands                       Netherlands       2
New Zealand                       New Zealand       2
North Macedonia               North Macedonia       2
Norway                                 Norway       2
Panama                                 Panama       2
Peru                                     Peru       2
Poland                                 Poland       2
Portugal                             Portugal       2
Romania                               Romania       2
Serbia                                 Serbia       2
Singapore                           Singapore       2
Slovakia                             Slovakia       2
Slovenia                             Slovenia       2
South Africa                     South Africa       2
South Korea                       South Korea       2
Spain                                   Spain       2
Sweden                                 Sweden       2
Switzerland                       Switzerland       2
Taiwan                                 Taiwan       2
United Arab Emirates     United Arab Emirates       2
United Kingdom                 United Kingdom       2
United States                   United States       2
Uruguay                               Uruguay       2

cluster_km2 |> filter(Cluster == 3)

                                                  Country Cluster
Algeria                                           Algeria       3
Angola                                             Angola       3
Azerbaijan                                     Azerbaijan       3
Bangladesh                                     Bangladesh       3
Belarus                                           Belarus       3
Benin                                               Benin       3
Bhutan                                             Bhutan       3
Bolivia                                           Bolivia       3
Brazil                                             Brazil       3
Burkina Faso                                 Burkina Faso       3
Burma                                               Burma       3
Burundi                                           Burundi       3
Cambodia                                         Cambodia       3
Cameroon                                         Cameroon       3
Central African Republic         Central African Republic       3
Chad                                                 Chad       3
China                                               China       3
Comoros                                           Comoros       3
Côte d'Ivoire                               Côte d'Ivoire       3
Democratic Republic of Congo Democratic Republic of Congo       3
Djibouti                                         Djibouti       3
Dominican Republic                     Dominican Republic       3
Ecuador                                           Ecuador       3
Egypt                                               Egypt       3
El Salvador                                   El Salvador       3
Equatorial Guinea                       Equatorial Guinea       3
Eritrea                                           Eritrea       3
Eswatini                                         Eswatini       3
Ethiopia                                         Ethiopia       3
Gabon                                               Gabon       3
Ghana                                               Ghana       3
Guatemala                                       Guatemala       3
Guinea                                             Guinea       3
Guinea-Bissau                               Guinea-Bissau       3
Guyana                                             Guyana       3
Haiti                                               Haiti       3
Honduras                                         Honduras       3
India                                               India       3
Indonesia                                       Indonesia       3
Kazakhstan                                     Kazakhstan       3
Kenya                                               Kenya       3
Kuwait                                             Kuwait       3
Kyrgyz Republic                           Kyrgyz Republic       3
Laos                                                 Laos       3
Lesotho                                           Lesotho       3
Liberia                                           Liberia       3
Madagascar                                     Madagascar       3
Malawi                                             Malawi       3
Mali                                                 Mali       3
Mauritania                                     Mauritania       3
Mexico                                             Mexico       3
Moldova                                           Moldova       3
Mongolia                                         Mongolia       3
Mozambique                                     Mozambique       3
Nepal                                               Nepal       3
Nicaragua                                       Nicaragua       3
Niger                                               Niger       3
Nigeria                                           Nigeria       3
Oman                                                 Oman       3
Pakistan                                         Pakistan       3
Papua New Guinea                         Papua New Guinea       3
Paraguay                                         Paraguay       3
Qatar                                               Qatar       3
Republic of Congo                       Republic of Congo       3
Russia                                             Russia       3
Rwanda                                             Rwanda       3
Saudi Arabia                                 Saudi Arabia       3
Senegal                                           Senegal       3
Sierra Leone                                 Sierra Leone       3
Sri Lanka                                       Sri Lanka       3
Suriname                                         Suriname       3
Tajikistan                                     Tajikistan       3
Tanzania                                         Tanzania       3
Thailand                                         Thailand       3
The Gambia                                     The Gambia       3
The Philippines                           The Philippines       3
Timor-Leste                                   Timor-Leste       3
Togo                                                 Togo       3
Trinidad and Tobago                   Trinidad and Tobago       3
Tunisia                                           Tunisia       3
Türkiye                                           Türkiye       3
Turkmenistan                                 Turkmenistan       3
Uganda                                             Uganda       3
Uzbekistan                                     Uzbekistan       3
Vietnam                                           Vietnam       3
Zambia                                             Zambia       3

Clúster Elegido

Incorporando el clúster a la data original

data |> mutate(Cluster=aglomerativo2$cluster) -> data.k

Caracterizando los clústers

subdata_indicadores <- data.k |>
  select(Cluster, 2:16)

caract_medianas <- subdata_indicadores |>
  group_by(across(contains("Cluster"))) |>
  summarise(across(1:14, median, .names = "Med_{.col}"))
caract_medianas

# A tibble: 3 × 15
  Cluster `Med_Overall Score` `Med_Property Rights` `Med_Government Integrity`
    <int>               <dbl>                 <dbl>                      <dbl>
1       1                56.7                  44.8                       36.5
2       2                44.1                  27.9                       23.2
3       3                72.8                  90                         74.8
# ℹ 11 more variables: `Med_Judicial Effectiveness` <dbl>,
#   `Med_Tax Burden` <dbl>, `Med_Government Spending` <dbl>,
#   `Med_Fiscal Health` <dbl>, `Med_Business Freedom` <dbl>,
#   `Med_Labor Freedom` <dbl>, `Med_Monetary Freedom` <dbl>,
#   `Med_Trade Freedom` <dbl>, `Med_Investment Freedom` <dbl>,
#   `Med_Financial Freedom` <dbl>, `Med_Free and fair elections` <dbl>

View(caract_medianas)

ggplot(data.k, aes(x = factor(Cluster), fill = `World regions`)) +
  geom_bar(position = "fill") +   # proporciones
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Distribución porcentual de Regiones por clúster",
    x = "Clúster",
    y = "Porcentaje"
  ) +
  theme_minimal()

k_optimo = 3

# eps
kNNdistplot(pca_df, k = k_optimo)  
abline(h = 1, col = "red", lty = 2)

DBSCAN

rownames(subdata) <- data$Country

eps_values <- c(0.7, 0.8, 0.9, 1.0, 1.1, 1.2)
minPts <- 4

dbscan_results <- data.frame(
  eps = numeric(),
  minPts = integer(),
  clusters = integer(),
  noise_points = integer()
)

for (eps in eps_values) {
  model <- dbscan(pca_df, eps = eps, minPts = minPts)
  
  n_clusters <- length(unique(model$cluster)) - ifelse(any(model$cluster == 0), 1, 0)  # excluye cluster 0 (outliers)
  n_noise <- sum(model$cluster == 0)
  
  dbscan_results <- rbind(dbscan_results, data.frame(
    eps = eps,
    minPts = minPts,
    clusters = n_clusters,
    noise_points = n_noise
  ))
}

ggplot(dbscan_results, aes(x = eps, y = clusters)) +
  geom_line() +
  geom_point(size = 3) +
  labs(title = "Número de clusters vs eps (DBSCAN)", y = "Clusters detectados") +
  theme_minimal()

Eligo eps = 0.8

set.seed(2023)
dbscan_cluster1 <- fpc::dbscan(data = pca_df, 
                               eps = 0.8, 
                               MinPts = 4)
print(dbscan_cluster1)

dbscan Pts=159 MinPts=4 eps=0.8
        0   1 2 3
border 19   1 1 2
seed    0 131 4 1
total  19 132 5 3

fviz_cluster(dbscan_cluster1, 
             pca_df, stand = FALSE, 
             ellipse = FALSE, 
             geom = "point") + 
  labs(title = "DBSCAN") + theme_bw()

Validación

dbscan_valid <- pca_df[dbscan_cluster1$cluster != 0, ]
dbscan_labels <- dbscan_cluster1$cluster[dbscan_cluster1$cluster != 0]

# Calcular índice de silueta
sil_dbscan <- silhouette(dbscan_labels, dist(dbscan_valid))

fviz_silhouette(sil_dbscan) +
  labs(title = "Índice de Silueta para DBSCAN (sin outliers)") +
  theme_minimal()

  cluster size ave.sil.width
1       1  132          0.21
2       2    5          0.59
3       3    3          0.78

Caracterizando

data_clustered <- as.data.frame(subdata) %>%
  mutate(cluster_dbscan = factor(dbscan_cluster1$cluster))

skim(data_clustered)

Data summary
Name	data_clustered
Number of rows	159
Number of columns	15
_______________________
Column type frequency:
factor	1
numeric	14
________________________
Group variables	None

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
cluster_dbscan	0	1	FALSE	4	1: 132, 0: 19, 2: 5, 3: 3

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p75	p100	hist
Property Rights	1	0.11	0.62	-1.17	-0.38	0.62	1.22	▂▆▇▃▆
Government Integrity	1	0.14	0.74	-1.18	-0.39	0.61	1.94	▃▇▅▂▂
Judicial Effectiveness	1	0.10	0.64	-0.93	-0.38	0.62	1.30	▆▇▇▃▆
Tax Burden	1	-0.04	0.87	-5.24	-0.45	0.55	1.44	▁▁▁▇▇
Government Spending	1	-0.21	0.78	-2.44	-0.60	0.40	0.80	▁▂▃▆▇
Fiscal Health	1	-0.17	0.67	-1.58	-0.58	0.42	0.65	▃▂▃▅▇
Business Freedom	1	-0.14	0.65	-2.26	-0.61	0.39	0.94	▁▃▅▇▇
Labor Freedom	1	-0.09	1.04	-5.34	-0.51	0.49	2.50	▁▁▂▇▂
Monetary Freedom	1	-0.50	2.02	-9.52	-0.50	0.50	2.39	▁▁▁▇▇
Trade Freedom	1	-0.10	0.80	-4.85	-0.50	0.50	1.53	▁▁▂▇▆
Investment Freedom	1	-0.23	0.82	-2.40	-0.60	0.40	1.40	▁▂▆▇▂
Financial Freedom	1	-0.06	0.93	-2.50	-0.50	0.50	1.50	▁▃▃▇▃
Free and fair elections	1	-0.15	0.45	-0.78	-0.70	0.30	0.35	▆▁▁▂▇
Civil liberties	1	-0.02	0.54	-1.09	-0.51	0.49	0.86	▃▇▇▆▇

# Resumen
summary_by_cluster <- data_clustered %>%
  group_by(cluster_dbscan) %>%
  summarise(across(everything(), list(mean = mean, sd = sd), .names = "{.col}_{.fn}"))

summary_by_cluster

# A tibble: 4 × 29
  cluster_dbscan `Property Rights_mean` `Property Rights_sd`
  <fct>                           <dbl>                <dbl>
1 0                              -0.320                0.697
2 1                               0.198                0.583
3 2                              -0.499                0.281
4 3                              -0.102                0.337
# ℹ 26 more variables: `Government Integrity_mean` <dbl>,
#   `Government Integrity_sd` <dbl>, `Judicial Effectiveness_mean` <dbl>,
#   `Judicial Effectiveness_sd` <dbl>, `Tax Burden_mean` <dbl>,
#   `Tax Burden_sd` <dbl>, `Government Spending_mean` <dbl>,
#   `Government Spending_sd` <dbl>, `Fiscal Health_mean` <dbl>,
#   `Fiscal Health_sd` <dbl>, `Business Freedom_mean` <dbl>,
#   `Business Freedom_sd` <dbl>, `Labor Freedom_mean` <dbl>, …

Visualización

data_long <- data_clustered %>%
  pivot_longer(
    cols = -c(cluster_dbscan),  
    names_to = "Variable", 
    values_to = "Valor"
  )

ggplot(data_long, aes(x = cluster_dbscan, y = Valor, fill = cluster_dbscan)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 1, alpha = 0.7) +
  facet_wrap(~ Variable, scales = "free_y") +
  labs(
    title = "Distribución de índices por Cluster (DBSCAN)",
    x = "Cluster DBSCAN",
    y = "Valor del índice"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    strip.text = element_text(size = 10)
  )

data_clustered$PC1 <- pc$x[,1]
data_clustered$PC2 <- pc$x[,2]

data_clustered_country <- rownames_to_column(data_clustered, var = "Country")

ggplot(data_clustered_country, aes(x = PC1, y = PC2, color = cluster_dbscan, label = Country)) +
  geom_point(size = 5, alpha = 1) +
  geom_text_repel(size = 3, max.overlaps = 100) +
  labs(
    title = "Clusters DBSCAN en espacio PCA (con nombres de países)",
    x = "PC1",
    y = "PC2"
  ) +
  theme_minimal()