library(pacman)Warning: package 'pacman' was built under R version 4.5.2
p_load(umap, cluster, factoextra, tidyverse, skimr, naniar,
tictoc, DataExplorer, ggplot2, plotly, psych, NbClust, dbscan, ggrepel)library(pacman)Warning: package 'pacman' was built under R version 4.5.2
p_load(umap, cluster, factoextra, tidyverse, skimr, naniar,
tictoc, DataExplorer, ggplot2, plotly, psych, NbClust, dbscan, ggrepel)Se carga el primer dataset que corresponde al Índice de Libertad Económica.
library(readr)
library(dplyr)
efi <- read_csv(
"index_economic_freedom.csv",
skip = 3, # saltar las 3 líneas de texto
na = c("N/A"), # tratar "N/A" como NA
col_select = -c('Index Year') # no se lee esa columna
)Rows: 184 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Country
dbl (13): Overall Score, Property Rights, Government Integrity, Judicial Eff...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(efi)# A tibble: 6 × 14
Country `Overall Score` `Property Rights` `Government Integrity`
<chr> <dbl> <dbl> <dbl>
1 Afghanistan NA 7.4 14.1
2 Albania 66.6 58.3 39.6
3 Algeria 47.5 27.6 29.5
4 Angola 55 39.9 28.2
5 Argentina 54.2 34.7 39.7
6 Armenia 65.4 49.9 50.4
# ℹ 10 more variables: `Judicial Effectiveness` <dbl>, `Tax Burden` <dbl>,
# `Government Spending` <dbl>, `Fiscal Health` <dbl>,
# `Business Freedom` <dbl>, `Labor Freedom` <dbl>, `Monetary Freedom` <dbl>,
# `Trade Freedom` <dbl>, `Investment Freedom` <dbl>,
# `Financial Freedom` <dbl>
Cargamos los dataset de civil liberties y electoral pluralism del Índice de Democracia.
pluralism <- read_csv("electoral_pluralism_index.csv")Rows: 166 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Entity, World regions according to OWID
dbl (2): Year, Free and fair elections
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
liberties <- read_csv("civil_liberties_index.csv")Rows: 166 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Entity, World regions according to OWID
dbl (2): Year, Civil liberties
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(pluralism)# A tibble: 6 × 4
Entity Year `Free and fair elections` `World regions according to OWID`
<chr> <dbl> <dbl> <chr>
1 Afghanistan 2024 0 Asia
2 Albania 2024 7 Europe
3 Algeria 2024 3.08 Africa
4 Angola 2024 4.5 Africa
5 Argentina 2024 9.17 South America
6 Armenia 2024 7.92 Asia
head(liberties)# A tibble: 6 × 4
Entity Year `Civil liberties` `World regions according to OWID`
<chr> <dbl> <dbl> <chr>
1 Afghanistan 2024 0 Asia
2 Albania 2024 7.06 Europe
3 Algeria 2024 3.82 Africa
4 Angola 2024 2.35 Africa
5 Argentina 2024 8.53 South America
6 Armenia 2024 5.29 Asia
Nos quedamos solo con las columnas de interés, y estandarizamos el nombre de la columna que corresponde al país.
# Electoral pluralism: standardise the country key, then drop the year and the
# region column (the region is kept only in `liberties`, below).
pluralism <- pluralism |>
  rename(Country = Entity) |>
  select(!c(Year, `World regions according to OWID`))

# Civil liberties: drop the year, standardise the country key and give the
# region column a shorter name in a single rename() call.
liberties <- liberties |>
  select(!Year) |>
  rename(
    Country = Entity,
    `World regions` = `World regions according to OWID`
  )
head(liberties)# A tibble: 6 × 3
Country `Civil liberties` `World regions`
<chr> <dbl> <chr>
1 Afghanistan 0 Asia
2 Albania 7.06 Europe
3 Algeria 3.82 Africa
4 Angola 2.35 Africa
5 Argentina 8.53 South America
6 Armenia 5.29 Asia
Antes de unir las bases, se compara qué países aparecen en cada una de ellas.
library(dplyr)
# 1. Sorted, de-duplicated country names found in each dataset
c_efi <- efi$Country |> unique() |> sort()
c_pluralism <- pluralism$Country |> unique() |> sort()
c_liberies <- liberties$Country |> unique() |> sort()

# 2. Every country that appears in at least one of the datasets
all_countries <- sort(Reduce(union, list(c_efi, c_pluralism, c_liberies)))

# 3. Presence table: one row per country, one logical flag per dataset
presencia <- tibble(
  Country = all_countries,
  in_cfi = all_countries %in% c_efi,
  in_pluralism = all_countries %in% c_pluralism,
  in_liberties = all_countries %in% c_liberies
)

# 4. Keep ONLY the countries that are missing from at least one dataset
presencia_problema <- presencia |>
  filter(!in_cfi | !in_pluralism | !in_liberties) |>
  arrange(Country)
presencia_problema# A tibble: 40 × 4
Country in_cfi in_pluralism in_liberties
<chr> <lgl> <lgl> <lgl>
1 Barbados TRUE FALSE FALSE
2 Belize TRUE FALSE FALSE
3 Brunei Darussalam TRUE FALSE FALSE
4 Burma TRUE FALSE FALSE
5 Cabo Verde TRUE FALSE FALSE
6 Cape Verde FALSE TRUE TRUE
7 Congo FALSE TRUE TRUE
8 Cote d'Ivoire FALSE TRUE TRUE
9 Czech Republic TRUE FALSE FALSE
10 Czechia FALSE TRUE TRUE
# ℹ 30 more rows
View(presencia_problema)
#TRUE significa que ese nombre de país está presente en ese dataset, y FALSE que no está presente.Podemos observar los nombres de países que están presentes en el dataset del Índice de Libertad Económica pero no en los del Índice de Democracia (Pluralism and Liberties), y viceversa también.
Sin embargo, podemos notar que algunos nombres distintos hacen referencia al mismo país, ya que los nombres de algunos países se escriben de manera diferente en distintas bases de datos. Entonces, hay que identificarlos y luego elegir una sola forma de nombrarlos para poder hacer el merge.
Para estandarizar los nombres de los países se tomará como referencia al Índice de Libertad Económica.
# Country-name aliases: spelling found in the Democracy Index tables (left)
# mapped to the spelling used by the Index of Economic Freedom (right).
# Kept in ONE lookup so that both tables are always recoded identically.
country_aliases <- c(
  "Cape Verde" = "Cabo Verde",
  "Congo" = "Republic of Congo",
  "Cote d'Ivoire" = "Côte d'Ivoire",
  "Czechia" = "Czech Republic",
  "East Timor" = "Timor-Leste",
  "Gambia" = "The Gambia",
  "Kyrgyzstan" = "Kyrgyz Republic",
  "Myanmar" = "Burma",
  "Philippines" = "The Philippines",
  "Turkey" = "Türkiye"
)

# `!!!` splices the named vector into recode() as old = new pairs; country
# names that are not listed in the lookup are left unchanged.
pluralism <- pluralism |>
  mutate(Country = recode(Country, !!!country_aliases))

liberties <- liberties |>
  mutate(Country = recode(Country, !!!country_aliases))Se procede a unir los dataset para tener la data con todas las variables necesarias para usarla en el algoritmo.
# Merge the three tables on "Country", keeping only the countries that are
# present in all of them (successive inner joins, left to right).
data <- Reduce(
  function(acc, tbl) inner_join(acc, tbl, by = "Country"),
  list(efi, pluralism, liberties)
)
row.names(data) <- data$Country # Renombrar las filas como paísesWarning: Setting row names on a tibble is deprecated.
head(data)# A tibble: 6 × 17
Country `Overall Score` `Property Rights` `Government Integrity`
<chr> <dbl> <dbl> <dbl>
1 Afghanistan NA 7.4 14.1
2 Albania 66.6 58.3 39.6
3 Algeria 47.5 27.6 29.5
4 Angola 55 39.9 28.2
5 Argentina 54.2 34.7 39.7
6 Armenia 65.4 49.9 50.4
# ℹ 13 more variables: `Judicial Effectiveness` <dbl>, `Tax Burden` <dbl>,
# `Government Spending` <dbl>, `Fiscal Health` <dbl>,
# `Business Freedom` <dbl>, `Labor Freedom` <dbl>, `Monetary Freedom` <dbl>,
# `Trade Freedom` <dbl>, `Investment Freedom` <dbl>,
# `Financial Freedom` <dbl>, `Free and fair elections` <dbl>,
# `Civil liberties` <dbl>, `World regions` <chr>
Librerias:
library(pacman)
p_load(umap, cluster, factoextra, tidyverse, skimr, naniar,
tictoc, DataExplorer, ggplot2, plotly, psych, NbClust)# resumen general
skim(data)| Name | data |
| Number of rows | 165 |
| Number of columns | 17 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 15 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| Country | 0 | 1 | 4 | 28 | 0 | 165 | 0 |
| World regions | 0 | 1 | 4 | 13 | 0 | 6 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Overall Score | 6 | 0.96 | 59.57 | 11.95 | 3.0 | 51.85 | 59.30 | 68.50 | 84.1 | ▁▁▃▇▅ |
| Property Rights | 0 | 1.00 | 52.23 | 26.60 | 0.7 | 30.80 | 47.40 | 74.10 | 100.0 | ▂▆▇▃▆ |
| Government Integrity | 0 | 1.00 | 42.92 | 22.69 | 3.3 | 25.10 | 39.30 | 57.20 | 98.0 | ▃▇▃▂▂ |
| Judicial Effectiveness | 0 | 1.00 | 46.50 | 28.07 | 2.7 | 26.50 | 43.10 | 68.10 | 99.3 | ▆▇▇▃▅ |
| Tax Burden | 6 | 0.96 | 77.65 | 12.99 | 0.0 | 71.50 | 78.30 | 86.45 | 99.9 | ▁▁▁▇▇ |
| Government Spending | 6 | 0.96 | 67.50 | 23.63 | 0.0 | 55.60 | 73.80 | 85.85 | 98.1 | ▁▂▃▆▇ |
| Fiscal Health | 6 | 0.96 | 63.13 | 30.30 | 0.0 | 44.60 | 70.80 | 89.55 | 100.0 | ▃▂▃▅▇ |
| Business Freedom | 6 | 0.96 | 63.39 | 17.80 | 5.0 | 50.45 | 67.10 | 77.90 | 93.0 | ▁▃▅▇▇ |
| Labor Freedom | 6 | 0.96 | 56.43 | 10.23 | 5.0 | 52.30 | 57.30 | 62.10 | 81.8 | ▁▁▂▇▂ |
| Monetary Freedom | 6 | 0.96 | 67.18 | 15.03 | 0.0 | 67.15 | 70.90 | 74.60 | 88.7 | ▁▁▁▇▇ |
| Trade Freedom | 5 | 0.97 | 70.71 | 11.90 | 0.0 | 64.85 | 72.20 | 79.60 | 95.0 | ▁▁▂▇▆ |
| Investment Freedom | 6 | 0.96 | 54.18 | 20.58 | 0.0 | 45.00 | 60.00 | 70.00 | 95.0 | ▁▂▆▇▂ |
| Financial Freedom | 6 | 0.96 | 48.77 | 18.55 | 0.0 | 40.00 | 50.00 | 60.00 | 80.0 | ▁▃▃▇▃ |
| Free and fair elections | 0 | 1.00 | 5.45 | 4.00 | 0.0 | 0.50 | 6.58 | 9.58 | 10.0 | ▆▁▂▂▇ |
| Civil liberties | 0 | 1.00 | 5.35 | 2.86 | 0.0 | 2.94 | 5.29 | 7.94 | 10.0 | ▅▇▇▆▇ |
Se observa presencia de missings
# visualización de patrón de NA
gg_miss_var(data)Analizando la cantidad de missings.
sum(!complete.cases(data)) # mostrar observaciones con datos faltantes "NA"[1] 6
sum(!complete.cases(data)) / nrow(data) * 100 # mostrar porcentaje % que representan estas observaciones[1] 3.636364
La cantidad de observaciones con NA representa el 3.6% del total, así que se puede optar por eliminarlas.
# Histogramas
data |>
pivot_longer(cols = -c(Country, `Overall Score`, `World regions`), names_to = "Variable", values_to = "Valor") |>
ggplot(aes(x = Valor)) +
geom_histogram(fill = "#69b3a2", bins = 30) +
facet_wrap(~Variable, scales = "free") +
theme_minimal()Warning: Removed 53 rows containing non-finite outside the scale range
(`stat_bin()`).
data |>
select(-Country, -`Overall Score`, -`World regions`) |>
pivot_longer(everything(), names_to = "Variable", values_to = "Valor") |> ggplot(aes(x = Variable, y = Valor)) +
geom_boxplot(fill = "orange") +
coord_flip() +
theme_minimal() +
labs(title = "Distribución de todas las variables", x = NULL, y = NULL)Warning: Removed 53 rows containing non-finite outside the scale range
(`stat_boxplot()`).
# 1. Reshape to long format and keep, for every variable, its 10 highest values
#    (slice_max() keeps ties by default, so a group may return more than 10 rows)
top10_long <- data |>
  pivot_longer(
    cols = -c(Country, `World regions`),
    names_to = "Variable",
    values_to = "Valor"
  ) |>
  group_by(Variable) |>
  slice_max(order_by = Valor, n = 10) |>
  ungroup()

# 2. One horizontal bar chart per variable showing its top-10 countries
ggplot(top10_long, aes(x = reorder(Country, Valor), y = Valor)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  facet_wrap(~ Variable, scales = "free_y") +
  labs(title = "Top 10 países por cada variable", x = NULL, y = NULL) +
  theme_minimal()

# Drop every row that has a missing value in any column. This is a separate
# statement: it must not share a line with the end of the plot call above.
data <- na.omit(data)
sum(!complete.cases(data))[1] 0
Crear una subdata con las variables numéricas que se van a utilizar
# Numeric indicators used for PCA / clustering: every column except the
# country name, the aggregate `Overall Score` and the region label.
# Selecting by name (instead of by position 3:16) keeps the subset correct if
# the column order of `data` ever changes, and matches the exclusions used for
# the histograms and boxplots.
subdata <- data |>
  select(-Country, -`Overall Score`, -`World regions`)
row.names(subdata) <- data$Country # Nombrando las filas con los paísesWarning: Setting row names on a tibble is deprecated.
head(subdata)# A tibble: 6 × 14
`Property Rights` `Government Integrity` `Judicial Effectiveness` `Tax Burden`
<dbl> <dbl> <dbl> <dbl>
1 58.3 39.6 59.2 88.8
2 27.6 29.5 29.8 80.3
3 39.9 28.2 25.7 86.9
4 34.7 39.7 56.5 70.8
5 49.9 50.4 31.3 88.1
6 90.4 86.2 95.3 62.1
# ℹ 10 more variables: `Government Spending` <dbl>, `Fiscal Health` <dbl>,
# `Business Freedom` <dbl>, `Labor Freedom` <dbl>, `Monetary Freedom` <dbl>,
# `Trade Freedom` <dbl>, `Investment Freedom` <dbl>,
# `Financial Freedom` <dbl>, `Free and fair elections` <dbl>,
# `Civil liberties` <dbl>
Relación de correlaciones
library(corrplot)Warning: package 'corrplot' was built under R version 4.5.2
corrplot 0.95 loaded
corrplot(cor(subdata, use="complete.obs"),
method="number", type="lower",
tl.cex=0.7, # etiquetas (nombres variables)
number.cex=0.7, # números dentro de celdas
number.digits=1,
mar=c(0,0,1,0))Se observa claramente bloques de variables con correlaciones altas, lo que no solo da buen indicio para justificar la aplicación de ACP, sino también replantear la aplicación de Clustering.
Matriz de Correlación
# Correlation matrix of the selected indicators (cor() defaults)
cor_data <- subdata
cor_matrix <- cor(cor_data)

# Long ("tidy") layout for ggplot2: one row per (Var1, Var2) pair
cor_long <- as_tibble(cor_matrix, rownames = "Var1") |>
  pivot_longer(cols = !Var1, names_to = "Var2", values_to = "Correlation")
# Gráfico tipo heatmap
ggplot(cor_long, aes(Var1, Var2, fill = Correlation)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
theme_minimal() +
coord_fixed() +
labs(title = "Matriz de correlaciones") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Kaiser-Meyer-Olkin (KMO)
psych::KMO(subdata)Kaiser-Meyer-Olkin factor adequacy
Call: psych::KMO(r = subdata)
Overall MSA = 0.9
MSA for each item =
Property Rights Government Integrity Judicial Effectiveness
0.93 0.93 0.91
Tax Burden Government Spending Fiscal Health
0.63 0.86 0.68
Business Freedom Labor Freedom Monetary Freedom
0.95 0.95 0.91
Trade Freedom Investment Freedom Financial Freedom
0.93 0.88 0.91
Free and fair elections Civil liberties
0.89 0.88
# Interpretación: un MSA global mayor a 0.6 indica que el conjunto de variables tiene correlaciones suficientes para la reducción de dimensión.El MSA global obtenido (0.9) es mucho mayor a 0.6.
Prueba de Esfericidad de Bartlett
# Bartlett's test of sphericity. H0: the correlation matrix is an identity
# matrix. `n` is the number of observations the correlations were computed on.
psych::cortest.bartlett(cor_matrix, n = nrow(subdata))
# Rechazo H0, la matriz de correlación no es una identidad, y por tanto es válida para PCA.El p-valor obtenido es menor a 0.05 entonces RECHAZAMOS la hipótesis nula y concluimos que NO ES UNA MATRIZ DE IDENTIDAD. Ergo, hay correlación significativa entre los indicadores que conforman este dataset.
scree(cor_matrix,
pc=TRUE,
factors=FALSE)O también el análisis paralelo (utiliza simulaciones con bootstrap). En este caso explícitamente nos recomienda un número de componentes.
# Parallel analysis on the correlation matrix. `n.obs` must be supplied when a
# correlation matrix (rather than raw data) is passed; otherwise psych falls
# back to an arbitrary 100 cases for the simulated reference eigenvalues.
fa.parallel(cor_matrix, n.obs = nrow(subdata), fa = "pc")
Parallel analysis suggests that the number of factors = NA and the number of components = 2
El número de componentes recomendados por el algoritmo es 2
Escalamiento
Estandarización robusta del Dataset
# Robust standardisation: centre each column on its median and divide it by
# its interquartile range.
col_medians <- vapply(subdata, median, numeric(1))
col_iqrs <- vapply(subdata, IQR, numeric(1))
subdata <- scale(subdata, center = col_medians, scale = col_iqrs)
# PCA with prcomp on the robustly scaled data
pc <- prcomp(subdata, center = FALSE, scale. = FALSE) Varianza:
summary(pc)Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 2.5314 1.5382 0.9451 0.77191 0.64829 0.62707 0.49760
Proportion of Variance 0.5381 0.1987 0.0750 0.05003 0.03529 0.03302 0.02079
Cumulative Proportion 0.5381 0.7367 0.8117 0.86177 0.89705 0.93007 0.95086
PC8 PC9 PC10 PC11 PC12 PC13 PC14
Standard deviation 0.42610 0.39613 0.30846 0.2846 0.17791 0.14908 0.1293
Proportion of Variance 0.01524 0.01318 0.00799 0.0068 0.00266 0.00187 0.0014
Cumulative Proportion 0.96611 0.97928 0.98727 0.9941 0.99673 0.99860 1.0000
Visualización gráfica
fviz_eig(pc)Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
Ignoring empty aesthetic: `width`.
pc$rotation PC1 PC2 PC3 PC4
Property Rights -0.16918054 -0.25553830 0.01782990 0.0003481359
Government Integrity -0.18596478 -0.32624466 0.02456186 -0.0310945665
Judicial Effectiveness -0.16394167 -0.28320213 0.04871601 -0.0995664198
Tax Burden -0.01308185 0.31262578 -0.64734316 -0.0664125014
Government Spending 0.06406693 0.34839387 -0.36920735 -0.0306635255
Fiscal Health -0.06563946 0.07216788 -0.25804784 0.3776839353
Business Freedom -0.20528153 -0.17119720 -0.14615680 0.0693550373
Labor Freedom -0.30196330 -0.13663665 -0.27449635 -0.7273478966
Monetary Freedom -0.73283571 0.56632403 0.36164920 -0.0016684511
Trade Freedom -0.23601626 -0.12162914 -0.30612434 0.0763720762
Investment Freedom -0.25563386 -0.14183553 -0.14448551 0.3963758507
Financial Freedom -0.28303691 -0.23188001 -0.17609297 0.3628177534
Free and fair elections -0.11063381 -0.14232899 -0.03064571 -0.0596707156
Civil liberties -0.13251918 -0.21471369 -0.02670914 -0.0955605280
PC5 PC6 PC7 PC8
Property Rights -0.10896271 0.07443083 0.25656632 -0.224496018
Government Integrity -0.09549726 0.16422891 0.38090198 -0.367231713
Judicial Effectiveness 0.01762045 0.12381360 0.33161523 -0.032897560
Tax Burden -0.51569586 0.10888463 0.02293432 -0.174029412
Government Spending 0.39944419 -0.32038390 0.62968359 0.094489996
Fiscal Health 0.59253612 0.58669959 -0.17473181 -0.189483087
Business Freedom -0.01836113 0.11128857 0.10801869 0.142696693
Labor Freedom 0.31379949 -0.13548964 -0.36630580 -0.163151899
Monetary Freedom -0.04919648 0.06551258 0.05191776 -0.006760781
Trade Freedom -0.27036857 0.28128161 -0.10556792 0.468516795
Investment Freedom 0.07406099 -0.48923349 -0.21553703 0.142479671
Financial Freedom -0.02902011 -0.36959578 -0.07230880 -0.309142209
Free and fair elections 0.11171839 0.02095913 0.09185119 0.506479159
Civil liberties 0.09468411 0.05382774 0.17818723 0.315621034
PC9 PC10 PC11 PC12
Property Rights 0.01149807 -0.17840794 -0.012296550 -0.627087269
Government Integrity -0.07772265 -0.24065022 0.038567257 0.680342954
Judicial Effectiveness 0.07581866 0.01024401 0.303627125 -0.315961411
Tax Burden 0.39850185 -0.01028890 0.092401643 0.003854859
Government Spending -0.25908961 -0.02039591 -0.014834550 -0.014230927
Fiscal Health 0.09971991 -0.01647018 0.086680479 -0.024103465
Business Freedom 0.09109417 -0.06273389 -0.877575879 -0.062107328
Labor Freedom -0.07216014 -0.04018308 -0.022499524 -0.002653441
Monetary Freedom 0.02920561 0.02293477 0.016583595 0.013014772
Trade Freedom -0.64671650 -0.01603066 0.163782747 0.002239028
Investment Freedom 0.17883511 -0.60350167 0.152322267 0.020863757
Financial Freedom -0.10364172 0.67385905 0.001820971 0.018734321
Free and fair elections 0.39762965 0.23582124 -0.007407836 0.194447748
Civil liberties 0.34463013 0.17063080 0.262477764 0.025101730
PC13 PC14
Property Rights 0.560383199 -0.180947400
Government Integrity 0.111622701 -0.024660001
Judicial Effectiveness -0.535926725 0.518507330
Tax Burden -0.027012764 0.042751620
Government Spending 0.036326828 -0.018598151
Fiscal Health 0.033151309 0.009491218
Business Freedom -0.265725749 -0.028032349
Labor Freedom 0.025976248 0.011381529
Monetary Freedom -0.001657757 -0.012510445
Trade Freedom 0.036921605 -0.019863811
Investment Freedom -0.057905789 0.042027632
Financial Freedom 0.005946078 0.004284610
Free and fair elections 0.503753626 0.423994437
Civil liberties -0.230950659 -0.715900831
Otro gráfico interesante es el de cargas de las catorce variables originales en dos dimensiones:
fviz_pca_var(pc, col.var = "contrib", gradient.cols = c("blue", "orange", "red"))Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
ℹ The deprecated feature was likely used in the ggpubr package.
Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
ℹ The deprecated feature was likely used in the factoextra package.
Please report the issue at <https://github.com/kassambara/factoextra/issues>.
# Scores on the first two principal components (PC1 and PC2), labelled with
# the row names of the scaled data.
pca_df <- as.data.frame(pc$x[, 1:2])
rownames(pca_df) <- rownames(subdata)

# Scores on all 14 principal components, with columns renamed pc1 ... pc14.
data_pcas <- as.data.frame(pc$x[, 1:14])
colnames(data_pcas) <- paste0("pc", 1:14)
head(data_pcas) pc1 pc2 pc3 pc4 pc5 pc6
Albania -0.8255053 0.19039181 -0.39994243 0.63923704 -0.74936976 0.4278672
Algeria 1.5730929 1.19715938 1.06931424 -0.91597235 -0.31554193 0.8797876
Angola 1.7900501 0.88299451 -0.40587299 -0.00836497 -0.07523845 0.6447173
Argentina 4.6479039 -4.10439415 -1.61500688 0.28340543 0.52267119 -0.7353509
Armenia -0.5131841 0.08812675 -0.70114624 0.26975751 -0.11598293 -0.1836875
Australia -2.5868146 -2.42894264 0.06025699 0.32120226 0.20770736 0.1429676
pc7 pc8 pc9 pc10 pc11
Albania 0.35342161 0.21261147 -0.01033754 0.356594265 0.18135755
Algeria 0.09623137 -0.26367017 0.33267732 0.029646242 -0.34112572
Angola 0.20779448 -0.33273726 -0.33011540 0.378305376 0.27539475
Argentina -0.53626977 0.06932088 0.36451979 0.353637585 0.42207017
Armenia -0.06232710 -0.33884228 0.26606182 -0.009472341 -0.14130835
Australia 0.38614570 -0.10429492 -0.59729526 0.133323764 0.04700138
pc12 pc13 pc14
Albania -0.23526534 -0.2087663 -0.05304177
Algeria 0.07897747 -0.2094832 -0.04317676
Angola -0.05984916 0.3374372 0.16368263
Argentina 0.15666941 -0.2821137 0.01757029
Armenia 0.34859675 0.1924481 -0.02981442
Australia 0.13150320 -0.1380388 -0.05629548
subdata: variables originales seleccionadas y escaladas
data_pcas: data con los 14 componentes principales (pc1 a pc14); pca_df contiene solo PC1 y PC2
# Proyección de los países en las 2 primeras dimensiones
fviz_pca_ind(pc,
geom.ind = "point",
col.ind = "cos2",
gradient.cols = c("blue", "orange", "red"),
repel = TRUE,
label = "none")Cálculo de distancias
distancias <- dist(subdata, method = "euclidean")Utilizaremos el método del codo (elbow method) mediante la función fviz_nbclust() del paquete factoextra.
Primero usamos el criterio de Suma de Cuadrados dentro de clusters:
# Elbow method: within-cluster sum of squares for up to 15 clusters
fviz_nbclust(
  subdata,            # all indicators (robustly scaled)
  hcut,               # hierarchical clustering
  diss = distancias,  # Euclidean distance matrix
  method = "wss",     # within-cluster sum of squares criterion
  k.max = 15,         # largest number of clusters tried
  verbose = FALSE     # spelled out: `F` is a plain variable and can be reassigned
)Usando el gráfico de silueta:
# Average silhouette width for up to 15 clusters
fviz_nbclust(
  subdata,                # all indicators (robustly scaled)
  hcut,                   # hierarchical clustering
  diss = distancias,      # Euclidean distance matrix
  method = "silhouette",  # average silhouette criterion
  k.max = 15,             # largest number of clusters tried
  verbose = FALSE         # spelled out: `F` is a plain variable and can be reassigned
)Complementaremos el análisis visual con la función NbClust() que aplica simultáneamente hasta 30 índices diferentes para determinar el número óptimo de clusters.
library(NbClust)
set.seed(2025)
res.nbclust1 <- NbClust(subdata, # Data
distance = "euclidean", # Distancia utilizada
min.nc = 2, # Mínimo
max.nc = 10, # Máximo
method = "ward.D", # Método
index ="all") # Índices considerados*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 3 proposed 2 as the best number of clusters
* 12 proposed 3 as the best number of clusters
* 1 proposed 4 as the best number of clusters
* 4 proposed 5 as the best number of clusters
* 1 proposed 8 as the best number of clusters
* 1 proposed 9 as the best number of clusters
* 1 proposed 10 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 3
*******************************************************************
La función nos indica que el mejor número de clusters es 3, de acuerdo a la regla de la mayoría.
# Agglomerative hierarchical clustering cut into 3 groups, the number
# proposed by the NbClust majority rule.
aglomerativo1 <- hcut(
  x = distancias,        # distance matrix
  k = 3,                 # number of clusters the tree is cut into
  hc_func = "agnes",     # agglomerative nesting
  hc_method = "ward.D"   # linkage criterion
)Primero, veamos la asignación de cluster:
fviz_dend(aglomerativo1, # Nuestro objeto cluster creado
rect = TRUE, # Resalta los cluster solicitados
cex = 0.5) # Tamaño de etiquetasWarning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.
ℹ The deprecated feature was likely used in the factoextra package.
Please report the issue at <https://github.com/kassambara/factoextra/issues>.
fviz_silhouette(aglomerativo1, label=TRUE) cluster size ave.sil.width
1 1 107 0.27
2 2 14 0.16
3 3 38 0.44
Identificación de casos mal clasificados:
aglomerativo1$silinfo$widths |> data.frame() |> filter(sil_width<0) cluster neighbor sil_width
Serbia 1 3 -0.006449935
Bulgaria 1 3 -0.012346104
Romania 1 3 -0.043132571
Cabo Verde 1 3 -0.070608276
Jamaica 1 3 -0.083313477
Botswana 1 3 -0.085875734
Uruguay 1 3 -0.102904653
Hungary 1 3 -0.170913621
Mauritius 1 3 -0.251016040
Türkiye 2 1 -0.027284535
Sierra Leone 2 1 -0.089941334
Ethiopia 2 1 -0.104669698
Suriname 2 1 -0.147607920
Laos 2 1 -0.239642155
14 observaciones (países) mal clasificados
# Country-to-cluster lookup table for the agglomerative solution
cluster_aglo1 <- data.frame(
  Country = data[["Country"]],
  Cluster = aglomerativo1[["cluster"]]
)
# Ver la lista de países por clúster
cluster_aglo1 |>
arrange(Cluster) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Azerbaijan 1
6 Bahrain 1
7 Bangladesh 1
8 Belarus 1
9 Benin 1
10 Bhutan 1
11 Bolivia 1
12 Bosnia and Herzegovina 1
13 Botswana 1
14 Brazil 1
15 Bulgaria 1
16 Burkina Faso 1
17 Burma 1
18 Burundi 1
19 Cabo Verde 1
20 Cambodia 1
21 Cameroon 1
22 Central African Republic 1
23 Chad 1
24 China 1
25 Colombia 1
26 Comoros 1
27 Costa Rica 1
28 Côte d'Ivoire 1
29 Democratic Republic of Congo 1
30 Djibouti 1
31 Dominican Republic 1
32 Ecuador 1
33 Egypt 1
34 El Salvador 1
35 Equatorial Guinea 1
36 Eritrea 1
37 Eswatini 1
38 Fiji 1
39 Gabon 1
40 Georgia 1
41 Ghana 1
42 Guatemala 1
43 Guinea 1
44 Guinea-Bissau 1
45 Guyana 1
46 Honduras 1
47 Hungary 1
48 India 1
49 Indonesia 1
50 Jamaica 1
51 Jordan 1
52 Kazakhstan 1
53 Kenya 1
54 Kuwait 1
55 Kyrgyz Republic 1
56 Lesotho 1
57 Liberia 1
58 Madagascar 1
59 Malawi 1
60 Malaysia 1
61 Mali 1
62 Mauritania 1
63 Mauritius 1
64 Mexico 1
65 Moldova 1
66 Mongolia 1
67 Montenegro 1
68 Morocco 1
69 Mozambique 1
70 Namibia 1
71 Nepal 1
72 Nicaragua 1
73 Niger 1
74 Nigeria 1
75 North Macedonia 1
76 Oman 1
77 Pakistan 1
78 Panama 1
79 Papua New Guinea 1
80 Paraguay 1
81 Peru 1
82 Qatar 1
83 Republic of Congo 1
84 Romania 1
85 Russia 1
86 Rwanda 1
87 Saudi Arabia 1
88 Senegal 1
89 Serbia 1
90 South Africa 1
91 Sri Lanka 1
92 Tajikistan 1
93 Tanzania 1
94 Thailand 1
95 The Gambia 1
96 The Philippines 1
97 Timor-Leste 1
98 Togo 1
99 Trinidad and Tobago 1
100 Tunisia 1
101 Turkmenistan 1
102 Uganda 1
103 United Arab Emirates 1
104 Uruguay 1
105 Uzbekistan 1
106 Vietnam 1
107 Zambia 1
108 Argentina 2
109 Cuba 2
110 Ethiopia 2
111 Haiti 2
112 Iran 2
113 Laos 2
114 Lebanon 2
115 North Korea 2
116 Sierra Leone 2
117 Sudan 2
118 Suriname 2
119 Türkiye 2
120 Venezuela 2
121 Zimbabwe 2
122 Australia 3
123 Austria 3
124 Belgium 3
125 Canada 3
126 Chile 3
127 Croatia 3
128 Cyprus 3
129 Czech Republic 3
130 Denmark 3
131 Estonia 3
132 Finland 3
133 France 3
134 Germany 3
135 Greece 3
136 Iceland 3
137 Ireland 3
138 Israel 3
139 Italy 3
140 Japan 3
141 Latvia 3
142 Lithuania 3
143 Luxembourg 3
144 Malta 3
145 Netherlands 3
146 New Zealand 3
147 Norway 3
148 Poland 3
149 Portugal 3
150 Singapore 3
151 Slovakia 3
152 Slovenia 3
153 South Korea 3
154 Spain 3
155 Sweden 3
156 Switzerland 3
157 Taiwan 3
158 United Kingdom 3
159 United States 3
cluster_aglo1 |> filter(Cluster == 1) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Azerbaijan 1
6 Bahrain 1
7 Bangladesh 1
8 Belarus 1
9 Benin 1
10 Bhutan 1
11 Bolivia 1
12 Bosnia and Herzegovina 1
13 Botswana 1
14 Brazil 1
15 Bulgaria 1
16 Burkina Faso 1
17 Burma 1
18 Burundi 1
19 Cabo Verde 1
20 Cambodia 1
21 Cameroon 1
22 Central African Republic 1
23 Chad 1
24 China 1
25 Colombia 1
26 Comoros 1
27 Costa Rica 1
28 Côte d'Ivoire 1
29 Democratic Republic of Congo 1
30 Djibouti 1
31 Dominican Republic 1
32 Ecuador 1
33 Egypt 1
34 El Salvador 1
35 Equatorial Guinea 1
36 Eritrea 1
37 Eswatini 1
38 Fiji 1
39 Gabon 1
40 Georgia 1
41 Ghana 1
42 Guatemala 1
43 Guinea 1
44 Guinea-Bissau 1
45 Guyana 1
46 Honduras 1
47 Hungary 1
48 India 1
49 Indonesia 1
50 Jamaica 1
51 Jordan 1
52 Kazakhstan 1
53 Kenya 1
54 Kuwait 1
55 Kyrgyz Republic 1
56 Lesotho 1
57 Liberia 1
58 Madagascar 1
59 Malawi 1
60 Malaysia 1
61 Mali 1
62 Mauritania 1
63 Mauritius 1
64 Mexico 1
65 Moldova 1
66 Mongolia 1
67 Montenegro 1
68 Morocco 1
69 Mozambique 1
70 Namibia 1
71 Nepal 1
72 Nicaragua 1
73 Niger 1
74 Nigeria 1
75 North Macedonia 1
76 Oman 1
77 Pakistan 1
78 Panama 1
79 Papua New Guinea 1
80 Paraguay 1
81 Peru 1
82 Qatar 1
83 Republic of Congo 1
84 Romania 1
85 Russia 1
86 Rwanda 1
87 Saudi Arabia 1
88 Senegal 1
89 Serbia 1
90 South Africa 1
91 Sri Lanka 1
92 Tajikistan 1
93 Tanzania 1
94 Thailand 1
95 The Gambia 1
96 The Philippines 1
97 Timor-Leste 1
98 Togo 1
99 Trinidad and Tobago 1
100 Tunisia 1
101 Turkmenistan 1
102 Uganda 1
103 United Arab Emirates 1
104 Uruguay 1
105 Uzbekistan 1
106 Vietnam 1
107 Zambia 1
cluster_aglo1 |> filter(Cluster == 2) Country Cluster
1 Argentina 2
2 Cuba 2
3 Ethiopia 2
4 Haiti 2
5 Iran 2
6 Laos 2
7 Lebanon 2
8 North Korea 2
9 Sierra Leone 2
10 Sudan 2
11 Suriname 2
12 Türkiye 2
13 Venezuela 2
14 Zimbabwe 2
cluster_aglo1 |> filter(Cluster == 3) Country Cluster
1 Australia 3
2 Austria 3
3 Belgium 3
4 Canada 3
5 Chile 3
6 Croatia 3
7 Cyprus 3
8 Czech Republic 3
9 Denmark 3
10 Estonia 3
11 Finland 3
12 France 3
13 Germany 3
14 Greece 3
15 Iceland 3
16 Ireland 3
17 Israel 3
18 Italy 3
19 Japan 3
20 Latvia 3
21 Lithuania 3
22 Luxembourg 3
23 Malta 3
24 Netherlands 3
25 New Zealand 3
26 Norway 3
27 Poland 3
28 Portugal 3
29 Singapore 3
30 Slovakia 3
31 Slovenia 3
32 South Korea 3
33 Spain 3
34 Sweden 3
35 Switzerland 3
36 Taiwan 3
37 United Kingdom 3
38 United States 3
# Divisive hierarchical clustering (DIANA), cut into 3 groups
divisivo1 <- hcut(distancias, k = 3, hc_func = "diana")

# Dendrogram of the divisive solution
fviz_dend(
  divisivo1,    # cluster object created above
  rect = TRUE,  # draw a rectangle around each requested cluster
  cex = 0.5     # label size
)

# Silhouette plot
fviz_silhouette(divisivo1, label=TRUE) cluster size ave.sil.width
1 1 151 0.59
2 2 7 0.37
3 3 1 0.00
Identificación de casos mal clasificados:
divisivo1$silinfo$widths |> data.frame() |> filter(sil_width<0) cluster neighbor sil_width
Haiti 1 2 -0.09342856
Hay 1 observación (país) mal clasificada
# Country-to-cluster lookup table for the divisive solution
cluster_div1 <- data.frame(
  Country = data[["Country"]],
  Cluster = divisivo1[["cluster"]]
)
# Ver la lista de países por clúster
cluster_div1 |>
arrange(Cluster) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Australia 1
6 Austria 1
7 Azerbaijan 1
8 Bahrain 1
9 Bangladesh 1
10 Belarus 1
11 Belgium 1
12 Benin 1
13 Bhutan 1
14 Bolivia 1
15 Bosnia and Herzegovina 1
16 Botswana 1
17 Brazil 1
18 Bulgaria 1
19 Burkina Faso 1
20 Burma 1
21 Burundi 1
22 Cabo Verde 1
23 Cambodia 1
24 Cameroon 1
25 Canada 1
26 Central African Republic 1
27 Chad 1
28 Chile 1
29 China 1
30 Colombia 1
31 Comoros 1
32 Costa Rica 1
33 Côte d'Ivoire 1
34 Croatia 1
35 Cyprus 1
36 Czech Republic 1
37 Democratic Republic of Congo 1
38 Denmark 1
39 Djibouti 1
40 Dominican Republic 1
41 Ecuador 1
42 Egypt 1
43 El Salvador 1
44 Equatorial Guinea 1
45 Eritrea 1
46 Estonia 1
47 Eswatini 1
48 Ethiopia 1
49 Fiji 1
50 Finland 1
51 France 1
52 Gabon 1
53 Georgia 1
54 Germany 1
55 Ghana 1
56 Greece 1
57 Guatemala 1
58 Guinea 1
59 Guinea-Bissau 1
60 Guyana 1
61 Haiti 1
62 Honduras 1
63 Hungary 1
64 Iceland 1
65 India 1
66 Indonesia 1
67 Ireland 1
68 Israel 1
69 Italy 1
70 Jamaica 1
71 Japan 1
72 Jordan 1
73 Kazakhstan 1
74 Kenya 1
75 Kuwait 1
76 Kyrgyz Republic 1
77 Laos 1
78 Latvia 1
79 Lesotho 1
80 Liberia 1
81 Lithuania 1
82 Luxembourg 1
83 Madagascar 1
84 Malawi 1
85 Malaysia 1
86 Mali 1
87 Malta 1
88 Mauritania 1
89 Mauritius 1
90 Mexico 1
91 Moldova 1
92 Mongolia 1
93 Montenegro 1
94 Morocco 1
95 Mozambique 1
96 Namibia 1
97 Nepal 1
98 Netherlands 1
99 New Zealand 1
100 Nicaragua 1
101 Niger 1
102 Nigeria 1
103 North Macedonia 1
104 Norway 1
105 Oman 1
106 Pakistan 1
107 Panama 1
108 Papua New Guinea 1
109 Paraguay 1
110 Peru 1
111 Poland 1
112 Portugal 1
113 Qatar 1
114 Republic of Congo 1
115 Romania 1
116 Russia 1
117 Rwanda 1
118 Saudi Arabia 1
119 Senegal 1
120 Serbia 1
121 Sierra Leone 1
122 Singapore 1
123 Slovakia 1
124 Slovenia 1
125 South Africa 1
126 South Korea 1
127 Spain 1
128 Sri Lanka 1
129 Suriname 1
130 Sweden 1
131 Switzerland 1
132 Taiwan 1
133 Tajikistan 1
134 Tanzania 1
135 Thailand 1
136 The Gambia 1
137 The Philippines 1
138 Timor-Leste 1
139 Togo 1
140 Trinidad and Tobago 1
141 Tunisia 1
142 Türkiye 1
143 Turkmenistan 1
144 Uganda 1
145 United Arab Emirates 1
146 United Kingdom 1
147 United States 1
148 Uruguay 1
149 Uzbekistan 1
150 Vietnam 1
151 Zambia 1
152 Argentina 2
153 Cuba 2
154 Iran 2
155 Lebanon 2
156 Sudan 2
157 Venezuela 2
158 Zimbabwe 2
159 North Korea 3
cluster_div1 |> filter(Cluster == 1) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Australia 1
6 Austria 1
7 Azerbaijan 1
8 Bahrain 1
9 Bangladesh 1
10 Belarus 1
11 Belgium 1
12 Benin 1
13 Bhutan 1
14 Bolivia 1
15 Bosnia and Herzegovina 1
16 Botswana 1
17 Brazil 1
18 Bulgaria 1
19 Burkina Faso 1
20 Burma 1
21 Burundi 1
22 Cabo Verde 1
23 Cambodia 1
24 Cameroon 1
25 Canada 1
26 Central African Republic 1
27 Chad 1
28 Chile 1
29 China 1
30 Colombia 1
31 Comoros 1
32 Costa Rica 1
33 Côte d'Ivoire 1
34 Croatia 1
35 Cyprus 1
36 Czech Republic 1
37 Democratic Republic of Congo 1
38 Denmark 1
39 Djibouti 1
40 Dominican Republic 1
41 Ecuador 1
42 Egypt 1
43 El Salvador 1
44 Equatorial Guinea 1
45 Eritrea 1
46 Estonia 1
47 Eswatini 1
48 Ethiopia 1
49 Fiji 1
50 Finland 1
51 France 1
52 Gabon 1
53 Georgia 1
54 Germany 1
55 Ghana 1
56 Greece 1
57 Guatemala 1
58 Guinea 1
59 Guinea-Bissau 1
60 Guyana 1
61 Haiti 1
62 Honduras 1
63 Hungary 1
64 Iceland 1
65 India 1
66 Indonesia 1
67 Ireland 1
68 Israel 1
69 Italy 1
70 Jamaica 1
71 Japan 1
72 Jordan 1
73 Kazakhstan 1
74 Kenya 1
75 Kuwait 1
76 Kyrgyz Republic 1
77 Laos 1
78 Latvia 1
79 Lesotho 1
80 Liberia 1
81 Lithuania 1
82 Luxembourg 1
83 Madagascar 1
84 Malawi 1
85 Malaysia 1
86 Mali 1
87 Malta 1
88 Mauritania 1
89 Mauritius 1
90 Mexico 1
91 Moldova 1
92 Mongolia 1
93 Montenegro 1
94 Morocco 1
95 Mozambique 1
96 Namibia 1
97 Nepal 1
98 Netherlands 1
99 New Zealand 1
100 Nicaragua 1
101 Niger 1
102 Nigeria 1
103 North Macedonia 1
104 Norway 1
105 Oman 1
106 Pakistan 1
107 Panama 1
108 Papua New Guinea 1
109 Paraguay 1
110 Peru 1
111 Poland 1
112 Portugal 1
113 Qatar 1
114 Republic of Congo 1
115 Romania 1
116 Russia 1
117 Rwanda 1
118 Saudi Arabia 1
119 Senegal 1
120 Serbia 1
121 Sierra Leone 1
122 Singapore 1
123 Slovakia 1
124 Slovenia 1
125 South Africa 1
126 South Korea 1
127 Spain 1
128 Sri Lanka 1
129 Suriname 1
130 Sweden 1
131 Switzerland 1
132 Taiwan 1
133 Tajikistan 1
134 Tanzania 1
135 Thailand 1
136 The Gambia 1
137 The Philippines 1
138 Timor-Leste 1
139 Togo 1
140 Trinidad and Tobago 1
141 Tunisia 1
142 Türkiye 1
143 Turkmenistan 1
144 Uganda 1
145 United Arab Emirates 1
146 United Kingdom 1
147 United States 1
148 Uruguay 1
149 Uzbekistan 1
150 Vietnam 1
151 Zambia 1
cluster_div1 |> filter(Cluster == 2) Country Cluster
1 Argentina 2
2 Cuba 2
3 Iran 2
4 Lebanon 2
5 Sudan 2
6 Venezuela 2
7 Zimbabwe 2
cluster_div1 |> filter(Cluster == 3) Country Cluster
1 North Korea 3
library(factoextra)
set.seed(2025) # Establece la semilla.
fviz_nbclust(subdata, kmeans, method = "wss", k.max = 15) +
geom_vline(xintercept = 3, linetype = 2) +
labs(subtitle = "Método Elbow") + theme_bw()Tenemos que identificar en qué punto comienza a haber estabilidad en la suma de cuadrados total dentro de los clúster
library(factoextra)
set.seed(2025)
fviz_nbclust(subdata, kmeans,
method = "silhouette", k.max = 15) +
labs(subtitle = "Silhouette method")library(NbClust)
set.seed(2025)
res.nbclust2 <- NbClust(subdata, distance = "euclidean",
min.nc = 2, max.nc = 10,
method = "kmeans", index ="all") *** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 2 proposed 2 as the best number of clusters
* 12 proposed 3 as the best number of clusters
* 4 proposed 4 as the best number of clusters
* 2 proposed 5 as the best number of clusters
* 1 proposed 9 as the best number of clusters
* 2 proposed 10 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 3
*******************************************************************
La función nos indica que el mejor número de clusters es 3, de acuerdo con la regla de la mayoría.
# K-means on `subdata` with 3 centers. The seed is fixed first so the
# 25 random initialisations (and therefore the chosen solution) are
# reproducible between runs.
set.seed(2025)
km1 <- kmeans(
  x = subdata,
  centers = 3,          # number of clusters
  nstart = 25,          # number of random starting configurations
  iter.max = 100,       # maximum number of iterations per start
  algorithm = "Lloyd"
)
Visualizando
library(factoextra)
fviz_cluster(km1, data = subdata, ellipse.type = "convex") +
theme_classic()Usando el índice de Silueta
km_clusters1 <- eclust(x = subdata, FUNcluster = "kmeans",
k = 3, seed = 2025,
hc_metric = "euclidean",
graph = FALSE) fviz_silhouette(sil.obj = km_clusters1,
print.summary = TRUE,
palette = "jco",
ggtheme = theme_classic()) cluster size ave.sil.width
1 1 8 0.32
2 2 80 0.26
3 3 71 0.31
km_clusters1$centers Property Rights Government Integrity Judicial Effectiveness Tax Burden
1 -0.7113993 -0.6463516 -0.6052632 -0.8185619
2 -0.2982207 -0.3203980 -0.3097076 0.1807692
3 0.6592365 0.7581342 0.6428466 -0.2090537
Government Spending Fiscal Health Business Freedom Labor Freedom
1 -0.2752066 -0.5667408 -1.1279599 -2.1581633
2 0.1420248 -0.2154894 -0.5224499 -0.4616071
3 -0.5952741 -0.0755456 0.4127351 0.5635240
Monetary Freedom Trade Freedom Investment Freedom Financial Freedom
1 -7.8624161 -1.6526846 -1.7500000 -1.6250000
2 -0.4739933 -0.4449664 -0.5650000 -0.5312500
3 0.3017298 0.4614803 0.3126761 0.6443662
Free and fair elections Civil liberties
1 -0.6044588 -0.5356796
2 -0.3961161 -0.3466505
3 0.1784617 0.4073841
km_clusters1$silinfo$widths |> data.frame() |> filter(sil_width<0) cluster neighbor sil_width
Iran 1 2 -0.12602429
Brazil 2 3 -0.01065274
Mexico 2 3 -0.03395911
Hay 3 observaciones (países) mal clasificadas, es decir, con ancho de silueta negativo: Iran, Brazil y Mexico.
# Country-to-cluster lookup table for the k-means solution on `subdata`.
# Countries and cluster labels are paired by position, so this assumes
# `data` keeps the same row order as the observations that were clustered.
cluster_km1 <- data.frame(
  Country = data[["Country"]],
  Cluster = km_clusters1[["cluster"]]
)
# Ver la lista de países por clúster
cluster_km1 |>
arrange(Cluster) Country Cluster
Argentina Argentina 1
Cuba Cuba 1
Iran Iran 1
Lebanon Lebanon 1
North Korea North Korea 1
Sudan Sudan 1
Venezuela Venezuela 1
Zimbabwe Zimbabwe 1
Algeria Algeria 2
Angola Angola 2
Azerbaijan Azerbaijan 2
Bangladesh Bangladesh 2
Belarus Belarus 2
Benin Benin 2
Bhutan Bhutan 2
Bolivia Bolivia 2
Brazil Brazil 2
Burkina Faso Burkina Faso 2
Burma Burma 2
Burundi Burundi 2
Cambodia Cambodia 2
Cameroon Cameroon 2
Central African Republic Central African Republic 2
Chad Chad 2
China China 2
Comoros Comoros 2
Côte d'Ivoire Côte d'Ivoire 2
Democratic Republic of Congo Democratic Republic of Congo 2
Djibouti Djibouti 2
Dominican Republic Dominican Republic 2
Ecuador Ecuador 2
Egypt Egypt 2
El Salvador El Salvador 2
Equatorial Guinea Equatorial Guinea 2
Eritrea Eritrea 2
Eswatini Eswatini 2
Ethiopia Ethiopia 2
Gabon Gabon 2
Ghana Ghana 2
Guatemala Guatemala 2
Guinea Guinea 2
Guinea-Bissau Guinea-Bissau 2
Guyana Guyana 2
Haiti Haiti 2
Honduras Honduras 2
India India 2
Kazakhstan Kazakhstan 2
Kenya Kenya 2
Kuwait Kuwait 2
Kyrgyz Republic Kyrgyz Republic 2
Laos Laos 2
Lesotho Lesotho 2
Liberia Liberia 2
Madagascar Madagascar 2
Malawi Malawi 2
Mali Mali 2
Mauritania Mauritania 2
Mexico Mexico 2
Moldova Moldova 2
Mozambique Mozambique 2
Nepal Nepal 2
Nicaragua Nicaragua 2
Niger Niger 2
Nigeria Nigeria 2
Pakistan Pakistan 2
Papua New Guinea Papua New Guinea 2
Paraguay Paraguay 2
Republic of Congo Republic of Congo 2
Russia Russia 2
Rwanda Rwanda 2
Saudi Arabia Saudi Arabia 2
Senegal Senegal 2
Sierra Leone Sierra Leone 2
Sri Lanka Sri Lanka 2
Suriname Suriname 2
Tajikistan Tajikistan 2
Tanzania Tanzania 2
Thailand Thailand 2
The Gambia The Gambia 2
Timor-Leste Timor-Leste 2
Togo Togo 2
Tunisia Tunisia 2
Türkiye Türkiye 2
Turkmenistan Turkmenistan 2
Uganda Uganda 2
Uzbekistan Uzbekistan 2
Vietnam Vietnam 2
Zambia Zambia 2
Albania Albania 3
Armenia Armenia 3
Australia Australia 3
Austria Austria 3
Bahrain Bahrain 3
Belgium Belgium 3
Bosnia and Herzegovina Bosnia and Herzegovina 3
Botswana Botswana 3
Bulgaria Bulgaria 3
Cabo Verde Cabo Verde 3
Canada Canada 3
Chile Chile 3
Colombia Colombia 3
Costa Rica Costa Rica 3
Croatia Croatia 3
Cyprus Cyprus 3
Czech Republic Czech Republic 3
Denmark Denmark 3
Estonia Estonia 3
Fiji Fiji 3
Finland Finland 3
France France 3
Georgia Georgia 3
Germany Germany 3
Greece Greece 3
Hungary Hungary 3
Iceland Iceland 3
Indonesia Indonesia 3
Ireland Ireland 3
Israel Israel 3
Italy Italy 3
Jamaica Jamaica 3
Japan Japan 3
Jordan Jordan 3
Latvia Latvia 3
Lithuania Lithuania 3
Luxembourg Luxembourg 3
Malaysia Malaysia 3
Malta Malta 3
Mauritius Mauritius 3
Mongolia Mongolia 3
Montenegro Montenegro 3
Morocco Morocco 3
Namibia Namibia 3
Netherlands Netherlands 3
New Zealand New Zealand 3
North Macedonia North Macedonia 3
Norway Norway 3
Oman Oman 3
Panama Panama 3
Peru Peru 3
Poland Poland 3
Portugal Portugal 3
Qatar Qatar 3
Romania Romania 3
Serbia Serbia 3
Singapore Singapore 3
Slovakia Slovakia 3
Slovenia Slovenia 3
South Africa South Africa 3
South Korea South Korea 3
Spain Spain 3
Sweden Sweden 3
Switzerland Switzerland 3
Taiwan Taiwan 3
The Philippines The Philippines 3
Trinidad and Tobago Trinidad and Tobago 3
United Arab Emirates United Arab Emirates 3
United Kingdom United Kingdom 3
United States United States 3
Uruguay Uruguay 3
cluster_km1 |> filter(Cluster == 1) Country Cluster
Argentina Argentina 1
Cuba Cuba 1
Iran Iran 1
Lebanon Lebanon 1
North Korea North Korea 1
Sudan Sudan 1
Venezuela Venezuela 1
Zimbabwe Zimbabwe 1
cluster_km1 |> filter(Cluster == 2) Country Cluster
Algeria Algeria 2
Angola Angola 2
Azerbaijan Azerbaijan 2
Bangladesh Bangladesh 2
Belarus Belarus 2
Benin Benin 2
Bhutan Bhutan 2
Bolivia Bolivia 2
Brazil Brazil 2
Burkina Faso Burkina Faso 2
Burma Burma 2
Burundi Burundi 2
Cambodia Cambodia 2
Cameroon Cameroon 2
Central African Republic Central African Republic 2
Chad Chad 2
China China 2
Comoros Comoros 2
Côte d'Ivoire Côte d'Ivoire 2
Democratic Republic of Congo Democratic Republic of Congo 2
Djibouti Djibouti 2
Dominican Republic Dominican Republic 2
Ecuador Ecuador 2
Egypt Egypt 2
El Salvador El Salvador 2
Equatorial Guinea Equatorial Guinea 2
Eritrea Eritrea 2
Eswatini Eswatini 2
Ethiopia Ethiopia 2
Gabon Gabon 2
Ghana Ghana 2
Guatemala Guatemala 2
Guinea Guinea 2
Guinea-Bissau Guinea-Bissau 2
Guyana Guyana 2
Haiti Haiti 2
Honduras Honduras 2
India India 2
Kazakhstan Kazakhstan 2
Kenya Kenya 2
Kuwait Kuwait 2
Kyrgyz Republic Kyrgyz Republic 2
Laos Laos 2
Lesotho Lesotho 2
Liberia Liberia 2
Madagascar Madagascar 2
Malawi Malawi 2
Mali Mali 2
Mauritania Mauritania 2
Mexico Mexico 2
Moldova Moldova 2
Mozambique Mozambique 2
Nepal Nepal 2
Nicaragua Nicaragua 2
Niger Niger 2
Nigeria Nigeria 2
Pakistan Pakistan 2
Papua New Guinea Papua New Guinea 2
Paraguay Paraguay 2
Republic of Congo Republic of Congo 2
Russia Russia 2
Rwanda Rwanda 2
Saudi Arabia Saudi Arabia 2
Senegal Senegal 2
Sierra Leone Sierra Leone 2
Sri Lanka Sri Lanka 2
Suriname Suriname 2
Tajikistan Tajikistan 2
Tanzania Tanzania 2
Thailand Thailand 2
The Gambia The Gambia 2
Timor-Leste Timor-Leste 2
Togo Togo 2
Tunisia Tunisia 2
Türkiye Türkiye 2
Turkmenistan Turkmenistan 2
Uganda Uganda 2
Uzbekistan Uzbekistan 2
Vietnam Vietnam 2
Zambia Zambia 2
cluster_km1 |> filter(Cluster == 3) Country Cluster
Albania Albania 3
Armenia Armenia 3
Australia Australia 3
Austria Austria 3
Bahrain Bahrain 3
Belgium Belgium 3
Bosnia and Herzegovina Bosnia and Herzegovina 3
Botswana Botswana 3
Bulgaria Bulgaria 3
Cabo Verde Cabo Verde 3
Canada Canada 3
Chile Chile 3
Colombia Colombia 3
Costa Rica Costa Rica 3
Croatia Croatia 3
Cyprus Cyprus 3
Czech Republic Czech Republic 3
Denmark Denmark 3
Estonia Estonia 3
Fiji Fiji 3
Finland Finland 3
France France 3
Georgia Georgia 3
Germany Germany 3
Greece Greece 3
Hungary Hungary 3
Iceland Iceland 3
Indonesia Indonesia 3
Ireland Ireland 3
Israel Israel 3
Italy Italy 3
Jamaica Jamaica 3
Japan Japan 3
Jordan Jordan 3
Latvia Latvia 3
Lithuania Lithuania 3
Luxembourg Luxembourg 3
Malaysia Malaysia 3
Malta Malta 3
Mauritius Mauritius 3
Mongolia Mongolia 3
Montenegro Montenegro 3
Morocco Morocco 3
Namibia Namibia 3
Netherlands Netherlands 3
New Zealand New Zealand 3
North Macedonia North Macedonia 3
Norway Norway 3
Oman Oman 3
Panama Panama 3
Peru Peru 3
Poland Poland 3
Portugal Portugal 3
Qatar Qatar 3
Romania Romania 3
Serbia Serbia 3
Singapore Singapore 3
Slovakia Slovakia 3
Slovenia Slovenia 3
South Africa South Africa 3
South Korea South Korea 3
Spain Spain 3
Sweden Sweden 3
Switzerland Switzerland 3
Taiwan Taiwan 3
The Philippines The Philippines 3
Trinidad and Tobago Trinidad and Tobago 3
United Arab Emirates United Arab Emirates 3
United Kingdom United Kingdom 3
United States United States 3
Uruguay Uruguay 3
Cálculo de distancias
distancias_pca <- dist(data_pcas[, 1:2], method = "euclidean")Utilizaremos el método del codo (elbow method) mediante la función fviz_nbclust() del paquete factoextra.
Primero usamos el criterio de Suma de Cuadrados dentro de clusters:
# Elbow criterion for hierarchical clustering on the first two principal
# components: plots the within-cluster sum of squares against the number
# of clusters.
fviz_nbclust(
  data_pcas[, 1:2],       # only PC1 and PC2
  hcut,                   # clustering function being evaluated
  diss = distancias_pca,  # Euclidean distance matrix computed above
  method = "wss",         # within-cluster sum of squares
  k.max = 15,             # largest number of clusters to try
  verbose = FALSE         # `FALSE`, not `F`: `F` is a plain variable that can be reassigned
)
Usando el gráfico de silueta:
# Average silhouette width criterion for hierarchical clustering on the
# first two principal components.
fviz_nbclust(
  data_pcas[, 1:2],         # only PC1 and PC2
  hcut,                     # clustering function being evaluated
  diss = distancias_pca,    # Euclidean distance matrix computed above
  method = "silhouette",    # average silhouette width
  k.max = 15,               # largest number of clusters to try
  verbose = FALSE           # `FALSE`, not `F`: `F` is a plain variable that can be reassigned
)
Complementaremos el análisis visual con la función NbClust() que aplica simultáneamente hasta 30 índices diferentes para determinar el número óptimo de clusters.
library(NbClust)
set.seed(2025)
res.nbclust3 <- NbClust(data_pcas[, 1:2], # Data
distance = "euclidean", # Distancia utilizada
min.nc = 2, # Mínimo
max.nc = 10, # Máximo
method = "ward.D", # Método
index ="all") # Índices considerados*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 1 proposed 2 as the best number of clusters
* 13 proposed 3 as the best number of clusters
* 2 proposed 4 as the best number of clusters
* 4 proposed 5 as the best number of clusters
* 2 proposed 8 as the best number of clusters
* 1 proposed 10 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 3
*******************************************************************
La función nos indica que el mejor número de clusters es 3, de acuerdo con la regla de la mayoría.
# Agglomerative hierarchical clustering (AGNES) on the PCA distance
# matrix, cut into 3 clusters. The original comment on `k` said 2
# clusters, which contradicted the value actually passed.
aglomerativo2 <- hcut(
  x = distancias_pca,     # distance matrix
  k = 3,                  # number of clusters the tree is cut into
  hc_func = "agnes",      # agglomerative algorithm
  hc_method = "ward.D"    # agglomeration criterion
)
Primero, veamos la asignación de cluster:
fviz_dend(aglomerativo2, # Nuestro objeto cluster creado
rect = TRUE, # Resalta los cluster solicitados
cex = 0.5) # Tamaño de etiquetasfviz_silhouette(aglomerativo2, label=TRUE) cluster size ave.sil.width
1 1 104 0.46
2 2 15 0.24
3 3 40 0.65
Identificación de casos mal clasificados:
aglomerativo2$silinfo$widths |> data.frame() |> filter(sil_width<0) cluster neighbor sil_width
Bulgaria 1 3 -0.03251306
Jamaica 1 3 -0.04107258
Cabo Verde 1 3 -0.06670040
Botswana 1 3 -0.10893057
Uruguay 1 3 -0.16584989
Romania 1 3 -0.19343016
Suriname 2 1 -0.07007601
Laos 2 1 -0.20431921
Burundi 2 1 -0.21278928
Hay 9 observaciones (países) mal clasificadas, es decir, con ancho de silueta negativo.
# Country-to-cluster lookup table for the agglomerative solution on the
# principal components. Countries and cluster labels are paired by
# position, so this assumes `data` keeps the same row order as the
# observations that were clustered.
cluster_aglo2 <- data.frame(
  Country = data[["Country"]],
  Cluster = aglomerativo2[["cluster"]]
)
# Ver la lista de países por clúster
cluster_aglo2 |>
arrange(Cluster) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Azerbaijan 1
6 Bahrain 1
7 Bangladesh 1
8 Belarus 1
9 Benin 1
10 Bhutan 1
11 Bolivia 1
12 Bosnia and Herzegovina 1
13 Botswana 1
14 Brazil 1
15 Bulgaria 1
16 Burkina Faso 1
17 Burma 1
18 Cabo Verde 1
19 Cambodia 1
20 Cameroon 1
21 Central African Republic 1
22 Chad 1
23 China 1
24 Colombia 1
25 Comoros 1
26 Costa Rica 1
27 Côte d'Ivoire 1
28 Democratic Republic of Congo 1
29 Djibouti 1
30 Dominican Republic 1
31 Ecuador 1
32 Egypt 1
33 El Salvador 1
34 Equatorial Guinea 1
35 Eritrea 1
36 Eswatini 1
37 Fiji 1
38 Gabon 1
39 Georgia 1
40 Ghana 1
41 Guatemala 1
42 Guinea 1
43 Guinea-Bissau 1
44 Guyana 1
45 Honduras 1
46 India 1
47 Indonesia 1
48 Jamaica 1
49 Jordan 1
50 Kazakhstan 1
51 Kenya 1
52 Kuwait 1
53 Kyrgyz Republic 1
54 Lesotho 1
55 Liberia 1
56 Madagascar 1
57 Malawi 1
58 Malaysia 1
59 Mali 1
60 Mauritania 1
61 Mexico 1
62 Moldova 1
63 Mongolia 1
64 Montenegro 1
65 Morocco 1
66 Mozambique 1
67 Namibia 1
68 Nepal 1
69 Nicaragua 1
70 Niger 1
71 Nigeria 1
72 North Macedonia 1
73 Oman 1
74 Pakistan 1
75 Panama 1
76 Papua New Guinea 1
77 Paraguay 1
78 Peru 1
79 Qatar 1
80 Republic of Congo 1
81 Romania 1
82 Russia 1
83 Rwanda 1
84 Saudi Arabia 1
85 Senegal 1
86 Serbia 1
87 South Africa 1
88 Sri Lanka 1
89 Tajikistan 1
90 Tanzania 1
91 Thailand 1
92 The Gambia 1
93 The Philippines 1
94 Timor-Leste 1
95 Togo 1
96 Trinidad and Tobago 1
97 Tunisia 1
98 Turkmenistan 1
99 Uganda 1
100 United Arab Emirates 1
101 Uruguay 1
102 Uzbekistan 1
103 Vietnam 1
104 Zambia 1
105 Argentina 2
106 Burundi 2
107 Cuba 2
108 Ethiopia 2
109 Haiti 2
110 Iran 2
111 Laos 2
112 Lebanon 2
113 North Korea 2
114 Sierra Leone 2
115 Sudan 2
116 Suriname 2
117 Türkiye 2
118 Venezuela 2
119 Zimbabwe 2
120 Australia 3
121 Austria 3
122 Belgium 3
123 Canada 3
124 Chile 3
125 Croatia 3
126 Cyprus 3
127 Czech Republic 3
128 Denmark 3
129 Estonia 3
130 Finland 3
131 France 3
132 Germany 3
133 Greece 3
134 Hungary 3
135 Iceland 3
136 Ireland 3
137 Israel 3
138 Italy 3
139 Japan 3
140 Latvia 3
141 Lithuania 3
142 Luxembourg 3
143 Malta 3
144 Mauritius 3
145 Netherlands 3
146 New Zealand 3
147 Norway 3
148 Poland 3
149 Portugal 3
150 Singapore 3
151 Slovakia 3
152 Slovenia 3
153 South Korea 3
154 Spain 3
155 Sweden 3
156 Switzerland 3
157 Taiwan 3
158 United Kingdom 3
159 United States 3
cluster_aglo2 |> filter(Cluster == 1) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Azerbaijan 1
6 Bahrain 1
7 Bangladesh 1
8 Belarus 1
9 Benin 1
10 Bhutan 1
11 Bolivia 1
12 Bosnia and Herzegovina 1
13 Botswana 1
14 Brazil 1
15 Bulgaria 1
16 Burkina Faso 1
17 Burma 1
18 Cabo Verde 1
19 Cambodia 1
20 Cameroon 1
21 Central African Republic 1
22 Chad 1
23 China 1
24 Colombia 1
25 Comoros 1
26 Costa Rica 1
27 Côte d'Ivoire 1
28 Democratic Republic of Congo 1
29 Djibouti 1
30 Dominican Republic 1
31 Ecuador 1
32 Egypt 1
33 El Salvador 1
34 Equatorial Guinea 1
35 Eritrea 1
36 Eswatini 1
37 Fiji 1
38 Gabon 1
39 Georgia 1
40 Ghana 1
41 Guatemala 1
42 Guinea 1
43 Guinea-Bissau 1
44 Guyana 1
45 Honduras 1
46 India 1
47 Indonesia 1
48 Jamaica 1
49 Jordan 1
50 Kazakhstan 1
51 Kenya 1
52 Kuwait 1
53 Kyrgyz Republic 1
54 Lesotho 1
55 Liberia 1
56 Madagascar 1
57 Malawi 1
58 Malaysia 1
59 Mali 1
60 Mauritania 1
61 Mexico 1
62 Moldova 1
63 Mongolia 1
64 Montenegro 1
65 Morocco 1
66 Mozambique 1
67 Namibia 1
68 Nepal 1
69 Nicaragua 1
70 Niger 1
71 Nigeria 1
72 North Macedonia 1
73 Oman 1
74 Pakistan 1
75 Panama 1
76 Papua New Guinea 1
77 Paraguay 1
78 Peru 1
79 Qatar 1
80 Republic of Congo 1
81 Romania 1
82 Russia 1
83 Rwanda 1
84 Saudi Arabia 1
85 Senegal 1
86 Serbia 1
87 South Africa 1
88 Sri Lanka 1
89 Tajikistan 1
90 Tanzania 1
91 Thailand 1
92 The Gambia 1
93 The Philippines 1
94 Timor-Leste 1
95 Togo 1
96 Trinidad and Tobago 1
97 Tunisia 1
98 Turkmenistan 1
99 Uganda 1
100 United Arab Emirates 1
101 Uruguay 1
102 Uzbekistan 1
103 Vietnam 1
104 Zambia 1
cluster_aglo2 |> filter(Cluster == 2) Country Cluster
1 Argentina 2
2 Burundi 2
3 Cuba 2
4 Ethiopia 2
5 Haiti 2
6 Iran 2
7 Laos 2
8 Lebanon 2
9 North Korea 2
10 Sierra Leone 2
11 Sudan 2
12 Suriname 2
13 Türkiye 2
14 Venezuela 2
15 Zimbabwe 2
cluster_aglo2 |> filter(Cluster == 3) Country Cluster
1 Australia 3
2 Austria 3
3 Belgium 3
4 Canada 3
5 Chile 3
6 Croatia 3
7 Cyprus 3
8 Czech Republic 3
9 Denmark 3
10 Estonia 3
11 Finland 3
12 France 3
13 Germany 3
14 Greece 3
15 Hungary 3
16 Iceland 3
17 Ireland 3
18 Israel 3
19 Italy 3
20 Japan 3
21 Latvia 3
22 Lithuania 3
23 Luxembourg 3
24 Malta 3
25 Mauritius 3
26 Netherlands 3
27 New Zealand 3
28 Norway 3
29 Poland 3
30 Portugal 3
31 Singapore 3
32 Slovakia 3
33 Slovenia 3
34 South Korea 3
35 Spain 3
36 Sweden 3
37 Switzerland 3
38 Taiwan 3
39 United Kingdom 3
40 United States 3
# Divisive hierarchical clustering (DIANA) on the PCA distance matrix,
# cut into 3 clusters.
divisivo2 <- hcut(
  distancias_pca,
  k = 3,
  hc_func = "diana"  # divisive method
)

# Dendrogram of the divisive solution. This call was fused onto the
# previous statement's trailing comment; it is now a statement of its own.
fviz_dend(
  divisivo2,    # cluster object created above
  rect = TRUE,  # draw a rectangle around each requested cluster
  cex = 0.5     # label size
)

# Silhouette plot
fviz_silhouette(divisivo2, label=TRUE) cluster size ave.sil.width
1 1 150 0.52
2 2 3 0.39
3 3 6 0.50
Identificación de casos mal clasificados:
divisivo2$silinfo$widths |> data.frame() |> filter(sil_width<0) cluster neighbor sil_width
Sri Lanka 1 2 -0.005008857
Eritrea 1 2 -0.025144444
Burundi 1 2 -0.289728484
Laos 1 2 -0.292980732
Suriname 1 2 -0.342594237
Türkiye 1 2 -0.435756569
Ethiopia 1 2 -0.493263680
Sierra Leone 1 2 -0.528404815
Hay 8 observaciones (países) mal clasificadas, es decir, con ancho de silueta negativo.
# Country-to-cluster lookup table for the divisive (DIANA) solution on
# the principal components. Countries and cluster labels are paired by
# position, so this assumes `data` keeps the same row order as the
# observations that were clustered.
cluster_div2 <- data.frame(
  Country = data[["Country"]],
  Cluster = divisivo2[["cluster"]]
)
# Ver la lista de países por clúster
cluster_div2 |>
arrange(Cluster) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Australia 1
6 Austria 1
7 Azerbaijan 1
8 Bahrain 1
9 Bangladesh 1
10 Belarus 1
11 Belgium 1
12 Benin 1
13 Bhutan 1
14 Bolivia 1
15 Bosnia and Herzegovina 1
16 Botswana 1
17 Brazil 1
18 Bulgaria 1
19 Burkina Faso 1
20 Burma 1
21 Burundi 1
22 Cabo Verde 1
23 Cambodia 1
24 Cameroon 1
25 Canada 1
26 Central African Republic 1
27 Chad 1
28 Chile 1
29 China 1
30 Colombia 1
31 Comoros 1
32 Costa Rica 1
33 Côte d'Ivoire 1
34 Croatia 1
35 Cyprus 1
36 Czech Republic 1
37 Democratic Republic of Congo 1
38 Denmark 1
39 Djibouti 1
40 Dominican Republic 1
41 Ecuador 1
42 Egypt 1
43 El Salvador 1
44 Equatorial Guinea 1
45 Eritrea 1
46 Estonia 1
47 Eswatini 1
48 Ethiopia 1
49 Fiji 1
50 Finland 1
51 France 1
52 Gabon 1
53 Georgia 1
54 Germany 1
55 Ghana 1
56 Greece 1
57 Guatemala 1
58 Guinea 1
59 Guinea-Bissau 1
60 Guyana 1
61 Honduras 1
62 Hungary 1
63 Iceland 1
64 India 1
65 Indonesia 1
66 Ireland 1
67 Israel 1
68 Italy 1
69 Jamaica 1
70 Japan 1
71 Jordan 1
72 Kazakhstan 1
73 Kenya 1
74 Kuwait 1
75 Kyrgyz Republic 1
76 Laos 1
77 Latvia 1
78 Lesotho 1
79 Liberia 1
80 Lithuania 1
81 Luxembourg 1
82 Madagascar 1
83 Malawi 1
84 Malaysia 1
85 Mali 1
86 Malta 1
87 Mauritania 1
88 Mauritius 1
89 Mexico 1
90 Moldova 1
91 Mongolia 1
92 Montenegro 1
93 Morocco 1
94 Mozambique 1
95 Namibia 1
96 Nepal 1
97 Netherlands 1
98 New Zealand 1
99 Nicaragua 1
100 Niger 1
101 Nigeria 1
102 North Macedonia 1
103 Norway 1
104 Oman 1
105 Pakistan 1
106 Panama 1
107 Papua New Guinea 1
108 Paraguay 1
109 Peru 1
110 Poland 1
111 Portugal 1
112 Qatar 1
113 Republic of Congo 1
114 Romania 1
115 Russia 1
116 Rwanda 1
117 Saudi Arabia 1
118 Senegal 1
119 Serbia 1
120 Sierra Leone 1
121 Singapore 1
122 Slovakia 1
123 Slovenia 1
124 South Africa 1
125 South Korea 1
126 Spain 1
127 Sri Lanka 1
128 Suriname 1
129 Sweden 1
130 Switzerland 1
131 Taiwan 1
132 Tajikistan 1
133 Tanzania 1
134 Thailand 1
135 The Gambia 1
136 The Philippines 1
137 Timor-Leste 1
138 Togo 1
139 Trinidad and Tobago 1
140 Tunisia 1
141 Türkiye 1
142 Turkmenistan 1
143 Uganda 1
144 United Arab Emirates 1
145 United Kingdom 1
146 United States 1
147 Uruguay 1
148 Uzbekistan 1
149 Vietnam 1
150 Zambia 1
151 Argentina 2
152 Haiti 2
153 Iran 2
154 Cuba 3
155 Lebanon 3
156 North Korea 3
157 Sudan 3
158 Venezuela 3
159 Zimbabwe 3
cluster_div2 |> filter(Cluster == 1) Country Cluster
1 Albania 1
2 Algeria 1
3 Angola 1
4 Armenia 1
5 Australia 1
6 Austria 1
7 Azerbaijan 1
8 Bahrain 1
9 Bangladesh 1
10 Belarus 1
11 Belgium 1
12 Benin 1
13 Bhutan 1
14 Bolivia 1
15 Bosnia and Herzegovina 1
16 Botswana 1
17 Brazil 1
18 Bulgaria 1
19 Burkina Faso 1
20 Burma 1
21 Burundi 1
22 Cabo Verde 1
23 Cambodia 1
24 Cameroon 1
25 Canada 1
26 Central African Republic 1
27 Chad 1
28 Chile 1
29 China 1
30 Colombia 1
31 Comoros 1
32 Costa Rica 1
33 Côte d'Ivoire 1
34 Croatia 1
35 Cyprus 1
36 Czech Republic 1
37 Democratic Republic of Congo 1
38 Denmark 1
39 Djibouti 1
40 Dominican Republic 1
41 Ecuador 1
42 Egypt 1
43 El Salvador 1
44 Equatorial Guinea 1
45 Eritrea 1
46 Estonia 1
47 Eswatini 1
48 Ethiopia 1
49 Fiji 1
50 Finland 1
51 France 1
52 Gabon 1
53 Georgia 1
54 Germany 1
55 Ghana 1
56 Greece 1
57 Guatemala 1
58 Guinea 1
59 Guinea-Bissau 1
60 Guyana 1
61 Honduras 1
62 Hungary 1
63 Iceland 1
64 India 1
65 Indonesia 1
66 Ireland 1
67 Israel 1
68 Italy 1
69 Jamaica 1
70 Japan 1
71 Jordan 1
72 Kazakhstan 1
73 Kenya 1
74 Kuwait 1
75 Kyrgyz Republic 1
76 Laos 1
77 Latvia 1
78 Lesotho 1
79 Liberia 1
80 Lithuania 1
81 Luxembourg 1
82 Madagascar 1
83 Malawi 1
84 Malaysia 1
85 Mali 1
86 Malta 1
87 Mauritania 1
88 Mauritius 1
89 Mexico 1
90 Moldova 1
91 Mongolia 1
92 Montenegro 1
93 Morocco 1
94 Mozambique 1
95 Namibia 1
96 Nepal 1
97 Netherlands 1
98 New Zealand 1
99 Nicaragua 1
100 Niger 1
101 Nigeria 1
102 North Macedonia 1
103 Norway 1
104 Oman 1
105 Pakistan 1
106 Panama 1
107 Papua New Guinea 1
108 Paraguay 1
109 Peru 1
110 Poland 1
111 Portugal 1
112 Qatar 1
113 Republic of Congo 1
114 Romania 1
115 Russia 1
116 Rwanda 1
117 Saudi Arabia 1
118 Senegal 1
119 Serbia 1
120 Sierra Leone 1
121 Singapore 1
122 Slovakia 1
123 Slovenia 1
124 South Africa 1
125 South Korea 1
126 Spain 1
127 Sri Lanka 1
128 Suriname 1
129 Sweden 1
130 Switzerland 1
131 Taiwan 1
132 Tajikistan 1
133 Tanzania 1
134 Thailand 1
135 The Gambia 1
136 The Philippines 1
137 Timor-Leste 1
138 Togo 1
139 Trinidad and Tobago 1
140 Tunisia 1
141 Türkiye 1
142 Turkmenistan 1
143 Uganda 1
144 United Arab Emirates 1
145 United Kingdom 1
146 United States 1
147 Uruguay 1
148 Uzbekistan 1
149 Vietnam 1
150 Zambia 1
cluster_div2 |> filter(Cluster == 2) Country Cluster
1 Argentina 2
2 Haiti 2
3 Iran 2
cluster_div2 |> filter(Cluster == 3) Country Cluster
1 Cuba 3
2 Lebanon 3
3 North Korea 3
4 Sudan 3
5 Venezuela 3
6 Zimbabwe 3
library(factoextra)
set.seed(2025) # Establece la semilla.
fviz_nbclust(data_pcas[, 1:2], kmeans, method = "wss", k.max = 15) +
geom_vline(xintercept = 3, linetype = 2) +
labs(subtitle = "Método Elbow") + theme_bw()Tenemos que identificar en qué punto comienza a haber estabilidad en la suma de cuadrados total dentro de los clúster
library(factoextra)
set.seed(2025)
fviz_nbclust(data_pcas[, 1:2], kmeans,
method = "silhouette", k.max = 15) +
labs(subtitle = "Silhouette method")library(NbClust)
set.seed(2025)
res.nbclust4 <- NbClust(data_pcas[, 1:2], distance = "euclidean",
min.nc = 2, max.nc = 10,
method = "kmeans", index ="all") *** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 4 proposed 2 as the best number of clusters
* 11 proposed 3 as the best number of clusters
* 6 proposed 5 as the best number of clusters
* 1 proposed 6 as the best number of clusters
* 1 proposed 10 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 3
*******************************************************************
La función nos indica que el mejor número de clusters es 3, de acuerdo con la regla de la mayoría.
# K-means on the first two principal components with 3 centers. The seed
# is fixed first so the 25 random initialisations (and therefore the
# chosen solution) are reproducible between runs.
set.seed(2025)
km2 <- kmeans(
  x = data_pcas[, 1:2],
  centers = 3,          # number of clusters
  nstart = 25,          # number of random starting configurations
  iter.max = 100,       # maximum number of iterations per start
  algorithm = "Lloyd"
)
Visualizando
library(factoextra)
fviz_cluster(km2, data = data_pcas[, 1:2], ellipse.type = "convex") +
theme_classic()Usando el índice de Silueta
km_clusters2 <- eclust(x = data_pcas[, 1:2], FUNcluster = "kmeans",
k = 3, seed = 2025,
hc_metric = "euclidean",
graph = FALSE) fviz_silhouette(sil.obj = km_clusters2,
print.summary = TRUE,
palette = "jco",
ggtheme = theme_classic()) cluster size ave.sil.width
1 1 8 0.50
2 2 65 0.47
3 3 86 0.44
km_clusters2$centers pc1 pc2
1 8.450369 -2.7662199
2 -1.373519 -1.3604519
3 1.134167 0.6118582
km_clusters2$silinfo$widths |> data.frame() |> filter(sil_width<0) cluster neighbor sil_width
Iran 1 3 -0.032604811
The Philippines 3 2 -0.001626911
Indonesia 3 2 -0.004151305
Trinidad and Tobago 3 2 -0.006578924
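Hay 4 observaciones (países) mal clasificadas, es decir, con ancho de silueta negativo: Iran, The Philippines, Indonesia y Trinidad and Tobago.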
# Country-to-cluster lookup table for the k-means solution on the
# principal components. Countries and cluster labels are paired by
# position, so this assumes `data` keeps the same row order as the
# observations that were clustered.
cluster_km2 <- data.frame(
  Country = data[["Country"]],
  Cluster = km_clusters2[["cluster"]]
)
# Ver la lista de países por clúster
cluster_km2 |>
arrange(Cluster) Country Cluster
Argentina Argentina 1
Cuba Cuba 1
Iran Iran 1
Lebanon Lebanon 1
North Korea North Korea 1
Sudan Sudan 1
Venezuela Venezuela 1
Zimbabwe Zimbabwe 1
Albania Albania 2
Armenia Armenia 2
Australia Australia 2
Austria Austria 2
Bahrain Bahrain 2
Belgium Belgium 2
Bosnia and Herzegovina Bosnia and Herzegovina 2
Botswana Botswana 2
Bulgaria Bulgaria 2
Cabo Verde Cabo Verde 2
Canada Canada 2
Chile Chile 2
Colombia Colombia 2
Costa Rica Costa Rica 2
Croatia Croatia 2
Cyprus Cyprus 2
Czech Republic Czech Republic 2
Denmark Denmark 2
Estonia Estonia 2
Fiji Fiji 2
Finland Finland 2
France France 2
Georgia Georgia 2
Germany Germany 2
Greece Greece 2
Hungary Hungary 2
Iceland Iceland 2
Ireland Ireland 2
Israel Israel 2
Italy Italy 2
Jamaica Jamaica 2
Japan Japan 2
Jordan Jordan 2
Latvia Latvia 2
Lithuania Lithuania 2
Luxembourg Luxembourg 2
Malaysia Malaysia 2
Malta Malta 2
Mauritius Mauritius 2
Montenegro Montenegro 2
Morocco Morocco 2
Namibia Namibia 2
Netherlands Netherlands 2
New Zealand New Zealand 2
North Macedonia North Macedonia 2
Norway Norway 2
Panama Panama 2
Peru Peru 2
Poland Poland 2
Portugal Portugal 2
Romania Romania 2
Serbia Serbia 2
Singapore Singapore 2
Slovakia Slovakia 2
Slovenia Slovenia 2
South Africa South Africa 2
South Korea South Korea 2
Spain Spain 2
Sweden Sweden 2
Switzerland Switzerland 2
Taiwan Taiwan 2
United Arab Emirates United Arab Emirates 2
United Kingdom United Kingdom 2
United States United States 2
Uruguay Uruguay 2
Algeria Algeria 3
Angola Angola 3
Azerbaijan Azerbaijan 3
Bangladesh Bangladesh 3
Belarus Belarus 3
Benin Benin 3
Bhutan Bhutan 3
Bolivia Bolivia 3
Brazil Brazil 3
Burkina Faso Burkina Faso 3
Burma Burma 3
Burundi Burundi 3
Cambodia Cambodia 3
Cameroon Cameroon 3
Central African Republic Central African Republic 3
Chad Chad 3
China China 3
Comoros Comoros 3
Côte d'Ivoire Côte d'Ivoire 3
Democratic Republic of Congo Democratic Republic of Congo 3
Djibouti Djibouti 3
Dominican Republic Dominican Republic 3
Ecuador Ecuador 3
Egypt Egypt 3
El Salvador El Salvador 3
Equatorial Guinea Equatorial Guinea 3
Eritrea Eritrea 3
Eswatini Eswatini 3
Ethiopia Ethiopia 3
Gabon Gabon 3
Ghana Ghana 3
Guatemala Guatemala 3
Guinea Guinea 3
Guinea-Bissau Guinea-Bissau 3
Guyana Guyana 3
Haiti Haiti 3
Honduras Honduras 3
India India 3
Indonesia Indonesia 3
Kazakhstan Kazakhstan 3
Kenya Kenya 3
Kuwait Kuwait 3
Kyrgyz Republic Kyrgyz Republic 3
Laos Laos 3
Lesotho Lesotho 3
Liberia Liberia 3
Madagascar Madagascar 3
Malawi Malawi 3
Mali Mali 3
Mauritania Mauritania 3
Mexico Mexico 3
Moldova Moldova 3
Mongolia Mongolia 3
Mozambique Mozambique 3
Nepal Nepal 3
Nicaragua Nicaragua 3
Niger Niger 3
Nigeria Nigeria 3
Oman Oman 3
Pakistan Pakistan 3
Papua New Guinea Papua New Guinea 3
Paraguay Paraguay 3
Qatar Qatar 3
Republic of Congo Republic of Congo 3
Russia Russia 3
Rwanda Rwanda 3
Saudi Arabia Saudi Arabia 3
Senegal Senegal 3
Sierra Leone Sierra Leone 3
Sri Lanka Sri Lanka 3
Suriname Suriname 3
Tajikistan Tajikistan 3
Tanzania Tanzania 3
Thailand Thailand 3
The Gambia The Gambia 3
The Philippines The Philippines 3
Timor-Leste Timor-Leste 3
Togo Togo 3
Trinidad and Tobago Trinidad and Tobago 3
Tunisia Tunisia 3
Türkiye Türkiye 3
Turkmenistan Turkmenistan 3
Uganda Uganda 3
Uzbekistan Uzbekistan 3
Vietnam Vietnam 3
Zambia Zambia 3
# Countries assigned to cluster 1
cluster_km2 |> filter(Cluster == 1) Country Cluster
Argentina Argentina 1
Cuba Cuba 1
Iran Iran 1
Lebanon Lebanon 1
North Korea North Korea 1
Sudan Sudan 1
Venezuela Venezuela 1
Zimbabwe Zimbabwe 1
# Countries assigned to cluster 2
cluster_km2 |> filter(Cluster == 2) Country Cluster
Albania Albania 2
Armenia Armenia 2
Australia Australia 2
Austria Austria 2
Bahrain Bahrain 2
Belgium Belgium 2
Bosnia and Herzegovina Bosnia and Herzegovina 2
Botswana Botswana 2
Bulgaria Bulgaria 2
Cabo Verde Cabo Verde 2
Canada Canada 2
Chile Chile 2
Colombia Colombia 2
Costa Rica Costa Rica 2
Croatia Croatia 2
Cyprus Cyprus 2
Czech Republic Czech Republic 2
Denmark Denmark 2
Estonia Estonia 2
Fiji Fiji 2
Finland Finland 2
France France 2
Georgia Georgia 2
Germany Germany 2
Greece Greece 2
Hungary Hungary 2
Iceland Iceland 2
Ireland Ireland 2
Israel Israel 2
Italy Italy 2
Jamaica Jamaica 2
Japan Japan 2
Jordan Jordan 2
Latvia Latvia 2
Lithuania Lithuania 2
Luxembourg Luxembourg 2
Malaysia Malaysia 2
Malta Malta 2
Mauritius Mauritius 2
Montenegro Montenegro 2
Morocco Morocco 2
Namibia Namibia 2
Netherlands Netherlands 2
New Zealand New Zealand 2
North Macedonia North Macedonia 2
Norway Norway 2
Panama Panama 2
Peru Peru 2
Poland Poland 2
Portugal Portugal 2
Romania Romania 2
Serbia Serbia 2
Singapore Singapore 2
Slovakia Slovakia 2
Slovenia Slovenia 2
South Africa South Africa 2
South Korea South Korea 2
Spain Spain 2
Sweden Sweden 2
Switzerland Switzerland 2
Taiwan Taiwan 2
United Arab Emirates United Arab Emirates 2
United Kingdom United Kingdom 2
United States United States 2
Uruguay Uruguay 2
# Countries assigned to cluster 3
cluster_km2 |> filter(Cluster == 3) Country Cluster
Algeria Algeria 3
Angola Angola 3
Azerbaijan Azerbaijan 3
Bangladesh Bangladesh 3
Belarus Belarus 3
Benin Benin 3
Bhutan Bhutan 3
Bolivia Bolivia 3
Brazil Brazil 3
Burkina Faso Burkina Faso 3
Burma Burma 3
Burundi Burundi 3
Cambodia Cambodia 3
Cameroon Cameroon 3
Central African Republic Central African Republic 3
Chad Chad 3
China China 3
Comoros Comoros 3
Côte d'Ivoire Côte d'Ivoire 3
Democratic Republic of Congo Democratic Republic of Congo 3
Djibouti Djibouti 3
Dominican Republic Dominican Republic 3
Ecuador Ecuador 3
Egypt Egypt 3
El Salvador El Salvador 3
Equatorial Guinea Equatorial Guinea 3
Eritrea Eritrea 3
Eswatini Eswatini 3
Ethiopia Ethiopia 3
Gabon Gabon 3
Ghana Ghana 3
Guatemala Guatemala 3
Guinea Guinea 3
Guinea-Bissau Guinea-Bissau 3
Guyana Guyana 3
Haiti Haiti 3
Honduras Honduras 3
India India 3
Indonesia Indonesia 3
Kazakhstan Kazakhstan 3
Kenya Kenya 3
Kuwait Kuwait 3
Kyrgyz Republic Kyrgyz Republic 3
Laos Laos 3
Lesotho Lesotho 3
Liberia Liberia 3
Madagascar Madagascar 3
Malawi Malawi 3
Mali Mali 3
Mauritania Mauritania 3
Mexico Mexico 3
Moldova Moldova 3
Mongolia Mongolia 3
Mozambique Mozambique 3
Nepal Nepal 3
Nicaragua Nicaragua 3
Niger Niger 3
Nigeria Nigeria 3
Oman Oman 3
Pakistan Pakistan 3
Papua New Guinea Papua New Guinea 3
Paraguay Paraguay 3
Qatar Qatar 3
Republic of Congo Republic of Congo 3
Russia Russia 3
Rwanda Rwanda 3
Saudi Arabia Saudi Arabia 3
Senegal Senegal 3
Sierra Leone Sierra Leone 3
Sri Lanka Sri Lanka 3
Suriname Suriname 3
Tajikistan Tajikistan 3
Tanzania Tanzania 3
Thailand Thailand 3
The Gambia The Gambia 3
The Philippines The Philippines 3
Timor-Leste Timor-Leste 3
Togo Togo 3
Trinidad and Tobago Trinidad and Tobago 3
Tunisia Tunisia 3
Türkiye Türkiye 3
Turkmenistan Turkmenistan 3
Uganda Uganda 3
Uzbekistan Uzbekistan 3
Vietnam Vietnam 3
Zambia Zambia 3
# Attach the cluster labels stored in aglomerativo2 to every row of data.
# NOTE(review): the listings printed just before this chunk come from
# km_clusters2, yet the profile below uses aglomerativo2$cluster -- confirm
# which clustering solution is meant to be profiled here.
data.k <- data |>
  mutate(Cluster = aglomerativo2$cluster)

# Keep the cluster label together with columns 2 to 16 of data.k.
subdata_indicadores <- data.k |>
  select(Cluster, 2:16)

# Median of each indicator within each cluster; output columns are prefixed
# with "Med_".
# NOTE(review): 1:14 is a positional selection among the non-grouping columns,
# so a 15th selected column (if there is one) is silently left out of the
# summary -- confirm that this is intended.
caract_medianas <- subdata_indicadores |>
  group_by(Cluster) |>
  summarise(across(1:14, median, .names = "Med_{.col}"))
caract_medianas
# A tibble: 3 × 15
Cluster `Med_Overall Score` `Med_Property Rights` `Med_Government Integrity`
<int> <dbl> <dbl> <dbl>
1 1 56.7 44.8 36.5
2 2 44.1 27.9 23.2
3 3 72.8 90 74.8
# ℹ 11 more variables: `Med_Judicial Effectiveness` <dbl>,
# `Med_Tax Burden` <dbl>, `Med_Government Spending` <dbl>,
# `Med_Fiscal Health` <dbl>, `Med_Business Freedom` <dbl>,
# `Med_Labor Freedom` <dbl>, `Med_Monetary Freedom` <dbl>,
# `Med_Trade Freedom` <dbl>, `Med_Investment Freedom` <dbl>,
# `Med_Financial Freedom` <dbl>, `Med_Free and fair elections` <dbl>
# Open the medians table in the data viewer, but only in an interactive
# session, so that a non-interactive render never tries to open a viewer.
if (interactive()) {
  View(caract_medianas)
}

# Share of each world region inside every cluster (bars are scaled to 100 %).
ggplot(data.k, aes(x = factor(Cluster), fill = `World regions`)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Distribución porcentual de Regiones por clúster",
    x = "Clúster",
    y = "Porcentaje"
  ) +
  theme_minimal()

# Neighbour count passed to the k-NN distance plot that guides the eps choice.
k_optimo <- 3
# eps
# k-NN distance plot on pca_df; the dashed red line marks a distance of 1.
kNNdistplot(pca_df, k = k_optimo)
abline(h = 1, col = "red", lty = 2)

# Label the rows of subdata with the country names.
rownames(subdata) <- data$Country

# Candidate neighbourhood radii, all tried with the same minPts.
eps_values <- c(0.7, 0.8, 0.9, 1.0, 1.1, 1.2)
minPts <- 4

# One summary row per eps value: how many clusters were found and how many
# points were left as noise (label 0). The rows are built independently and
# bound once, instead of growing a data frame inside a loop.
# dbscan::dbscan is called explicitly because the file also uses fpc::dbscan,
# whose argument is spelled MinPts.
dbscan_results <- do.call(rbind, lapply(eps_values, function(eps) {
  model <- dbscan::dbscan(pca_df, eps = eps, minPts = minPts)
  data.frame(
    eps = eps,
    minPts = minPts,
    # label 0 is noise, so it does not count as a cluster
    clusters = length(setdiff(unique(model$cluster), 0)),
    noise_points = sum(model$cluster == 0)
  )
}))

# Number of detected clusters as a function of eps.
ggplot(dbscan_results, aes(x = eps, y = clusters)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Número de clusters vs eps (DBSCAN)",
    y = "Clusters detectados"
  ) +
  theme_minimal()
Elijo eps = 0.8
# DBSCAN fit on pca_df with the chosen radius (eps = 0.8) and MinPts = 4,
# using the fpc implementation. The seed call is kept as in the original script.
set.seed(2023)
dbscan_cluster1 <- fpc::dbscan(pca_df, eps = 0.8, MinPts = 4)
# Print the fit summary: one column per cluster label (0 is the label the rest
# of this script treats as noise), with the points of each label split into
# border and seed points plus their total.
print(dbscan_cluster1)dbscan Pts=159 MinPts=4 eps=0.8
0 1 2 3
border 19 1 1 2
seed 0 131 4 1
total 19 132 5 3
# Scatter plot of the DBSCAN partition on pca_df (no rescaling, no ellipses).
fviz_cluster(
  dbscan_cluster1,
  pca_df,
  stand = FALSE,
  ellipse = FALSE,
  geom = "point"
) +
  labs(title = "DBSCAN") +
  theme_bw()

# Drop the points labelled 0 (noise) before computing silhouettes.
dbscan_labels <- dbscan_cluster1$cluster
dbscan_valid <- pca_df[dbscan_labels != 0, ]
dbscan_labels <- dbscan_labels[dbscan_labels != 0]

# Silhouette of the remaining points, from their pairwise distances.
sil_dbscan <- silhouette(dbscan_labels, dist(dbscan_valid))
fviz_silhouette(sil_dbscan) +
  labs(title = "Índice de Silueta para DBSCAN (sin outliers)") +
  theme_minimal()
 cluster size ave.sil.width
1 1 132 0.21
2 2 5 0.59
3 3 3 0.78
# Attach the DBSCAN label (as a factor; label 0 is kept as its own level) to
# the subdata table, then profile the result with skim().
data_clustered <- as.data.frame(subdata) %>%
mutate(cluster_dbscan = factor(dbscan_cluster1$cluster))skim(data_clustered)| Name | data_clustered |
| Number of rows | 159 |
| Number of columns | 15 |
| _______________________ | |
| Column type frequency: | |
| factor | 1 |
| numeric | 14 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| cluster_dbscan | 0 | 1 | FALSE | 4 | 1: 132, 0: 19, 2: 5, 3: 3 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Property Rights | 0 | 1 | 0.11 | 0.62 | -1.17 | -0.38 | 0 | 0.62 | 1.22 | ▂▆▇▃▆ |
| Government Integrity | 0 | 1 | 0.14 | 0.74 | -1.18 | -0.39 | 0 | 0.61 | 1.94 | ▃▇▅▂▂ |
| Judicial Effectiveness | 0 | 1 | 0.10 | 0.64 | -0.93 | -0.38 | 0 | 0.62 | 1.30 | ▆▇▇▃▆ |
| Tax Burden | 0 | 1 | -0.04 | 0.87 | -5.24 | -0.45 | 0 | 0.55 | 1.44 | ▁▁▁▇▇ |
| Government Spending | 0 | 1 | -0.21 | 0.78 | -2.44 | -0.60 | 0 | 0.40 | 0.80 | ▁▂▃▆▇ |
| Fiscal Health | 0 | 1 | -0.17 | 0.67 | -1.58 | -0.58 | 0 | 0.42 | 0.65 | ▃▂▃▅▇ |
| Business Freedom | 0 | 1 | -0.14 | 0.65 | -2.26 | -0.61 | 0 | 0.39 | 0.94 | ▁▃▅▇▇ |
| Labor Freedom | 0 | 1 | -0.09 | 1.04 | -5.34 | -0.51 | 0 | 0.49 | 2.50 | ▁▁▂▇▂ |
| Monetary Freedom | 0 | 1 | -0.50 | 2.02 | -9.52 | -0.50 | 0 | 0.50 | 2.39 | ▁▁▁▇▇ |
| Trade Freedom | 0 | 1 | -0.10 | 0.80 | -4.85 | -0.50 | 0 | 0.50 | 1.53 | ▁▁▂▇▆ |
| Investment Freedom | 0 | 1 | -0.23 | 0.82 | -2.40 | -0.60 | 0 | 0.40 | 1.40 | ▁▂▆▇▂ |
| Financial Freedom | 0 | 1 | -0.06 | 0.93 | -2.50 | -0.50 | 0 | 0.50 | 1.50 | ▁▃▃▇▃ |
| Free and fair elections | 0 | 1 | -0.15 | 0.45 | -0.78 | -0.70 | 0 | 0.30 | 0.35 | ▆▁▁▂▇ |
| Civil liberties | 0 | 1 | -0.02 | 0.54 | -1.09 | -0.51 | 0 | 0.49 | 0.86 | ▃▇▇▆▇ |
# Mean and standard deviation of every column within each DBSCAN label
# (label 0 is summarised as well); outputs are named <column>_mean / <column>_sd.
summary_by_cluster <- data_clustered |>
  group_by(cluster_dbscan) |>
  summarise(
    across(
      everything(),
      list(mean = mean, sd = sd),
      .names = "{.col}_{.fn}"
    )
  )
summary_by_cluster
# A tibble: 4 × 29
cluster_dbscan `Property Rights_mean` `Property Rights_sd`
<fct> <dbl> <dbl>
1 0 -0.320 0.697
2 1 0.198 0.583
3 2 -0.499 0.281
4 3 -0.102 0.337
# ℹ 26 more variables: `Government Integrity_mean` <dbl>,
# `Government Integrity_sd` <dbl>, `Judicial Effectiveness_mean` <dbl>,
# `Judicial Effectiveness_sd` <dbl>, `Tax Burden_mean` <dbl>,
# `Tax Burden_sd` <dbl>, `Government Spending_mean` <dbl>,
# `Government Spending_sd` <dbl>, `Fiscal Health_mean` <dbl>,
# `Fiscal Health_sd` <dbl>, `Business Freedom_mean` <dbl>,
# `Business Freedom_sd` <dbl>, `Labor Freedom_mean` <dbl>, …
# Long format: one row per (observation, variable) pair, keeping the DBSCAN
# label alongside. Built before PC1/PC2 are added so they are not pivoted.
data_long <- pivot_longer(
  data_clustered,
  cols = -c(cluster_dbscan),
  names_to = "Variable",
  values_to = "Valor"
)

# One boxplot panel per variable, one box per DBSCAN label; each panel has its
# own y scale and outliers are drawn as hollow red circles.
ggplot(data_long, aes(x = cluster_dbscan, y = Valor, fill = cluster_dbscan)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 1, alpha = 0.7) +
  facet_wrap(~ Variable, scales = "free_y") +
  labs(
    title = "Distribución de índices por Cluster (DBSCAN)",
    x = "Cluster DBSCAN",
    y = "Valor del índice"
  ) +
  theme_minimal() +
  theme(legend.position = "none", strip.text = element_text(size = 10))

# Add the first two columns of pc$x and turn the row names into a Country
# column so they can be used as point labels.
data_clustered$PC1 <- pc$x[, 1]
data_clustered$PC2 <- pc$x[, 2]
data_clustered_country <- rownames_to_column(data_clustered, var = "Country")

# Labelled scatter of PC1 against PC2, coloured by DBSCAN label.
ggplot(
  data_clustered_country,
  aes(x = PC1, y = PC2, color = cluster_dbscan, label = Country)
) +
  geom_point(size = 5, alpha = 1) +
  geom_text_repel(size = 3, max.overlaps = 100) +
  labs(
    title = "Clusters DBSCAN en espacio PCA (con nombres de países)",
    x = "PC1",
    y = "PC2"
  ) +
  theme_minimal()