# Preliminares (cargue de librerías y datos)
library(readxl)
library(FactoMineR)
# Load the data
# Read the workbook "cpia2015.xlsx"; the path is resolved relative to the
# current working directory.
datos <- read_excel("cpia2015.xlsx")
# View() opens a spreadsheet-style GUI viewer, so only call it when a user is
# actually at the console. This keeps the script runnable non-interactively.
if (interactive()) {
  View(datos)
}
# Categorical and continuous variables, split by column position:
# columns 1-4 are treated as categorical, columns 5-22 as continuous.
categoricas <- names(datos)[1:4]
continuas <- names(datos)[5:22]
var_categoricas <- datos[categoricas]
var_continuas <- datos[continuas]
# The continuous variables are partitioned into five thematic groups by their
# position inside `var_continuas`; each `gN` holds the column names and each
# `var_gN` the matching columns of `datos`.

# Group 1: economic management (continuous columns 1-3)
g1 <- names(var_continuas)[1:3]
var_g1 <- datos[g1]

# Group 2: structural policy (continuous columns 4-6)
g2 <- names(var_continuas)[4:6]
var_g2 <- datos[g2]

# Group 3: social inclusion policies (continuous columns 7-11)
g3 <- names(var_continuas)[7:11]
var_g3 <- datos[g3]

# Group 4: government (continuous columns 12-16)
g4 <- names(var_continuas)[12:16]
var_g4 <- datos[g4]

# Group 5: infrastructure (continuous columns 17-18)
g5 <- names(var_continuas)[17:18]
var_g5 <- datos[g5]
# One PCA per thematic group. Only the first dimension is kept in the results
# (ncp = 1) and FactoMineR's automatic plots are switched off.

# PCA, group 1
ACP_1 <- PCA(X = var_g1, graph = FALSE, ncp = 1)

# PCA, group 2
ACP_2 <- PCA(X = var_g2, graph = FALSE, ncp = 1)

# PCA, group 3
ACP_3 <- PCA(X = var_g3, graph = FALSE, ncp = 1)

# PCA, group 4
ACP_4 <- PCA(X = var_g4, graph = FALSE, ncp = 1)

# PCA, group 5
ACP_5 <- PCA(X = var_g5, graph = FALSE, ncp = 1)
# Pair every country with its coordinate on the first dimension of each
# group PCA.
# NOTE: both inputs are one-column objects that carry their own column names,
# so the resulting columns are `Country` and `Dim.1`; the `paises` tag does
# not become a column name (later code reads `$Country`).
paises_g1 <- data.frame(
  paises = datos[, "Country"],
  Dim.1 = ACP_1$ind$coord
)
paises_g2 <- data.frame(
  paises = datos[, "Country"],
  Dim.1 = ACP_2$ind$coord
)
paises_g3 <- data.frame(
  paises = datos[, "Country"],
  Dim.1 = ACP_3$ind$coord
)
paises_g4 <- data.frame(
  paises = datos[, "Country"],
  Dim.1 = ACP_4$ind$coord
)
paises_g5 <- data.frame(
  paises = datos[, "Country"],
  Dim.1 = ACP_5$ind$coord
)
# Summarise the first dimension of one group PCA.
#
# Args:
#   acp:    a PCA result exposing `$ind$coord` (individual coordinates) and
#           `$eig` (eigenvalue table).
#   paises: a data frame with columns `Country` and `Dim.1`.
#
# Returns a length-7 vector: min, max, mean and sd of the coordinates, the
# value in row 1 / column 2 of the eigenvalue table (percentage of variance of
# the first dimension in FactoMineR's layout), and the countries with the
# lowest and highest score.
# Because numbers and country names share one vector, every entry is coerced
# to character.
resumir_primera_dim <- function(acp, paises) {
  coord <- acp$ind$coord
  c(
    min(coord), max(coord),
    mean(coord), sd(coord),
    acp$eig[1, 2],
    # which.min()/which.max() always return exactly one index, so a tie at
    # the extreme cannot produce columns of unequal length.
    paises$Country[which.min(paises$Dim.1)],
    paises$Country[which.max(paises$Dim.1)]
  )
}

# One column per group PCA. The column names are kept exactly as before in
# case later code refers to them.
resumen_acp <- data.frame(
  ACP1 = resumir_primera_dim(ACP_1, paises_g1),
  ACP2 = resumir_primera_dim(ACP_2, paises_g2),
  ACP_3 = resumir_primera_dim(ACP_3, paises_g3),
  ACP_4 = resumir_primera_dim(ACP_4, paises_g4),
  ACP_5 = resumir_primera_dim(ACP_5, paises_g5)
)
resumen_acp
## ACP1 ACP2 ACP_3
## 1 -2.53192171537554 -2.93909251555701 -3.95803892626847
## 2 4.52372056636158 4.77438714389304 6.13895684195548
## 3 -2.78782616509365e-16 -1.7415368306818e-15 -1.46487126157239e-15
## 4 1.64025941706022 1.64505846480161 2.06677249368151
## 5 87.3216538108987 87.8333702164656 83.1827873703283
## 6 Rwanda Rwanda Rwanda
## 7 Somalia Somalia Somalia
## ACP_4 ACP_5
## 1 -3.790487345158 -3.80332810057689
## 2 6.19036328341326 2.67373447804198
## 3 -4.4957299031195e-16 5.23332976004306e-16
## 4 2.13654890989367 1.32203574780796
## 5 88.8942768640056 85.0892173471576
## 6 Rwanda Somalia
## 7 Somalia Rwanda
# Gather the first-dimension scores of the five group PCAs, one column per
# group, and run a second-stage PCA on them (first dimension only, no plots).
datos_pca_conjunto <- data.frame(
  pca_1 = paises_g1[["Dim.1"]],
  pca_2 = paises_g2[["Dim.1"]],
  pca_3 = paises_g3[["Dim.1"]],
  pca_4 = paises_g4[["Dim.1"]],
  pca_5 = paises_g5[["Dim.1"]]
)
acp_conjunto <- PCA(X = datos_pca_conjunto, ncp = 1, graph = FALSE)
# Summary of the joint PCA scores, in row order: min, max, mean, sd, and the
# entry in row 1 / column 2 of the eigenvalue table.
# local() keeps the temporary `coord` out of the global environment.
resumen_acp_conjunto <- local({
  coord <- acp_conjunto$ind$coord
  data.frame(
    resumen = c(
      min(coord),
      max(coord),
      mean(coord),
      sd(coord),
      acp_conjunto$eig[1, 2]
    )
  )
})
resumen_acp_conjunto
## resumen
## 1 -4.099333e+00
## 2 6.529526e+00
## 3 -6.149138e-17
## 4 2.161709e+00
## 5 9.100023e+01
# Kernel density estimate of the joint PCA scores.
plot(
  x = density(acp_conjunto$ind$coord),
  xlab = "PCA conjunto",
  main = "Densidad PCA conjunto"
)

# Countries with their joint score, printed from lowest to highest score.
# The resulting columns are `Country` and `Dim.1` (the inner column names win
# over the `paises` tag).
tabla <- data.frame(
  paises = datos[, "Country"],
  Dim.1 = acp_conjunto$ind$coord
)
tabla[order(tabla[["Dim.1"]]), ]
## Country Dim.1
## 27 Rwanda -4.09933275
## 29 Senegal -2.92796464
## 17 Kenya -2.60254873
## 34 Tanzania -2.43078598
## 36 Uganda -2.29613128
## 4 Cameroon -2.19845935
## 12 Ethiopia -2.11637138
## 2 Burkina Faso -1.93264278
## 14 Ghana -1.72310312
## 1 Benin -1.40524803
## 23 Mauritania -1.11744296
## 26 Nigeria -1.10346438
## 18 Lesotho -1.05958821
## 25 Niger -0.81843356
## 9 Cote d'Ivoire -0.68621195
## 22 Mali -0.66524779
## 37 Zambia -0.58584620
## 15 Guinea -0.27636449
## 24 Mozambique -0.22067876
## 21 Malawi -0.09772408
## 19 Liberia 0.06179791
## 3 Burundi 0.19438673
## 28 Sao Tome & Principe 0.19447455
## 10 Djibouti 0.19702326
## 6 Chad 0.27219955
## 8 Congo, Dem. Rep. 0.39400963
## 35 Togo 0.48125907
## 30 Sierra Leone 0.58203323
## 13 Gambia 0.65327303
## 20 Madagascar 0.69339463
## 16 Guinea-Bissau 1.96261256
## 38 Zimbabwe 2.10652111
## 33 Sudan 2.40857054
## 5 Central African Rep. 2.75676157
## 7 Comoros 3.22177742
## 11 Eritrea 3.59734050
## 32 South Sudan 4.05662889
## 31 Somalia 6.52952627
# Point 3: k-means clustering and PCA on all continuous variables
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Choose the number of clusters with the average-silhouette criterion.
# kmeans() starts from random centres, so the seed is fixed here to make the
# silhouette curve reproducible from run to run.
set.seed(123)
fviz_nbclust(
  var_continuas,
  kmeans,
  method = "silhouette"
) +
  # Dashed vertical line marking the number of clusters used below (k = 6).
  geom_vline(xintercept = 6, linetype = 2)
# Final k-means fit on the continuous variables: 6 clusters, best of 25
# random starts. The seed is fixed so the partition is reproducible.
set.seed(123)
km.res <- kmeans(x = var_continuas, centers = 6, nstart = 25)
# Sum-of-squares metrics of the k-means fit.
# `inercia` used to be a second copy of `km.res$withinss`, which made it
# indistinguishable from `var_intra`. It now holds the total sum of squares,
# which kmeans() decomposes as totss = tot.withinss + betweenss.
# NOTE(review): confirm that total inertia is the intended quantity.
metricas <- list(
  inercia = km.res$totss,        # total sum of squares
  var_intra = km.res$withinss,   # within-cluster sum of squares, one per cluster
  var_entre = km.res$betweenss   # between-cluster sum of squares
)
# Plot the six k-means clusters, one colour per cluster.
fviz_cluster(
  km.res,
  data = var_continuas,
  ggtheme = theme_minimal(),
  repel = TRUE,             # avoid overlapping labels (slow)
  star.plot = TRUE,         # draw segments from each centroid to its points
  ellipse.type = "euclid",  # concentration ellipse
  palette = c(
    "blue3", "chocolate3", "brown3",
    "cadetblue3", "darkgoldenrod3", "green4"
  )
)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# PCA on all continuous variables at once, keeping five dimensions and
# drawing no plots; then print each variable's cos2 per dimension, rounded to
# two decimals.
acp_completo <- PCA(X = var_continuas, graph = FALSE, ncp = 5)
round(acp_completo$var$cos2, digits = 2)
## Dim.1 Dim.2 Dim.3
## Fiscal Policy 0.78 0.07 0.00
## Monetary Policy 0.75 0.02 0.05
## Debt Policy 0.78 0.01 0.07
## Financial Sector Development 0.84 0.02 0.00
## Trade Policy 0.75 0.08 0.05
## Business Regulatory Environment 0.83 0.00 0.00
## Gender Equality 0.65 0.11 0.00
## Equity of Public Resource Use 0.90 0.00 0.00
## Building Human Resources 0.69 0.04 0.15
## Social Protection and Labor 0.86 0.00 0.03
## Environmental Policy and Regulations 0.79 0.00 0.04
## Property Rights and Rule Based Governance 0.78 0.02 0.02
## Quality of Budgetary and Financial Managmt. 0.90 0.04 0.00
## Quality of Public Administration 0.88 0.01 0.01
## Efficiency of Revenue Mobilization 0.90 0.01 0.01
## Transparency, Accountability and Corruption in Pub. Sector 0.72 0.19 0.00
## Infrastructure Development 0.78 0.08 0.01
## Regional Integration 0.70 0.06 0.14
## Dim.4 Dim.5
## Fiscal Policy 0.07 0.01
## Monetary Policy 0.05 0.00
## Debt Policy 0.00 0.00
## Financial Sector Development 0.02 0.05
## Trade Policy 0.00 0.00
## Business Regulatory Environment 0.08 0.00
## Gender Equality 0.00 0.19
## Equity of Public Resource Use 0.00 0.01
## Building Human Resources 0.01 0.01
## Social Protection and Labor 0.00 0.00
## Environmental Policy and Regulations 0.09 0.01
## Property Rights and Rule Based Governance 0.11 0.00
## Quality of Budgetary and Financial Managmt. 0.00 0.01
## Quality of Public Administration 0.01 0.01
## Efficiency of Revenue Mobilization 0.00 0.00
## Transparency, Accountability and Corruption in Pub. Sector 0.01 0.03
## Infrastructure Development 0.00 0.04
## Regional Integration 0.02 0.00