library(FactoMineR)
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
## corrplot 0.92 loaded
# Read PIB_2015.csv: semicolon-separated fields, comma as decimal mark,
# header row present, and the first column used as row names.
datos <- read.table(
  file = "PIB_2015.csv",
  header = TRUE,
  sep = ";",
  dec = ",",
  row.names = 1
)
# Per-column descriptive summary (min, quartiles, mean, max).
summary(datos)
## Agri Mina Manu Elec
## Min. : 0.00 Min. : 0.000 Min. : 2.29 Min. : 2.29
## 1st Qu.: 25.20 1st Qu.: 3.373 1st Qu.: 14.65 1st Qu.:10.05
## Median : 65.55 Median : 11.746 Median : 37.71 Median :26.11
## Mean : 78.67 Mean : 152.013 Mean : 69.72 Mean :26.63
## 3rd Qu.: 98.88 3rd Qu.: 71.702 3rd Qu.:116.50 3rd Qu.:41.40
## Max. :260.04 Max. :1509.486 Max. :314.54 Max. :56.57
## Cons Come Tran Fina
## Min. : 0.00 Min. : 35.71 Min. : 23.40 Min. : 16.80
## 1st Qu.: 31.47 1st Qu.: 67.09 1st Qu.: 37.46 1st Qu.: 39.35
## Median : 56.80 Median : 85.65 Median : 57.63 Median : 82.06
## Mean : 64.31 Mean :102.14 Mean : 63.25 Mean :106.40
## 3rd Qu.: 77.13 3rd Qu.:115.57 3rd Qu.: 76.63 3rd Qu.:114.20
## Max. :262.29 Max. :389.84 Max. :162.21 Max. :602.41
## Serv Impu
## Min. : 96.89 Min. : 10.20
## 1st Qu.:137.57 1st Qu.: 22.40
## Median :154.40 Median : 37.15
## Mean :158.10 Mean : 61.44
## 3rd Qu.:164.39 3rd Qu.: 72.48
## Max. :292.88 Max. :307.36
# Correlation matrix between the columns of datos; "pearson" is cor()'s
# default method, written out here for clarity.
R <- cor(x = datos, method = "pearson")
# Show the matrix.
R
## Agri Mina Manu Elec Cons Come
## Agri 1.00000000 0.682209114 0.11475561 0.22827213 0.41660764 -0.08338495
## Mina 0.68220911 1.000000000 -0.13448890 0.11124287 0.33305380 -0.02927574
## Manu 0.11475561 -0.134488900 1.00000000 0.74307274 0.66278966 0.34247501
## Elec 0.22827213 0.111242871 0.74307274 1.00000000 0.51190603 0.58500211
## Cons 0.41660764 0.333053803 0.66278966 0.51190603 1.00000000 0.21414289
## Come -0.08338495 -0.029275744 0.34247501 0.58500211 0.21414289 1.00000000
## Tran 0.09012778 0.133308360 0.53970873 0.72095784 0.52077524 0.89668207
## Fina -0.13485953 -0.097873006 0.64115252 0.61617900 0.37124299 0.58807397
## Serv -0.19875751 -0.005582774 0.04621661 0.05299433 0.02012083 0.62114315
## Impu 0.02840857 -0.093136871 0.93705648 0.67268924 0.72381473 0.42400327
## Tran Fina Serv Impu
## Agri 0.09012778 -0.13485953 -0.198757514 0.02840857
## Mina 0.13330836 -0.09787301 -0.005582774 -0.09313687
## Manu 0.53970873 0.64115252 0.046216614 0.93705648
## Elec 0.72095784 0.61617900 0.052994334 0.67268924
## Cons 0.52077524 0.37124299 0.020120828 0.72381473
## Come 0.89668207 0.58807397 0.621143146 0.42400327
## Tran 1.00000000 0.65658131 0.542281934 0.62616517
## Fina 0.65658131 1.00000000 0.523071555 0.66189246
## Serv 0.54228193 0.52307156 1.000000000 0.16493342
## Impu 0.62616517 0.66189246 0.164933417 1.00000000
# Graphical display of the correlation matrix.
corrplot(R)
# One box per column of datos. The palette is sized from the number of
# columns: a fixed rainbow(9) is recycled when datos has more than nine
# columns, which makes the extra boxes repeat earlier colours.
boxplot(datos, col = rainbow(ncol(datos)))
# A continuación se presenta el histograma de cada sector económico.
# Histogram of every column of datos, one panel per variable, titled with
# the column name.
xx <- colnames(datos)
# Derive the panel grid from the number of variables instead of a fixed
# 3 x 3 layout, and remember the previous graphics settings.
n_col_grid <- ceiling(sqrt(length(xx)))
n_row_grid <- ceiling(length(xx) / n_col_grid)
old_par <- par(mfrow = c(n_row_grid, n_col_grid))
# seq_along() visits all columns; a hard-coded 1:9 leaves out any column
# past the ninth.
for (i in seq_along(xx)) {
  hist(datos[, i], col = i + 1, main = xx[i])
}
# Put the previous layout back so later base-graphics plots are not squeezed
# into a single cell of the histogram grid.
par(old_par)
# Principal component analysis of datos. The arguments below are the
# FactoMineR defaults (standardised variables, 5 dimensions kept, graphs
# drawn), written out explicitly.
res.pca <- PCA(X = datos, scale.unit = TRUE, ncp = 5, graph = TRUE)
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Biplot: individuals and variables shown together; repel = TRUE asks for
# non-overlapping labels.
fviz_pca_biplot(X = res.pca, repel = TRUE)
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Scree plot: bar chart of the values in the second column of the PCA
# eigenvalue table (labelled below as percentage of variance), with a
# point-and-line overlay of the same values.
eig.val <- res.pca$eig
# barplot() returns the x-coordinates of the bar midpoints. These are not
# 1, 2, 3, ... (bars have width 1 plus spacing), so the overlay has to reuse
# them; plotting against 1:n makes the points drift away from the bars.
bar_mid <- barplot(
  eig.val[, 2],
  names.arg = seq_len(nrow(eig.val)),
  main = "Varianzas Explicadas por los Comp",
  xlab = "Componentes Principales",
  ylab = "Porcentaje de Varianzas",
  col = "steelblue"
)
lines(
  x = bar_mid, y = eig.val[, 2],
  type = "b", pch = 19, col = "red"
)
# Contributions of the variables to each retained dimension.
res.pca[["var"]][["contrib"]]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Agri 0.30872544 35.0253775 5.971711688 0.6321225 2.1730362
## Mina 0.07379574 24.6211523 23.706999667 1.4714003 4.0545772
## Manu 14.40865553 0.8871016 15.707448781 0.4365544 1.1355417
## Elec 14.54660210 1.4539033 0.637741437 31.2253500 6.2921691
## Cons 9.77842462 13.1683510 1.142814717 16.7993628 19.3456190
## Come 11.33734585 6.5042216 11.888578049 13.0291672 8.7486502
## Tran 16.50483299 0.5829425 7.235298095 3.3950242 9.8481023
## Fina 13.71589941 4.0459439 0.004298834 3.8388749 46.3926259
## Serv 3.58721360 13.4736323 23.437976759 23.7269889 0.4310053
## Impu 15.73850472 0.2373741 10.267131973 5.4451547 1.5786730
# Contributions of the individuals (rows of datos) to each retained dimension.
res.pca[["ind"]][["contrib"]]
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Amazonas 1.616551e+00 3.28157216 1.567213e+00 0.543095824 0.26832145
## Antioquia 3.606144e+00 0.02010573 1.471921e+00 1.210918292 2.02620388
## Arauca 1.130933e+00 8.02255526 3.639172e+00 0.210692564 1.67790759
## Atlantico 1.567952e+00 0.13665613 3.765569e+00 5.002647384 1.11145255
## Bogota 2.222271e+01 18.45934036 5.330386e+00 11.478961923 18.63483112
## Bolivar 1.327692e+00 0.14116581 5.675223e+00 0.481789730 3.30706497
## Boyaca 2.000496e+00 1.01876489 3.950357e-04 4.644193256 0.13587043
## Caldas 8.042290e-02 0.18600518 1.426263e+00 1.505315339 1.80756855
## Caqueta 1.597907e+00 0.26182727 1.333494e-01 3.725640518 0.47420235
## Casanare 1.069244e+00 25.04422767 1.689918e+01 0.226473917 0.28257455
## Cauca 5.960767e-01 0.05078900 3.221675e+00 0.757446755 0.59578587
## Cesar 2.978778e-01 0.43718942 1.550648e-01 1.859504447 1.10511218
## Choco 4.256025e+00 0.01364588 2.662718e-01 0.209061021 0.01970232
## Cordoba 1.641402e+00 0.04038387 4.212790e-01 1.301182076 1.12876690
## Cundinamarca 2.198072e+00 0.91234381 3.357811e+00 5.108768777 0.36467655
## Guainia 3.001028e+00 2.41251398 3.380379e-01 7.424485415 0.37588457
## Guaviare 2.324320e+00 1.71887395 4.012696e-04 3.789387814 3.38885911
## Huila 9.525231e-05 1.64401093 8.452366e-03 0.278578384 6.13883565
## Guajira 2.948635e+00 0.24464501 1.584149e+00 4.723309669 0.73305983
## Magdalena 9.420755e-01 0.04729625 1.182932e-01 1.102227055 0.01001281
## Meta 5.894059e-01 12.22837297 1.489696e+01 1.928012623 0.27307217
## Narino 2.349602e+00 0.01536967 5.138953e-01 0.101943482 0.45636050
## Norte.Santander 2.769244e-01 0.17434605 1.029253e-01 0.433920368 0.07126025
## Putumayo 3.347842e+00 0.32111827 1.831861e-01 0.757710740 0.23178355
## Quindio 1.006504e-01 0.05160621 2.996924e-02 0.143653514 0.38655745
## Risaralda 1.017766e-01 0.03996367 9.564544e-01 0.040397450 0.40550695
## San.Andres 4.955680e+00 14.29805605 2.104788e+01 22.431753800 22.43742973
## Santander 2.303147e+01 4.09680601 1.077750e+01 12.431874031 21.91226629
## Sucre 1.329238e+00 0.26716647 7.749915e-02 0.150568801 0.05432761
## Tolima 1.643097e-02 0.06388863 1.850072e-02 0.001485152 0.03589817
## Valle 3.389558e+00 0.70872650 1.815196e+00 0.446731697 9.15891185
## Vaupes 3.403889e+00 1.65996238 8.756991e-03 1.090961685 0.44292513
## Vichada 2.681870e+00 1.98070454 1.911656e-01 4.457306498 0.54700712
# Contribution plots for the individuals, one per axis.
fviz_contrib(X = res.pca, choice = "ind", axes = 1) # axis 1
fviz_contrib(X = res.pca, choice = "ind", axes = 2) # axis 2
# Hierarchical clustering on the PCA result. nb.clust = -1 lets HCPC cut the
# tree at its suggested level without asking interactively.
cluster <- HCPC(res = res.pca, nb.clust = -1)
# Plot the resulting clusters.
fviz_cluster(object = cluster)
# Description of the clusters by the variables (used to interpret them).
cluster[["desc.var"]]
##
## Link between the cluster variable and the quantitative variables
## ================================================================
## Eta2 P-value
## Impu 0.9275266 1.389975e-13
## Come 0.9250760 2.131279e-13
## Manu 0.8882302 3.583108e-11
## Fina 0.8587489 7.063373e-10
## Elec 0.8273509 8.966781e-09
## Tran 0.8242706 1.120775e-08
## Serv 0.8064242 3.786625e-08
## Mina 0.7906915 1.008866e-07
## Agri 0.7678728 3.672225e-07
## Cons 0.7535589 7.725858e-07
##
## Description of each cluster by quantitative variables
## =====================================================
## $`1`
## v.test Mean in category Overall mean sd in category Overall sd
## Come -1.999648 60.275125 102.14312 17.715117 67.00059
## Cons -2.217459 30.370500 64.31236 19.532978 48.98128
## Fina -2.270978 27.870125 106.39939 8.772735 110.65438
## Agri -2.301131 33.418250 78.66803 22.276345 62.92529
## Impu -2.331390 15.976625 61.44279 2.668106 62.40556
## Tran -2.521353 37.865375 63.25224 9.693601 32.22000
## Manu -2.720292 6.836875 69.72212 3.835880 73.97470
## Elec -4.079070 5.389875 26.63442 2.541170 16.66619
## p.value
## Come 0.0455382864
## Cons 0.0265917573
## Fina 0.0231483073
## Agri 0.0213842527
## Impu 0.0197327928
## Tran 0.0116904617
## Manu 0.0065224290
## Elec 0.0000452163
##
## $`2`
## v.test Mean in category Overall mean sd in category Overall sd
## Serv -2.809212 136.8932 158.1017 16.13199 34.43154
## p.value
## Serv 0.004966297
##
## $`3`
## v.test Mean in category Overall mean sd in category Overall sd
## Mina 5.018741 1140.3233 152.01291 434.5346 352.26835
## Agri 4.442801 234.9493 78.66803 34.9145 62.92529
## p.value
## Mina 5.201135e-07
## Agri 8.879546e-06
##
## $`4`
## v.test Mean in category Overall mean sd in category Overall sd
## Elec 3.382828 47.7765 26.63442 8.337445 16.66619
## Manu 3.305979 161.4317 69.72212 24.108422 73.97470
## Impu 2.540452 120.8947 61.44279 30.334334 62.40556
## p.value
## Elec 0.0007174344
## Manu 0.0009464527
## Impu 0.0110709269
##
## $`5`
## v.test Mean in category Overall mean sd in category Overall sd
## Come 4.293916 389.838 102.14312 0 67.00059
## Tran 3.071439 162.214 63.25224 0 32.22000
## Serv 2.019146 227.624 158.10170 0 34.43154
## p.value
## Come 0.0000175549
## Tran 0.0021302964
## Serv 0.0434720598
##
## $`6`
## v.test Mean in category Overall mean sd in category Overall sd
## Fina 4.482567 602.415 106.39939 0 110.65438
## Serv 3.914240 292.875 158.10170 0 34.43154
## Come 2.440947 265.688 102.14312 0 67.00059
## Tran 2.394468 140.402 63.25224 0 32.22000
## p.value
## Fina 7.375064e-06
## Serv 9.068951e-05
## Come 1.464880e-02
## Tran 1.664449e-02
##
## $`7`
## v.test Mean in category Overall mean sd in category Overall sd
## Cons 4.041904 262.290 64.31236 0 48.98128
## Impu 3.940678 307.363 61.44279 0 62.40556
## Manu 3.309535 314.544 69.72212 0 73.97470
## p.value
## Cons 5.301887e-05
## Impu 8.125174e-05
## Manu 9.345100e-04
# PCA with rows 5 and 28 (Bogota and Santander) treated as supplementary
# individuals: they are projected but do not build the axes.
res.pca2 <- PCA(X = datos, ind.sup = c(5, 28))
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# PCA with column 9 (Serv) treated as a supplementary quantitative variable.
res.pca3 <- PCA(X = datos, quanti.sup = 9)
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps