library(FactoMineR)
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
## corrplot 0.92 loaded
# Read PIB_2015.csv: header row present, fields separated by ";",
# decimal commas, and the first column used as row names.
datos <- read.table(
  "PIB_2015.csv",
  header = TRUE,
  sep = ";",
  dec = ",",
  row.names = 1
)

Estadísticas Descriptivas

# Print min, quartiles, median, mean and max for every column of datos.
summary(datos)
##       Agri             Mina               Manu             Elec      
##  Min.   :  0.00   Min.   :   0.000   Min.   :  2.29   Min.   : 2.29  
##  1st Qu.: 25.20   1st Qu.:   3.373   1st Qu.: 14.65   1st Qu.:10.05  
##  Median : 65.55   Median :  11.746   Median : 37.71   Median :26.11  
##  Mean   : 78.67   Mean   : 152.013   Mean   : 69.72   Mean   :26.63  
##  3rd Qu.: 98.88   3rd Qu.:  71.702   3rd Qu.:116.50   3rd Qu.:41.40  
##  Max.   :260.04   Max.   :1509.486   Max.   :314.54   Max.   :56.57  
##       Cons             Come             Tran             Fina       
##  Min.   :  0.00   Min.   : 35.71   Min.   : 23.40   Min.   : 16.80  
##  1st Qu.: 31.47   1st Qu.: 67.09   1st Qu.: 37.46   1st Qu.: 39.35  
##  Median : 56.80   Median : 85.65   Median : 57.63   Median : 82.06  
##  Mean   : 64.31   Mean   :102.14   Mean   : 63.25   Mean   :106.40  
##  3rd Qu.: 77.13   3rd Qu.:115.57   3rd Qu.: 76.63   3rd Qu.:114.20  
##  Max.   :262.29   Max.   :389.84   Max.   :162.21   Max.   :602.41  
##       Serv             Impu       
##  Min.   : 96.89   Min.   : 10.20  
##  1st Qu.:137.57   1st Qu.: 22.40  
##  Median :154.40   Median : 37.15  
##  Mean   :158.10   Mean   : 61.44  
##  3rd Qu.:164.39   3rd Qu.: 72.48  
##  Max.   :292.88   Max.   :307.36

Gráfico de la correlación

# Correlation matrix between the columns of datos: keep it in R for the
# plot further down, then display it.
R <- cor(datos)
print(R)
##             Agri         Mina        Manu       Elec       Cons        Come
## Agri  1.00000000  0.682209114  0.11475561 0.22827213 0.41660764 -0.08338495
## Mina  0.68220911  1.000000000 -0.13448890 0.11124287 0.33305380 -0.02927574
## Manu  0.11475561 -0.134488900  1.00000000 0.74307274 0.66278966  0.34247501
## Elec  0.22827213  0.111242871  0.74307274 1.00000000 0.51190603  0.58500211
## Cons  0.41660764  0.333053803  0.66278966 0.51190603 1.00000000  0.21414289
## Come -0.08338495 -0.029275744  0.34247501 0.58500211 0.21414289  1.00000000
## Tran  0.09012778  0.133308360  0.53970873 0.72095784 0.52077524  0.89668207
## Fina -0.13485953 -0.097873006  0.64115252 0.61617900 0.37124299  0.58807397
## Serv -0.19875751 -0.005582774  0.04621661 0.05299433 0.02012083  0.62114315
## Impu  0.02840857 -0.093136871  0.93705648 0.67268924 0.72381473  0.42400327
##            Tran        Fina         Serv        Impu
## Agri 0.09012778 -0.13485953 -0.198757514  0.02840857
## Mina 0.13330836 -0.09787301 -0.005582774 -0.09313687
## Manu 0.53970873  0.64115252  0.046216614  0.93705648
## Elec 0.72095784  0.61617900  0.052994334  0.67268924
## Cons 0.52077524  0.37124299  0.020120828  0.72381473
## Come 0.89668207  0.58807397  0.621143146  0.42400327
## Tran 1.00000000  0.65658131  0.542281934  0.62616517
## Fina 0.65658131  1.00000000  0.523071555  0.66189246
## Serv 0.54228193  0.52307156  1.000000000  0.16493342
## Impu 0.62616517  0.66189246  0.164933417  1.00000000
corrplot(R) ## visualization of the correlation matrix

Gráfico de cajas

# Side-by-side boxplots, one per column of datos. The palette is sized from
# the data so each column gets its own colour; a fixed rainbow(9) is
# recycled as soon as the table has more than nine columns.
boxplot(datos, col = rainbow(ncol(datos)))

Histogramas

A continuación se presenta el histograma de cada sector económico

# One histogram per column for the first nine columns of datos, drawn on a
# 3 x 3 grid and titled with the column name.
# NOTE(review): only columns 1-9 are plotted; if datos has a tenth column
# it is left out here -- confirm that this is intended.
xx <- colnames(datos)
old_par <- par(mfrow = c(3, 3)) # par() hands back the previous settings
for (i in seq_len(9)) {
  hist(datos[, i], col = i + 1, main = xx[i])
}
# Restore the layout so later base-graphics plots get a full panel again.
par(old_par)

ACP: Análisis de Componentes Principales

# Principal component analysis of datos with FactoMineR's PCA(); the result
# object is reused by the plots, contribution tables and clustering below.
res.pca <- PCA(datos)
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

fviz_pca_biplot(res.pca,repel = TRUE) ## simultaneous representation (biplot)
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Gráfico de valores propios

# Scree plot: the second column of res.pca$eig (labelled as percentage of
# variance on the y axis) drawn as bars, with a connecting line on top.
eig.val <- res.pca$eig
# barplot() returns the x positions of the bar centres. The overlay must use
# those positions, not 1..n, otherwise the points drift away from the bars.
bar_mid <- barplot(eig.val[, 2],
                   names.arg = seq_len(nrow(eig.val)),
                   main = "Varianzas Explicadas por los Comp",
                   xlab = "Componentes Principales",
                   ylab = "Porcentaje de Varianzas",
                   col = "steelblue")
lines(x = bar_mid, y = eig.val[, 2],
      type = "b", pch = 19, col = "red")

Contribuciones

res.pca$var$contrib ## contributions of the variables to each dimension
##            Dim.1      Dim.2        Dim.3      Dim.4      Dim.5
## Agri  0.30872544 35.0253775  5.971711688  0.6321225  2.1730362
## Mina  0.07379574 24.6211523 23.706999667  1.4714003  4.0545772
## Manu 14.40865553  0.8871016 15.707448781  0.4365544  1.1355417
## Elec 14.54660210  1.4539033  0.637741437 31.2253500  6.2921691
## Cons  9.77842462 13.1683510  1.142814717 16.7993628 19.3456190
## Come 11.33734585  6.5042216 11.888578049 13.0291672  8.7486502
## Tran 16.50483299  0.5829425  7.235298095  3.3950242  9.8481023
## Fina 13.71589941  4.0459439  0.004298834  3.8388749 46.3926259
## Serv  3.58721360 13.4736323 23.437976759 23.7269889  0.4310053
## Impu 15.73850472  0.2373741 10.267131973  5.4451547  1.5786730
res.pca$ind$contrib ## contributions of the individuals to each dimension
##                        Dim.1       Dim.2        Dim.3        Dim.4       Dim.5
## Amazonas        1.616551e+00  3.28157216 1.567213e+00  0.543095824  0.26832145
## Antioquia       3.606144e+00  0.02010573 1.471921e+00  1.210918292  2.02620388
## Arauca          1.130933e+00  8.02255526 3.639172e+00  0.210692564  1.67790759
## Atlantico       1.567952e+00  0.13665613 3.765569e+00  5.002647384  1.11145255
## Bogota          2.222271e+01 18.45934036 5.330386e+00 11.478961923 18.63483112
## Bolivar         1.327692e+00  0.14116581 5.675223e+00  0.481789730  3.30706497
## Boyaca          2.000496e+00  1.01876489 3.950357e-04  4.644193256  0.13587043
## Caldas          8.042290e-02  0.18600518 1.426263e+00  1.505315339  1.80756855
## Caqueta         1.597907e+00  0.26182727 1.333494e-01  3.725640518  0.47420235
## Casanare        1.069244e+00 25.04422767 1.689918e+01  0.226473917  0.28257455
## Cauca           5.960767e-01  0.05078900 3.221675e+00  0.757446755  0.59578587
## Cesar           2.978778e-01  0.43718942 1.550648e-01  1.859504447  1.10511218
## Choco           4.256025e+00  0.01364588 2.662718e-01  0.209061021  0.01970232
## Cordoba         1.641402e+00  0.04038387 4.212790e-01  1.301182076  1.12876690
## Cundinamarca    2.198072e+00  0.91234381 3.357811e+00  5.108768777  0.36467655
## Guainia         3.001028e+00  2.41251398 3.380379e-01  7.424485415  0.37588457
## Guaviare        2.324320e+00  1.71887395 4.012696e-04  3.789387814  3.38885911
## Huila           9.525231e-05  1.64401093 8.452366e-03  0.278578384  6.13883565
## Guajira         2.948635e+00  0.24464501 1.584149e+00  4.723309669  0.73305983
## Magdalena       9.420755e-01  0.04729625 1.182932e-01  1.102227055  0.01001281
## Meta            5.894059e-01 12.22837297 1.489696e+01  1.928012623  0.27307217
## Narino          2.349602e+00  0.01536967 5.138953e-01  0.101943482  0.45636050
## Norte.Santander 2.769244e-01  0.17434605 1.029253e-01  0.433920368  0.07126025
## Putumayo        3.347842e+00  0.32111827 1.831861e-01  0.757710740  0.23178355
## Quindio         1.006504e-01  0.05160621 2.996924e-02  0.143653514  0.38655745
## Risaralda       1.017766e-01  0.03996367 9.564544e-01  0.040397450  0.40550695
## San.Andres      4.955680e+00 14.29805605 2.104788e+01 22.431753800 22.43742973
## Santander       2.303147e+01  4.09680601 1.077750e+01 12.431874031 21.91226629
## Sucre           1.329238e+00  0.26716647 7.749915e-02  0.150568801  0.05432761
## Tolima          1.643097e-02  0.06388863 1.850072e-02  0.001485152  0.03589817
## Valle           3.389558e+00  0.70872650 1.815196e+00  0.446731697  9.15891185
## Vaupes          3.403889e+00  1.65996238 8.756991e-03  1.090961685  0.44292513
## Vichada         2.681870e+00  1.98070454 1.911656e-01  4.457306498  0.54700712
# Contribution plots of the individuals, one for axis 1 and one for axis 2.
# print() is needed because plots are not auto-displayed inside a loop.
for (dim_idx in 1:2) {
  print(fviz_contrib(res.pca, choice = "ind", axes = dim_idx))
}

Cluster

# Hierarchical clustering computed on the PCA result.
# NOTE(review): nb.clust = -1 presumably lets HCPC choose where to cut the
# tree on its own instead of asking interactively -- confirm against the
# FactoMineR documentation.
cluster <- HCPC(res.pca, nb.clust=-1)

# Plot the clusters found above.
fviz_cluster(cluster)

cluster$desc.var ## interpretation of the clusters through the variables
## 
## Link between the cluster variable and the quantitative variables
## ================================================================
##           Eta2      P-value
## Impu 0.9275266 1.389975e-13
## Come 0.9250760 2.131279e-13
## Manu 0.8882302 3.583108e-11
## Fina 0.8587489 7.063373e-10
## Elec 0.8273509 8.966781e-09
## Tran 0.8242706 1.120775e-08
## Serv 0.8064242 3.786625e-08
## Mina 0.7906915 1.008866e-07
## Agri 0.7678728 3.672225e-07
## Cons 0.7535589 7.725858e-07
## 
## Description of each cluster by quantitative variables
## =====================================================
## $`1`
##         v.test Mean in category Overall mean sd in category Overall sd
## Come -1.999648        60.275125    102.14312      17.715117   67.00059
## Cons -2.217459        30.370500     64.31236      19.532978   48.98128
## Fina -2.270978        27.870125    106.39939       8.772735  110.65438
## Agri -2.301131        33.418250     78.66803      22.276345   62.92529
## Impu -2.331390        15.976625     61.44279       2.668106   62.40556
## Tran -2.521353        37.865375     63.25224       9.693601   32.22000
## Manu -2.720292         6.836875     69.72212       3.835880   73.97470
## Elec -4.079070         5.389875     26.63442       2.541170   16.66619
##           p.value
## Come 0.0455382864
## Cons 0.0265917573
## Fina 0.0231483073
## Agri 0.0213842527
## Impu 0.0197327928
## Tran 0.0116904617
## Manu 0.0065224290
## Elec 0.0000452163
## 
## $`2`
##         v.test Mean in category Overall mean sd in category Overall sd
## Serv -2.809212         136.8932     158.1017       16.13199   34.43154
##          p.value
## Serv 0.004966297
## 
## $`3`
##        v.test Mean in category Overall mean sd in category Overall sd
## Mina 5.018741        1140.3233    152.01291       434.5346  352.26835
## Agri 4.442801         234.9493     78.66803        34.9145   62.92529
##           p.value
## Mina 5.201135e-07
## Agri 8.879546e-06
## 
## $`4`
##        v.test Mean in category Overall mean sd in category Overall sd
## Elec 3.382828          47.7765     26.63442       8.337445   16.66619
## Manu 3.305979         161.4317     69.72212      24.108422   73.97470
## Impu 2.540452         120.8947     61.44279      30.334334   62.40556
##           p.value
## Elec 0.0007174344
## Manu 0.0009464527
## Impu 0.0110709269
## 
## $`5`
##        v.test Mean in category Overall mean sd in category Overall sd
## Come 4.293916          389.838    102.14312              0   67.00059
## Tran 3.071439          162.214     63.25224              0   32.22000
## Serv 2.019146          227.624    158.10170              0   34.43154
##           p.value
## Come 0.0000175549
## Tran 0.0021302964
## Serv 0.0434720598
## 
## $`6`
##        v.test Mean in category Overall mean sd in category Overall sd
## Fina 4.482567          602.415    106.39939              0  110.65438
## Serv 3.914240          292.875    158.10170              0   34.43154
## Come 2.440947          265.688    102.14312              0   67.00059
## Tran 2.394468          140.402     63.25224              0   32.22000
##           p.value
## Fina 7.375064e-06
## Serv 9.068951e-05
## Come 1.464880e-02
## Tran 1.664449e-02
## 
## $`7`
##        v.test Mean in category Overall mean sd in category Overall sd
## Cons 4.041904          262.290     64.31236              0   48.98128
## Impu 3.940678          307.363     61.44279              0   62.40556
## Manu 3.309535          314.544     69.72212              0   73.97470
##           p.value
## Cons 5.301887e-05
## Impu 8.125174e-05
## Manu 9.345100e-04

Individuos Suplementarios

# PCA with Bogota and Santander treated as supplementary individuals.
# They are located by row name instead of the hard-coded positions 5 and 28,
# so the call stays correct if rows are added or reordered; a missing name
# stops the script instead of silently selecting the wrong rows.
sup_rows <- match(c("Bogota", "Santander"), rownames(datos))
stopifnot(!anyNA(sup_rows))
res.pca2 <- PCA(datos, ind.sup = sup_rows)
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Variable suplementaria

# PCA with the services column (Serv) as a supplementary quantitative
# variable. The column is located by name instead of the hard-coded
# position 9, so the call stays correct if columns are added or reordered.
sup_col <- match("Serv", colnames(datos))
stopifnot(!is.na(sup_col))
res.pca3 <- PCA(datos, quanti.sup = sup_col)
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps