#install.packages("easypackages")        # Libreria especial para hacer carga automática de librerias
library("easypackages")

lib_req<-c("MASS","visdat","car","HSAUR2","corrplot","plotrix")# Listado de librerias requeridas por el script
easypackages::packages(lib_req)         # Verificación, instalación y carga de librerias.
## Loading required package: MASS
## Loading required package: visdat
## Loading required package: car
## Loading required package: carData
## Loading required package: HSAUR2
## Loading required package: tools
## Loading required package: corrplot
## corrplot 0.92 loaded
## Loading required package: plotrix
## All packages loaded successfully
#------------------------------------------------------------------------------#
####                 1. Lectura de Datos                                    ####
#------------------------------------------------------------------------------#
library(readxl)
file.choose()  #para escoger el archivo desde la carpeta
## [1] "C:\\Users\\julia\\OneDrive\\Desktop\\Maestria\\Semestre 1\\Metodos cuantitativos\\5 Analsis de componentes principales\\PIBpc.xlsx"
PIBpc <- read_excel("C:\\Users\\julia\\OneDrive\\Desktop\\Maestria\\Semestre 1\\Metodos cuantitativos\\5 Analsis de componentes principales\\PIBpc.xlsx")
#datos<-read.table("clipboard",header=T,dec=".",sep="\t")
View(PIBpc)
str(PIBpc)                           # La estructura de las variables es adecuada, no requiere transofrmación.
## tibble [33 x 15] (S3: tbl_df/tbl/data.frame)
##  $ Departamento                                                                                                                                                                                                                                                                : chr [1:33] "Antioquia" "Atlantico" "Bogota D.C." "Bolivar" ...
##  $ Abreviatura                                                                                                                                                                                                                                                                 : chr [1:33] "Anti" "Atla" "Bogo" "Boli" ...
##  $ Agricultura, ganadería, caza, silvicultura y pesca                                                                                                                                                                                                                          : num [1:33] 1.06e-03 1.59e-04 1.77e-06 6.49e-04 2.06e-03 ...
##  $ Explotación de minas y canteras                                                                                                                                                                                                                                             : num [1:33] 3.45e-04 4.16e-05 4.49e-05 3.49e-04 1.45e-03 ...
##  $ Industrias manufactureras                                                                                                                                                                                                                                                   : num [1:33] 0.00317 0.00236 0.00261 0.00235 0.00237 ...
##  $ Suministro de electricidad, gas, vapor y aire acondicionado; distribución de agua; evacuación y tratamiento de aguas residuales, gestión de desechos y actividades de saneamiento ambiental                                                                                 : num [1:33] 0.000826 0.00097 0.000564 0.000404 0.00108 ...
##  $ Construcción                                                                                                                                                                                                                                                                : num [1:33] 0.00162 0.00112 0.00148 0.00154 0.00212 ...
##  $ Comercio al por mayor y al por menor; reparación de vehículos automotores y motocicletas; transporte y almacenamiento; alojamiento y servicios de comida                                                                                                                    : num [1:33] 0.00316 0.00291 0.00577 0.00256 0.00343 ...
##  $ Información y comunicaciones                                                                                                                                                                                                                                                : num [1:33] 0.000562 0.0004 0.001321 0.000284 0.000302 ...
##  $ Actividades financieras y de seguros                                                                                                                                                                                                                                        : num [1:33] 0.00097 0.000648 0.002616 0.000348 0.000368 ...
##  $ Actividades inmobiliarias                                                                                                                                                                                                                                                   : num [1:33] 0.0017 0.001061 0.004024 0.000968 0.001067 ...
##  $ Actividades profesionales, científicas y técnicas; actividades de servicios administrativos y de apoyo                                                                                                                                                                      : num [1:33] 0.001726 0.001084 0.002642 0.00101 0.000827 ...
##  $ Administración pública y defensa; planes de seguridad social de afiliación obligatoria; educación; actividades de atención de la salud humana y de servicios sociales                                                                                                       : num [1:33] 0.0022 0.00225 0.00448 0.00255 0.00247 ...
##  $ Actividades artísticas, de entretenimiento y recreación y otras actividades de servicios; actividades de los hogares individuales en calidad de empleadores; actividades no diferenciadas de los hogares individuales como productores de bienes y servicios para uso propio: num [1:33] 0.000465 0.000368 0.001219 0.000215 0.000209 ...
##  $ Impuestos                                                                                                                                                                                                                                                                   : num [1:33] 0.00173 0.00146 0.00306 0.00166 0.00132 ...
columnas <- colnames(PIBpc) 
columnas
##  [1] "Departamento"                                                                                                                                                                                                                                                                
##  [2] "Abreviatura"                                                                                                                                                                                                                                                                 
##  [3] "Agricultura, ganadería, caza, silvicultura y pesca"                                                                                                                                                                                                                          
##  [4] "Explotación de minas y canteras"                                                                                                                                                                                                                                             
##  [5] "Industrias manufactureras"                                                                                                                                                                                                                                                   
##  [6] "Suministro de electricidad, gas, vapor y aire acondicionado; distribución de agua; evacuación y tratamiento de aguas residuales, gestión de desechos y actividades de saneamiento ambiental"                                                                                 
##  [7] "Construcción"                                                                                                                                                                                                                                                                
##  [8] "Comercio al por mayor y al por menor; reparación de vehículos automotores y motocicletas; transporte y almacenamiento; alojamiento y servicios de comida"                                                                                                                    
##  [9] "Información y comunicaciones"                                                                                                                                                                                                                                                
## [10] "Actividades financieras y de seguros"                                                                                                                                                                                                                                        
## [11] "Actividades inmobiliarias"                                                                                                                                                                                                                                                   
## [12] "Actividades profesionales, científicas y técnicas; actividades de servicios administrativos y de apoyo"                                                                                                                                                                      
## [13] "Administración pública y defensa; planes de seguridad social de afiliación obligatoria; educación; actividades de atención de la salud humana y de servicios sociales"                                                                                                       
## [14] "Actividades artísticas, de entretenimiento y recreación y otras actividades de servicios; actividades de los hogares individuales en calidad de empleadores; actividades no diferenciadas de los hogares individuales como productores de bienes y servicios para uso propio"
## [15] "Impuestos"
colnames(PIBpc) <- c('Depto','Abrev','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15')

windows(height=10,width=15)
visdat::vis_miss(PIBpc)              # Visualización de Datos Faltantes, no hay datos faltantes.
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

#todas las vbles se mueven en la misma dirección de desempeño

## Visualización Univariada de datos
PIBpc <- data.frame(PIBpc[-2],row.names = 'Depto')
Resumen= rbind(apply(PIBpc,2,"mean"),
               apply(PIBpc,2,"sd"))
rownames(Resumen)=c("Promedio", "Desviación")
print(Resumen,2)
##                  C3     C4     C5      C6      C7     C8      C9     C10
## Promedio    0.00135 0.0016 0.0012 0.00036 0.00100 0.0026 0.00027 0.00040
## Desviación 0.00088 0.0035 0.0012 0.00029 0.00044 0.0022 0.00025 0.00045
##                 C11     C12     C13     C14     C15
## Promedio    0.00088 0.00063 0.00243 0.00024 0.00099
## Desviación 0.00076 0.00060 0.00047 0.00021 0.00091
windows(height=10,width=15)
par(mfrow=c(2,7))
lapply(colnames(PIBpc),function(y){
   boxplot(PIBpc[,y],ylab=y,cex=1.5,pch=20,col="blue")
}) 
## [[1]]
## [[1]]$stats
##              [,1]
## [1,] 0.0000017651
## [2,] 0.0007834526
## [3,] 0.0012259332
## [4,] 0.0019234639
## [5,] 0.0032969458
## 
## [[1]]$n
## [1] 33
## 
## [[1]]$conf
##              [,1]
## [1,] 0.0009123814
## [2,] 0.0015394850
## 
## [[1]]$out
## [1] 0.003666048
## 
## [[1]]$group
## [1] 1
## 
## [[1]]$names
## [1] ""
## 
## 
## [[2]]
## [[2]]$stats
##              [,1]
## [1,] 0.0000147256
## [2,] 0.0000416027
## [3,] 0.0001453865
## [4,] 0.0007584899
## [5,] 0.0014533347
## 
## [[2]]$n
## [1] 33
## 
## [[2]]$conf
##               [,1]
## [1,] -5.178809e-05
## [2,]  3.425611e-04
## 
## [[2]]$out
## [1] 0.005066300 0.003765644 0.013433047 0.005758367 0.014689725 0.003152308
## 
## [[2]]$group
## [1] 1 1 1 1 1 1
## 
## [[2]]$names
## [1] ""
## 
## 
## [[3]]
## [[3]]$stats
##              [,1]
## [1,] 0.0000098234
## [2,] 0.0002221225
## [3,] 0.0006364525
## [4,] 0.0019744641
## [5,] 0.0045157631
## 
## [[3]]$n
## [1] 33
## 
## [[3]]$conf
##              [,1]
## [1,] 0.0001544837
## [2,] 0.0011184213
## 
## [[3]]$out
## numeric(0)
## 
## [[3]]$group
## numeric(0)
## 
## [[3]]$names
## [1] ""
## 
## 
## [[4]]
## [[4]]$stats
##              [,1]
## [1,] 0.0000142744
## [2,] 0.0001256419
## [3,] 0.0002925538
## [4,] 0.0005256133
## [5,] 0.0010801120
## 
## [[4]]$n
## [1] 33
## 
## [[4]]$conf
##              [,1]
## [1,] 0.0001825446
## [2,] 0.0004025630
## 
## [[4]]$out
## numeric(0)
## 
## [[4]]$group
## numeric(0)
## 
## [[4]]$names
## [1] ""
## 
## 
## [[5]]
## [[5]]$stats
##              [,1]
## [1,] 0.0003409184
## [2,] 0.0006911470
## [3,] 0.0009486054
## [4,] 0.0011514308
## [5,] 0.0016201181
## 
## [[5]]$n
## [1] 33
## 
## [[5]]$conf
##              [,1]
## [1,] 0.0008220077
## [2,] 0.0010752031
## 
## [[5]]$out
## [1] 0.002120891 0.002254678
## 
## [[5]]$group
## [1] 1 1
## 
## [[5]]$names
## [1] ""
## 
## 
## [[6]]
## [[6]]$stats
##              [,1]
## [1,] 0.0005394066
## [2,] 0.0014976081
## [3,] 0.0021030158
## [4,] 0.0029135101
## [5,] 0.0034254262
## 
## [[6]]$n
## [1] 33
## 
## [[6]]$conf
##             [,1]
## [1,] 0.001713582
## [2,] 0.002492449
## 
## [[6]]$out
## [1] 0.005765305 0.005508334 0.013094263
## 
## [[6]]$group
## [1] 1 1 1
## 
## [[6]]$names
## [1] ""
## 
## 
## [[7]]
## [[7]]$stats
##              [,1]
## [1,] 0.0000054203
## [2,] 0.0001191128
## [3,] 0.0002068047
## [4,] 0.0003552118
## [5,] 0.0006654954
## 
## [[7]]$n
## [1] 33
## 
## [[7]]$conf
##              [,1]
## [1,] 0.0001418674
## [2,] 0.0002717420
## 
## [[7]]$out
## [1] 0.001321444
## 
## [[7]]$group
## [1] 1
## 
## [[7]]$names
## [1] ""
## 
## 
## [[8]]
## [[8]]$stats
##              [,1]
## [1,] 0.0000647886
## [2,] 0.0001842099
## [3,] 0.0002381115
## [4,] 0.0004321295
## [5,] 0.0007210548
## 
## [[8]]$n
## [1] 33
## 
## [[8]]$conf
##             [,1]
## [1,] 0.000169923
## [2,] 0.000306300
## 
## [[8]]$out
## [1] 0.0009697706 0.0026159909
## 
## [[8]]$group
## [1] 1 1
## 
## [[8]]$names
## [1] ""
## 
## 
## [[9]]
## [[9]]$stats
##              [,1]
## [1,] 0.0001378162
## [2,] 0.0004306560
## [3,] 0.0007258326
## [4,] 0.0010444960
## [5,] 0.0019387810
## 
## [[9]]$n
## [1] 33
## 
## [[9]]$conf
##              [,1]
## [1,] 0.0005570004
## [2,] 0.0008946648
## 
## [[9]]$out
## [1] 0.004023524 0.002423239
## 
## [[9]]$group
## [1] 1 1
## 
## [[9]]$names
## [1] ""
## 
## 
## [[10]]
## [[10]]$stats
##              [,1]
## [1,] 0.0000066204
## [2,] 0.0001383614
## [3,] 0.0005452353
## [4,] 0.0009782647
## [5,] 0.0017403809
## 
## [[10]]$n
## [1] 33
## 
## [[10]]$conf
##              [,1]
## [1,] 0.0003142260
## [2,] 0.0007762446
## 
## [[10]]$out
## [1] 0.002641694
## 
## [[10]]$group
## [1] 1
## 
## [[10]]$names
## [1] ""
## 
## 
## [[11]]
## [[11]]$stats
##             [,1]
## [1,] 0.001792587
## [2,] 0.002152163
## [3,] 0.002331543
## [4,] 0.002546605
## [5,] 0.003116327
## 
## [[11]]$n
## [1] 33
## 
## [[11]]$conf
##             [,1]
## [1,] 0.002223054
## [2,] 0.002440031
## 
## [[11]]$out
## [1] 0.004481686 0.003160621
## 
## [[11]]$group
## [1] 1 1
## 
## [[11]]$names
## [1] ""
## 
## 
## [[12]]
## [[12]]$stats
##              [,1]
## [1,] 0.0000428233
## [2,] 0.0001209356
## [3,] 0.0001880561
## [4,] 0.0003111150
## [5,] 0.0004650461
## 
## [[12]]$n
## [1] 33
## 
## [[12]]$conf
##              [,1]
## [1,] 0.0001357486
## [2,] 0.0002403636
## 
## [[12]]$out
## [1] 0.001218857
## 
## [[12]]$group
## [1] 1
## 
## [[12]]$names
## [1] ""
## 
## 
## [[13]]
## [[13]]$stats
##              [,1]
## [1,] 0.0001377523
## [2,] 0.0003961793
## [3,] 0.0006899259
## [4,] 0.0013203308
## [5,] 0.0018295074
## 
## [[13]]$n
## [1] 33
## 
## [[13]]$conf
##              [,1]
## [1,] 0.0004357448
## [2,] 0.0009441070
## 
## [[13]]$out
## [1] 0.003058367 0.004443086
## 
## [[13]]$group
## [1] 1 1
## 
## [[13]]$names
## [1] ""
which (PIBpc$C3==boxplot.stats(PIBpc$C3)$out) #busca cuál es el dato por fuera del boxplot
## [1] 26
which (PIBpc$C4%in%boxplot.stats(PIBpc$C4)$out)
## [1]  9 14 16 25 26 27
which (PIBpc$C5==boxplot.stats(PIBpc$C5)$out)
## integer(0)
which (PIBpc$C6==boxplot.stats(PIBpc$C6)$out)
## integer(0)
which (PIBpc$C7%in%boxplot.stats(PIBpc$C7)$out)
## [1]  5 21
which (PIBpc$C8%in%boxplot.stats(PIBpc$C8)$out)
## [1]  3 26 28
which (PIBpc$C9==boxplot.stats(PIBpc$C9)$out)
## [1] 3
which (PIBpc$C10%in%boxplot.stats(PIBpc$C10)$out)
## [1] 1 3
which (PIBpc$C11%in%boxplot.stats(PIBpc$C11)$out)
## [1]  3 24
which (PIBpc$C12==boxplot.stats(PIBpc$C12)$out)
## [1] 3
which (PIBpc$C13%in%boxplot.stats(PIBpc$C13)$out)
## [1]  3 28
which (PIBpc$C14==boxplot.stats(PIBpc$C14)$out)
## [1] 3
which (PIBpc$C15%in%boxplot.stats(PIBpc$C15)$out)
## [1]  3 21
## Aná¡lisis Bivariado de la correlación.

windows(height=10,width=15)
pairs(PIBpc,pch=20,cex=1.5,lower.panel = NULL) #con pairs hace parejas de datos

#Se observa que desde la columna 8 hasta la 15 hay un punto atípico comparando todas las vbles 

M.cor = cor(PIBpc,method="pearson")
print(M.cor,2)  #las columnas 3 y 4 solo tienen correlación entre ellas, pero no con las otras
##         C3      C4     C5      C6    C7     C8    C9    C10    C11   C12    C13
## C3   1.000  0.6160  0.050  0.0887 0.202 -0.066 -0.12 -0.238 -0.139 -0.20 -0.261
## C4   0.616  1.0000 -0.160  0.0083 0.074  0.135 -0.10 -0.047 -0.063 -0.15 -0.072
## C5   0.050 -0.1600  1.000  0.7457 0.743  0.175  0.71  0.475  0.640  0.78  0.042
## C6   0.089  0.0083  0.746  1.0000 0.623  0.323  0.56  0.397  0.469  0.64 -0.041
## C7   0.202  0.0735  0.743  0.6229 1.000  0.172  0.58  0.408  0.541  0.55  0.144
## C8  -0.066  0.1351  0.175  0.3234 0.172  1.000  0.40  0.462  0.432  0.40  0.486
## C9  -0.123 -0.1016  0.708  0.5593 0.578  0.397  1.00  0.923  0.930  0.92  0.527
## C10 -0.238 -0.0472  0.475  0.3969 0.408  0.462  0.92  1.000  0.926  0.85  0.708
## C11 -0.139 -0.0632  0.640  0.4691 0.541  0.432  0.93  0.926  1.000  0.89  0.573
## C12 -0.196 -0.1476  0.778  0.6393 0.552  0.402  0.92  0.846  0.894  1.00  0.383
## C13 -0.261 -0.0716  0.042 -0.0407 0.144  0.486  0.53  0.708  0.573  0.38  1.000
## C14 -0.291 -0.1874  0.520  0.4138 0.418  0.258  0.92  0.936  0.902  0.86  0.554
## C15  0.013 -0.0217  0.849  0.5779 0.741  0.480  0.81  0.655  0.776  0.76  0.349
##       C14    C15
## C3  -0.29  0.013
## C4  -0.19 -0.022
## C5   0.52  0.849
## C6   0.41  0.578
## C7   0.42  0.741
## C8   0.26  0.480
## C9   0.92  0.811
## C10  0.94  0.655
## C11  0.90  0.776
## C12  0.86  0.756
## C13  0.55  0.349
## C14  1.00  0.599
## C15  0.60  1.000
p.cor=corrplot::cor.mtest(PIBpc)$p  #test de correlación, significancia estadística de valor p
print(p.cor,4) #C3 y C4 en su mayoría son superior a 5% indica que NO hay significancia
##            C3        C4        C5        C6        C7       C8        C9
## C3  0.0000000 0.0001353 7.812e-01 6.236e-01 2.604e-01 0.714674 4.941e-01
## C4  0.0001353 0.0000000 3.737e-01 9.632e-01 6.844e-01 0.453375 5.736e-01
## C5  0.7811500 0.3736882 0.000e+00 6.355e-07 7.191e-07 0.330807 4.104e-06
## C6  0.6236246 0.9632316 6.355e-07 0.000e+00 1.081e-04 0.066343 7.145e-04
## C7  0.2604244 0.6843734 7.191e-07 1.081e-04 0.000e+00 0.337505 4.318e-04
## C8  0.7146739 0.4533752 3.308e-01 6.634e-02 3.375e-01 0.000000 2.204e-02
## C9  0.4941231 0.5736426 4.104e-06 7.145e-04 4.318e-04 0.022042 0.000e+00
## C10 0.1823453 0.7943217 5.209e-03 2.219e-02 1.844e-02 0.006781 2.057e-14
## C11 0.4405647 0.7266624 6.108e-05 5.887e-03 1.152e-03 0.011969 5.251e-15
## C12 0.2736935 0.4125066 9.849e-08 6.206e-05 8.629e-04 0.020501 2.606e-14
## C13 0.1426348 0.6921206 8.161e-01 8.221e-01 4.225e-01 0.004134 1.636e-03
## C14 0.1002207 0.2963759 1.902e-03 1.667e-02 1.560e-02 0.147192 2.756e-14
## C15 0.9413917 0.9044553 4.084e-10 4.281e-04 8.007e-07 0.004699 1.038e-08
##           C10       C11       C12       C13       C14       C15
## C3  1.823e-01 4.406e-01 2.737e-01 1.426e-01 1.002e-01 9.414e-01
## C4  7.943e-01 7.267e-01 4.125e-01 6.921e-01 2.964e-01 9.045e-01
## C5  5.209e-03 6.108e-05 9.849e-08 8.161e-01 1.902e-03 4.084e-10
## C6  2.219e-02 5.887e-03 6.206e-05 8.221e-01 1.667e-02 4.281e-04
## C7  1.844e-02 1.152e-03 8.629e-04 4.225e-01 1.560e-02 8.007e-07
## C8  6.781e-03 1.197e-02 2.050e-02 4.134e-03 1.472e-01 4.699e-03
## C9  2.057e-14 5.251e-15 2.606e-14 1.636e-03 2.756e-14 1.038e-08
## C10 0.000e+00 1.245e-14 5.768e-10 4.097e-06 1.275e-15 3.562e-05
## C11 1.245e-14 0.000e+00 2.343e-12 4.865e-04 7.891e-13 1.125e-07
## C12 5.768e-10 2.343e-12 0.000e+00 2.801e-02 1.249e-10 3.644e-07
## C13 4.097e-06 4.865e-04 2.801e-02 0.000e+00 8.186e-04 4.628e-02
## C14 1.275e-15 7.891e-13 1.249e-10 8.186e-04 0.000e+00 2.329e-04
## C15 3.562e-05 1.125e-07 3.644e-07 4.628e-02 2.329e-04 0.000e+00
#El hecho que el aspecto 3 y 4 aumente el PIB, no se realiciona con el resto, no se asocia con ninguna

windows(height=10,width=15)
corrplot::corrplot(M.cor, method = "ellipse",addCoef.col = "black",type="upper",
                   col=c("blue","red"),diag=FALSE,
                   p.mat = p.cor, sig.level = 0.01, insig = "blank"
                   )  #asociación positiva roja, negativa azul

#ordene por grupitos relacionados, cluster (order="hclust")
#grafique las que son mayores a 1% y las insignificantes no las grafique, en blanco
#Con la gráfica al ver todo en rojo se certifica la conclusión anterior que todas se relacionan de manera positiva


# Sin considerar el registro atípico
M.cor = cor(PIBpc[-3,],method="pearson")  #se quita registro atípico
print(M.cor,2)
##          C3      C4    C5     C6     C7     C8     C9   C10     C11    C12
## C3   1.0000  0.6200  0.11  0.129  0.271 0.0045  0.131 0.012  0.1001 -0.042
## C4   0.6200  1.0000 -0.15  0.018  0.091 0.1613 -0.065 0.050 -0.0071 -0.126
## C5   0.1142 -0.1473  1.00  0.742  0.732 0.1280  0.856 0.646  0.7408  0.833
## C6   0.1290  0.0185  0.74  1.000  0.615 0.3036  0.712 0.628  0.5662  0.709
## C7   0.2707  0.0910  0.73  0.615  1.000 0.1286  0.667 0.520  0.6027  0.553
## C8   0.0045  0.1613  0.13  0.304  0.129 1.0000  0.320 0.528  0.3732  0.320
## C9   0.1309 -0.0646  0.86  0.712  0.667 0.3204  1.000 0.841  0.8406  0.894
## C10  0.0115  0.0499  0.65  0.628  0.520 0.5278  0.841 1.000  0.8643  0.856
## C11  0.1001 -0.0071  0.74  0.566  0.603 0.3732  0.841 0.864  1.0000  0.839
## C12 -0.0424 -0.1258  0.83  0.709  0.553 0.3200  0.894 0.856  0.8389  1.000
## C13 -0.0783 -0.0162 -0.20 -0.225 -0.015 0.4733 -0.154 0.042 -0.0206 -0.172
## C14 -0.1195 -0.2202  0.64  0.561  0.467 0.0822  0.808 0.775  0.7632  0.817
## C15  0.1430  0.0116  0.86  0.582  0.739 0.4249  0.837 0.695  0.7723  0.699
##        C13    C14   C15
## C3  -0.078 -0.120 0.143
## C4  -0.016 -0.220 0.012
## C5  -0.200  0.638 0.856
## C6  -0.225  0.561 0.582
## C7  -0.015  0.467 0.739
## C8   0.473  0.082 0.425
## C9  -0.154  0.808 0.837
## C10  0.042  0.775 0.695
## C11 -0.021  0.763 0.772
## C12 -0.172  0.817 0.699
## C13  1.000 -0.278 0.050
## C14 -0.278  1.000 0.508
## C15  0.050  0.508 1.000
p.cor=corrplot::cor.mtest(PIBpc)$p
print(p.cor,4)
##            C3        C4        C5        C6        C7       C8        C9
## C3  0.0000000 0.0001353 7.812e-01 6.236e-01 2.604e-01 0.714674 4.941e-01
## C4  0.0001353 0.0000000 3.737e-01 9.632e-01 6.844e-01 0.453375 5.736e-01
## C5  0.7811500 0.3736882 0.000e+00 6.355e-07 7.191e-07 0.330807 4.104e-06
## C6  0.6236246 0.9632316 6.355e-07 0.000e+00 1.081e-04 0.066343 7.145e-04
## C7  0.2604244 0.6843734 7.191e-07 1.081e-04 0.000e+00 0.337505 4.318e-04
## C8  0.7146739 0.4533752 3.308e-01 6.634e-02 3.375e-01 0.000000 2.204e-02
## C9  0.4941231 0.5736426 4.104e-06 7.145e-04 4.318e-04 0.022042 0.000e+00
## C10 0.1823453 0.7943217 5.209e-03 2.219e-02 1.844e-02 0.006781 2.057e-14
## C11 0.4405647 0.7266624 6.108e-05 5.887e-03 1.152e-03 0.011969 5.251e-15
## C12 0.2736935 0.4125066 9.849e-08 6.206e-05 8.629e-04 0.020501 2.606e-14
## C13 0.1426348 0.6921206 8.161e-01 8.221e-01 4.225e-01 0.004134 1.636e-03
## C14 0.1002207 0.2963759 1.902e-03 1.667e-02 1.560e-02 0.147192 2.756e-14
## C15 0.9413917 0.9044553 4.084e-10 4.281e-04 8.007e-07 0.004699 1.038e-08
##           C10       C11       C12       C13       C14       C15
## C3  1.823e-01 4.406e-01 2.737e-01 1.426e-01 1.002e-01 9.414e-01
## C4  7.943e-01 7.267e-01 4.125e-01 6.921e-01 2.964e-01 9.045e-01
## C5  5.209e-03 6.108e-05 9.849e-08 8.161e-01 1.902e-03 4.084e-10
## C6  2.219e-02 5.887e-03 6.206e-05 8.221e-01 1.667e-02 4.281e-04
## C7  1.844e-02 1.152e-03 8.629e-04 4.225e-01 1.560e-02 8.007e-07
## C8  6.781e-03 1.197e-02 2.050e-02 4.134e-03 1.472e-01 4.699e-03
## C9  2.057e-14 5.251e-15 2.606e-14 1.636e-03 2.756e-14 1.038e-08
## C10 0.000e+00 1.245e-14 5.768e-10 4.097e-06 1.275e-15 3.562e-05
## C11 1.245e-14 0.000e+00 2.343e-12 4.865e-04 7.891e-13 1.125e-07
## C12 5.768e-10 2.343e-12 0.000e+00 2.801e-02 1.249e-10 3.644e-07
## C13 4.097e-06 4.865e-04 2.801e-02 0.000e+00 8.186e-04 4.628e-02
## C14 1.275e-15 7.891e-13 1.249e-10 8.186e-04 0.000e+00 2.329e-04
## C15 3.562e-05 1.125e-07 3.644e-07 4.628e-02 2.329e-04 0.000e+00
windows(height=10,width=15)
corrplot::corrplot(M.cor, method = "ellipse",addCoef.col = "black",type="upper",
                   col=c("blue","red"),diag=FALSE,
                   p.mat = p.cor, sig.level = 0.01, insig = "blank"
                   )

#No afectó la significancia de ninguna de las correlaciones, pero si afecta los coeficientes 
# el punto Afecta la estructura de la correlación 

windows()
plot(PIBpc$C9,PIBpc$C10,pch=20)#para ver como afecta el dato atípico

#plot(PIBpc$C9,PIBpc$C10,pch=20,xlim = c(0,0.0007), ylim= c(0,0.001))
plot(PIBpc$C8,PIBpc$C15,pch=20)

plot(PIBpc$C12,PIBpc$C13,pch=20)

plot(PIBpc$C14,PIBpc$C11,pch=20)

#modifica mucho no la significancia de las correlaciones sino las magnitudes

#------------------------------------------------------------------------------#
####     2. Análisis de Componentes Principales                             ####
#------------------------------------------------------------------------------#
X=PIBpc[-3,]  # Datos sin considerar el registro atípico

PCA=prcomp(X,center=TRUE,scale=TRUE)   # Las variables son autoescaladas
#La anterior es la que genera los componentes
PCA
## Standard deviations (1, .., p=13):
##  [1] 2.6303240 1.3453970 1.2664501 0.9121430 0.7462364 0.6024186 0.5819967
##  [8] 0.4528912 0.3725902 0.3267459 0.2670861 0.2118497 0.0999633
## 
## Rotation (n x k) = (13 x 13):
##             PC1         PC2         PC3          PC4          PC5         PC6
## C3   0.03884458 -0.56227546  0.38662216  0.042451625  0.187009770 -0.22574917
## C4  -0.01492272 -0.61728694  0.22100590 -0.357624300  0.029324100  0.22869718
## C5   0.34076419  0.06938535  0.14008413  0.313715540 -0.104151815 -0.23008127
## C6   0.29797814 -0.02210956  0.12036551 -0.002692612 -0.691537879  0.35968507
## C7   0.28189051 -0.15310036  0.14837604  0.506923376  0.040899485  0.48233620
## C8   0.14344466 -0.31544078 -0.54815221 -0.236003778 -0.391676835 -0.24269736
## C9   0.36518014  0.02351475  0.03437989 -0.026983953  0.073584459 -0.16098079
## C10  0.33770141 -0.04098155 -0.18180508 -0.324675779  0.134458476  0.14722489
## C11  0.34328403 -0.02262005 -0.07420687 -0.120471118  0.353072725 -0.09590779
## C12  0.35282812  0.12629871 -0.02019002 -0.175880682 -0.008617542 -0.03474902
## C13 -0.04270752 -0.25898530 -0.62907602  0.340481234  0.274050965  0.32272686
## C14  0.30618151  0.27013184  0.06388215 -0.296128573  0.310468454  0.28929247
## C15  0.32865962 -0.12576693 -0.07151956  0.322489588  0.016992208 -0.42075576
##             PC7         PC8         PC9        PC10         PC11        PC12
## C3   0.62675471 -0.13050140  0.00919123 -0.06803383  0.054958448 -0.12351784
## C4  -0.48628525  0.30999650 -0.11884656  0.06287157 -0.178220294  0.01163943
## C5  -0.01556021  0.42431379  0.01690890 -0.05660008 -0.063287629 -0.43219614
## C6   0.28229256  0.13630070  0.12498959  0.36901910  0.001866079  0.10813101
## C7  -0.23530308 -0.44748636  0.07986988 -0.34237565  0.015831963  0.03505303
## C8   0.01015517 -0.40256714  0.01478707 -0.19011220 -0.233441320 -0.15376995
## C9   0.09102931 -0.04847877 -0.51004713  0.04882813 -0.002914746  0.68111985
## C10 -0.01374605  0.00235878 -0.02878088 -0.01513581  0.814141706 -0.19941032
## C11 -0.03361245 -0.02219738  0.74953128  0.25128163 -0.162016740  0.25431367
## C12  0.06482547  0.37160078  0.04346975 -0.67487406 -0.170851553  0.08932313
## C13  0.26693636  0.36352344 -0.12209966  0.10583203 -0.093435638  0.02941375
## C14  0.14424221 -0.22627380 -0.30231514  0.22160609 -0.421924507 -0.41036760
## C15 -0.35977083 -0.04077253 -0.17977459  0.34228022  0.037912922 -0.12505441
##            PC13
## C3   0.11081954
## C4  -0.07928833
## C5  -0.56803770
## C6   0.16391107
## C7  -0.05566913
## C8  -0.18647943
## C9  -0.30943398
## C10 -0.04704713
## C11 -0.10138450
## C12  0.43604423
## C13  0.03276536
## C14  0.04996608
## C15  0.54044307
summary(PCA)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     2.6303 1.3454 1.2665 0.9121 0.74624 0.60242 0.58200
## Proportion of Variance 0.5322 0.1392 0.1234 0.0640 0.04284 0.02792 0.02606
## Cumulative Proportion  0.5322 0.6714 0.7948 0.8588 0.90165 0.92957 0.95562
##                            PC8     PC9    PC10    PC11    PC12    PC13
## Standard deviation     0.45289 0.37259 0.32675 0.26709 0.21185 0.09996
## Proportion of Variance 0.01578 0.01068 0.00821 0.00549 0.00345 0.00077
## Cumulative Proportion  0.97140 0.98208 0.99029 0.99578 0.99923 1.00000
# Cuantas componentes retener? 
PCA$sdev      # Raiz cuadrad de los valores propios
##  [1] 2.6303240 1.3453970 1.2664501 0.9121430 0.7462364 0.6024186 0.5819967
##  [8] 0.4528912 0.3725902 0.3267459 0.2670861 0.2118497 0.0999633
Var_exp=cumsum(PCA$sdev^2)*100/sum(PCA$sdev^2)  #lamnda es varianza
Var_exp  #me muestra lo que cada componente me recoje en variabilidad de los datos
##  [1]  53.22003  67.14383  79.48149  85.88152  90.16513  92.95673  95.56227
##  [8]  97.14004  98.20792  99.02917  99.57790  99.92313 100.00000
windows(height=10,width=15)
par(mfrow=c(1,2))
coord=barplot(PCA$sdev^2, xlab="Componente",ylab="Valor Propio")
lines(coord,PCA$sdev^2,col="blue",lwd=2)
abline(h=1,col="red", lty=2)
coord=barplot(Var_exp, xlab="Componente",ylab="Varianza Acumulada")
lines(coord,Var_exp,col="blue",lwd=2)
text(coord,Var_exp,round(Var_exp,2), pos=3,cex=0.6)

# Se seleccionan 2 componentes,  recogen el 74.6% de la Varianza Explicada
print(PCA$rotation[,1:3],3)
##         PC1     PC2     PC3
## C3   0.0388 -0.5623  0.3866
## C4  -0.0149 -0.6173  0.2210
## C5   0.3408  0.0694  0.1401
## C6   0.2980 -0.0221  0.1204
## C7   0.2819 -0.1531  0.1484
## C8   0.1434 -0.3154 -0.5482
## C9   0.3652  0.0235  0.0344
## C10  0.3377 -0.0410 -0.1818
## C11  0.3433 -0.0226 -0.0742
## C12  0.3528  0.1263 -0.0202
## C13 -0.0427 -0.2590 -0.6291
## C14  0.3062  0.2701  0.0639
## C15  0.3287 -0.1258 -0.0715
windows(height=10,width=15)
par(mfrow=c(3,1))
barplot(PCA$rotation[,1],ylim=c(-0.6,0.6),col=ifelse(PCA$rotation[,1]>0,"green","red"),
        main="Coeficientes estimados PC1")
barplot(PCA$rotation[,2],ylim=c(-0.6,0.6),col=ifelse(PCA$rotation[,2]>0,"green","red"),
        main="Coeficientes estimados PC2")
barplot(PCA$rotation[,3],ylim=c(-0.6,0.6),col=ifelse(PCA$rotation[,3]>0,"green","red"),
        main="Coeficientes estimados PC3")

# Interpretar las componentes

# Comp 1 = todas las variables tienen coeficientes negativos.
# representa un promedio artimético de todas las puntuaciones estandarizadas
# La componente 1 se puede ver como una estructura que mide el desempeño general en la competencia.

# Comp 2 = Coeficientes positivos para pruebas de velocidad y negativos para pruebas de potencia-fuerza.
# Representa el contraste entre velocidad y fuerza

##  Representar los individuos en las componentes principales

F_PCA=predict(PCA)[,1:3]   #predict se encarga de calcular los factores

Valor_PIB = predict(PCA)[,1]
Conoc_ByS =predict(PCA)[,2]
Comerc_Pn =predict(PCA)[,3]

rownames(PIBpc)[28]="San Andrés"
rownames(X)[27]="San Andrés"

windows(height=10,width=20)
par(mfrow=c(1,3))
dotchart(Valor_PIB,labels=rownames(X),pch=20,cex.lab=0.5, main= "PC1 : Aporte al PIB per cápita",
         cex.lab=0.8)
abline(v=0,col="red",lty=2)
dotchart(Conoc_ByS,pch=20,labels=rownames(X), main= "PC2 : Conocimiento - Bienes y Servicios",
         cex.lab=0.8)
abline(v=0,col="red",lty=2)
dotchart(Comerc_Pn,pch=20,labels=rownames(X), main= "PC3 : Fianciero y Comercial - Producción y explotación de recursos",
         cex.lab=0.8)
abline(v=0,col="red",lty=2)

windows(height=10,width=15)
plot(Valor_PIB, Conoc_ByS,pch=20,xlab="PC1 : Aporte al PIB per cápita",ylab="PC2 : Conocimiento - Bienes y Servicios")
abline(h=0,v=0,lty=2, col="red")
text(Valor_PIB, Conoc_ByS,rownames(X),cex=0.8,col="gray",pos=3)
grid()  #lineas para los ejes (malla)

windows(height=10,width=15)
plot(Valor_PIB, Comerc_Pn,pch=20,xlab="PC1 : Aporte al PIB per cápita",ylab="PC3 : Fianciero y Comercial - Producción y explotación de recursos")
abline(h=0,v=0,lty=2, col="red")
text(Valor_PIB, Comerc_Pn,rownames(X),cex=0.8,col="gray",pos=3)
grid()

windows(height=10,width=15)
plot(Conoc_ByS, Comerc_Pn,pch=20,xlab="PC2 : Conocimiento - Bienes y Servicios",ylab="PC3 : Fianciero y Comercial - Producción y explotación de recursos")
abline(h=0,v=0,lty=2, col="red")
text(Conoc_ByS, Comerc_Pn,rownames(X),cex=0.8,col="gray",pos=3)
grid()

# Representación simultánea de individuos y variables
windows(height=10,width=15)
biplot(PCA,col=c("gray","blue"),cex=0.8,xlim=c(-0.5,0.5), ylim=c(-0.5,0.5))

#los que van en la misma dirección tienen una fuerte correlación entre las vbles
#los que estan a 90 grados implican no asociación o que son vbles independientes
#En dirección contraria, asociación negativa

# Proyección de un nuevo individuo
Bogota_PIB=predict(PCA,newdata=PIBpc[3,])[1:3]
Bogota_PIB
## [1] 13.9534832  0.8162375 -7.5587336
#Al ver estos valores se observa que el dato se sale del rango en 2 de los 3 componentes
#Algunos puntos se ven al borde en algunas gráficas como San ANdrés, Casanare y Santander
#Pero al solo encontrarse en el límite de un componente principal, deciden dejarse en el análisis

windows(height=10,width=15)
plot(Valor_PIB, Comerc_Pn,pch=20,xlim= c(-5,15),ylim= c(-8,2), 
     xlab="PC1 : Aporte al PIB per cápita",ylab="PC3 : Fianciero y Comercial - Producción y explotación de recursos")
abline(h=0,v=0,lty=2, col="red")
text(Valor_PIB, Comerc_Pn,rownames(X),cex=0.8,col="gray",pos=3)
grid()  #lineas para los ejes (malla)
points(Bogota_PIB[1],Bogota_PIB[3],pch=15,col="red")
text(Bogota_PIB[1],Bogota_PIB[3],"Bogotá_Proy",cex=0.8,col="gray",pos=3) #el pos es posici?n del texto