#install.packages("easypackages") # Libreria especial para hacer carga automática de librerias
library("easypackages")
lib_req<-c("MASS","visdat","car","HSAUR2","corrplot","plotrix")# Listado de librerias requeridas por el script
easypackages::packages(lib_req) # Verificación, instalación y carga de librerias.
## Loading required package: MASS
## Loading required package: visdat
## Loading required package: car
## Loading required package: carData
## Loading required package: HSAUR2
## Loading required package: tools
## Loading required package: corrplot
## corrplot 0.92 loaded
## Loading required package: plotrix
## All packages loaded successfully
#------------------------------------------------------------------------------#
#### 1. Lectura de Datos ####
#------------------------------------------------------------------------------#
library(readxl)
file.choose() #para escoger el archivo desde la carpeta
## [1] "C:\\Users\\julia\\OneDrive\\Desktop\\Maestria\\Semestre 1\\Metodos cuantitativos\\5 Analsis de componentes principales\\PIBpc.xlsx"
PIBpc <- read_excel("C:\\Users\\julia\\OneDrive\\Desktop\\Maestria\\Semestre 1\\Metodos cuantitativos\\5 Analsis de componentes principales\\PIBpc.xlsx")
#datos<-read.table("clipboard",header=T,dec=".",sep="\t")
View(PIBpc)
str(PIBpc) # La estructura de las variables es adecuada, no requiere transofrmación.
## tibble [33 x 15] (S3: tbl_df/tbl/data.frame)
## $ Departamento : chr [1:33] "Antioquia" "Atlantico" "Bogota D.C." "Bolivar" ...
## $ Abreviatura : chr [1:33] "Anti" "Atla" "Bogo" "Boli" ...
## $ Agricultura, ganadería, caza, silvicultura y pesca : num [1:33] 1.06e-03 1.59e-04 1.77e-06 6.49e-04 2.06e-03 ...
## $ Explotación de minas y canteras : num [1:33] 3.45e-04 4.16e-05 4.49e-05 3.49e-04 1.45e-03 ...
## $ Industrias manufactureras : num [1:33] 0.00317 0.00236 0.00261 0.00235 0.00237 ...
## $ Suministro de electricidad, gas, vapor y aire acondicionado; distribución de agua; evacuación y tratamiento de aguas residuales, gestión de desechos y actividades de saneamiento ambiental : num [1:33] 0.000826 0.00097 0.000564 0.000404 0.00108 ...
## $ Construcción : num [1:33] 0.00162 0.00112 0.00148 0.00154 0.00212 ...
## $ Comercio al por mayor y al por menor; reparación de vehículos automotores y motocicletas; transporte y almacenamiento; alojamiento y servicios de comida : num [1:33] 0.00316 0.00291 0.00577 0.00256 0.00343 ...
## $ Información y comunicaciones : num [1:33] 0.000562 0.0004 0.001321 0.000284 0.000302 ...
## $ Actividades financieras y de seguros : num [1:33] 0.00097 0.000648 0.002616 0.000348 0.000368 ...
## $ Actividades inmobiliarias : num [1:33] 0.0017 0.001061 0.004024 0.000968 0.001067 ...
## $ Actividades profesionales, científicas y técnicas; actividades de servicios administrativos y de apoyo : num [1:33] 0.001726 0.001084 0.002642 0.00101 0.000827 ...
## $ Administración pública y defensa; planes de seguridad social de afiliación obligatoria; educación; actividades de atención de la salud humana y de servicios sociales : num [1:33] 0.0022 0.00225 0.00448 0.00255 0.00247 ...
## $ Actividades artísticas, de entretenimiento y recreación y otras actividades de servicios; actividades de los hogares individuales en calidad de empleadores; actividades no diferenciadas de los hogares individuales como productores de bienes y servicios para uso propio: num [1:33] 0.000465 0.000368 0.001219 0.000215 0.000209 ...
## $ Impuestos : num [1:33] 0.00173 0.00146 0.00306 0.00166 0.00132 ...
columnas <- colnames(PIBpc)
columnas
## [1] "Departamento"
## [2] "Abreviatura"
## [3] "Agricultura, ganadería, caza, silvicultura y pesca"
## [4] "Explotación de minas y canteras"
## [5] "Industrias manufactureras"
## [6] "Suministro de electricidad, gas, vapor y aire acondicionado; distribución de agua; evacuación y tratamiento de aguas residuales, gestión de desechos y actividades de saneamiento ambiental"
## [7] "Construcción"
## [8] "Comercio al por mayor y al por menor; reparación de vehículos automotores y motocicletas; transporte y almacenamiento; alojamiento y servicios de comida"
## [9] "Información y comunicaciones"
## [10] "Actividades financieras y de seguros"
## [11] "Actividades inmobiliarias"
## [12] "Actividades profesionales, científicas y técnicas; actividades de servicios administrativos y de apoyo"
## [13] "Administración pública y defensa; planes de seguridad social de afiliación obligatoria; educación; actividades de atención de la salud humana y de servicios sociales"
## [14] "Actividades artísticas, de entretenimiento y recreación y otras actividades de servicios; actividades de los hogares individuales en calidad de empleadores; actividades no diferenciadas de los hogares individuales como productores de bienes y servicios para uso propio"
## [15] "Impuestos"
colnames(PIBpc) <- c('Depto','Abrev','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15')
windows(height=10,width=15)
visdat::vis_miss(PIBpc) # Visualización de Datos Faltantes, no hay datos faltantes.
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

#todas las vbles se mueven en la misma dirección de desempeño
## Visualización Univariada de datos
PIBpc <- data.frame(PIBpc[-2],row.names = 'Depto')
Resumen= rbind(apply(PIBpc,2,"mean"),
apply(PIBpc,2,"sd"))
rownames(Resumen)=c("Promedio", "Desviación")
print(Resumen,2)
## C3 C4 C5 C6 C7 C8 C9 C10
## Promedio 0.00135 0.0016 0.0012 0.00036 0.00100 0.0026 0.00027 0.00040
## Desviación 0.00088 0.0035 0.0012 0.00029 0.00044 0.0022 0.00025 0.00045
## C11 C12 C13 C14 C15
## Promedio 0.00088 0.00063 0.00243 0.00024 0.00099
## Desviación 0.00076 0.00060 0.00047 0.00021 0.00091
windows(height=10,width=15)
par(mfrow=c(2,7))
lapply(colnames(PIBpc),function(y){
boxplot(PIBpc[,y],ylab=y,cex=1.5,pch=20,col="blue")
})
## [[1]]
## [[1]]$stats
## [,1]
## [1,] 0.0000017651
## [2,] 0.0007834526
## [3,] 0.0012259332
## [4,] 0.0019234639
## [5,] 0.0032969458
##
## [[1]]$n
## [1] 33
##
## [[1]]$conf
## [,1]
## [1,] 0.0009123814
## [2,] 0.0015394850
##
## [[1]]$out
## [1] 0.003666048
##
## [[1]]$group
## [1] 1
##
## [[1]]$names
## [1] ""
##
##
## [[2]]
## [[2]]$stats
## [,1]
## [1,] 0.0000147256
## [2,] 0.0000416027
## [3,] 0.0001453865
## [4,] 0.0007584899
## [5,] 0.0014533347
##
## [[2]]$n
## [1] 33
##
## [[2]]$conf
## [,1]
## [1,] -5.178809e-05
## [2,] 3.425611e-04
##
## [[2]]$out
## [1] 0.005066300 0.003765644 0.013433047 0.005758367 0.014689725 0.003152308
##
## [[2]]$group
## [1] 1 1 1 1 1 1
##
## [[2]]$names
## [1] ""
##
##
## [[3]]
## [[3]]$stats
## [,1]
## [1,] 0.0000098234
## [2,] 0.0002221225
## [3,] 0.0006364525
## [4,] 0.0019744641
## [5,] 0.0045157631
##
## [[3]]$n
## [1] 33
##
## [[3]]$conf
## [,1]
## [1,] 0.0001544837
## [2,] 0.0011184213
##
## [[3]]$out
## numeric(0)
##
## [[3]]$group
## numeric(0)
##
## [[3]]$names
## [1] ""
##
##
## [[4]]
## [[4]]$stats
## [,1]
## [1,] 0.0000142744
## [2,] 0.0001256419
## [3,] 0.0002925538
## [4,] 0.0005256133
## [5,] 0.0010801120
##
## [[4]]$n
## [1] 33
##
## [[4]]$conf
## [,1]
## [1,] 0.0001825446
## [2,] 0.0004025630
##
## [[4]]$out
## numeric(0)
##
## [[4]]$group
## numeric(0)
##
## [[4]]$names
## [1] ""
##
##
## [[5]]
## [[5]]$stats
## [,1]
## [1,] 0.0003409184
## [2,] 0.0006911470
## [3,] 0.0009486054
## [4,] 0.0011514308
## [5,] 0.0016201181
##
## [[5]]$n
## [1] 33
##
## [[5]]$conf
## [,1]
## [1,] 0.0008220077
## [2,] 0.0010752031
##
## [[5]]$out
## [1] 0.002120891 0.002254678
##
## [[5]]$group
## [1] 1 1
##
## [[5]]$names
## [1] ""
##
##
## [[6]]
## [[6]]$stats
## [,1]
## [1,] 0.0005394066
## [2,] 0.0014976081
## [3,] 0.0021030158
## [4,] 0.0029135101
## [5,] 0.0034254262
##
## [[6]]$n
## [1] 33
##
## [[6]]$conf
## [,1]
## [1,] 0.001713582
## [2,] 0.002492449
##
## [[6]]$out
## [1] 0.005765305 0.005508334 0.013094263
##
## [[6]]$group
## [1] 1 1 1
##
## [[6]]$names
## [1] ""
##
##
## [[7]]
## [[7]]$stats
## [,1]
## [1,] 0.0000054203
## [2,] 0.0001191128
## [3,] 0.0002068047
## [4,] 0.0003552118
## [5,] 0.0006654954
##
## [[7]]$n
## [1] 33
##
## [[7]]$conf
## [,1]
## [1,] 0.0001418674
## [2,] 0.0002717420
##
## [[7]]$out
## [1] 0.001321444
##
## [[7]]$group
## [1] 1
##
## [[7]]$names
## [1] ""
##
##
## [[8]]
## [[8]]$stats
## [,1]
## [1,] 0.0000647886
## [2,] 0.0001842099
## [3,] 0.0002381115
## [4,] 0.0004321295
## [5,] 0.0007210548
##
## [[8]]$n
## [1] 33
##
## [[8]]$conf
## [,1]
## [1,] 0.000169923
## [2,] 0.000306300
##
## [[8]]$out
## [1] 0.0009697706 0.0026159909
##
## [[8]]$group
## [1] 1 1
##
## [[8]]$names
## [1] ""
##
##
## [[9]]
## [[9]]$stats
## [,1]
## [1,] 0.0001378162
## [2,] 0.0004306560
## [3,] 0.0007258326
## [4,] 0.0010444960
## [5,] 0.0019387810
##
## [[9]]$n
## [1] 33
##
## [[9]]$conf
## [,1]
## [1,] 0.0005570004
## [2,] 0.0008946648
##
## [[9]]$out
## [1] 0.004023524 0.002423239
##
## [[9]]$group
## [1] 1 1
##
## [[9]]$names
## [1] ""
##
##
## [[10]]
## [[10]]$stats
## [,1]
## [1,] 0.0000066204
## [2,] 0.0001383614
## [3,] 0.0005452353
## [4,] 0.0009782647
## [5,] 0.0017403809
##
## [[10]]$n
## [1] 33
##
## [[10]]$conf
## [,1]
## [1,] 0.0003142260
## [2,] 0.0007762446
##
## [[10]]$out
## [1] 0.002641694
##
## [[10]]$group
## [1] 1
##
## [[10]]$names
## [1] ""
##
##
## [[11]]
## [[11]]$stats
## [,1]
## [1,] 0.001792587
## [2,] 0.002152163
## [3,] 0.002331543
## [4,] 0.002546605
## [5,] 0.003116327
##
## [[11]]$n
## [1] 33
##
## [[11]]$conf
## [,1]
## [1,] 0.002223054
## [2,] 0.002440031
##
## [[11]]$out
## [1] 0.004481686 0.003160621
##
## [[11]]$group
## [1] 1 1
##
## [[11]]$names
## [1] ""
##
##
## [[12]]
## [[12]]$stats
## [,1]
## [1,] 0.0000428233
## [2,] 0.0001209356
## [3,] 0.0001880561
## [4,] 0.0003111150
## [5,] 0.0004650461
##
## [[12]]$n
## [1] 33
##
## [[12]]$conf
## [,1]
## [1,] 0.0001357486
## [2,] 0.0002403636
##
## [[12]]$out
## [1] 0.001218857
##
## [[12]]$group
## [1] 1
##
## [[12]]$names
## [1] ""
##
##
## [[13]]
## [[13]]$stats
## [,1]
## [1,] 0.0001377523
## [2,] 0.0003961793
## [3,] 0.0006899259
## [4,] 0.0013203308
## [5,] 0.0018295074
##
## [[13]]$n
## [1] 33
##
## [[13]]$conf
## [,1]
## [1,] 0.0004357448
## [2,] 0.0009441070
##
## [[13]]$out
## [1] 0.003058367 0.004443086
##
## [[13]]$group
## [1] 1 1
##
## [[13]]$names
## [1] ""
which (PIBpc$C3==boxplot.stats(PIBpc$C3)$out) #busca cuál es el dato por fuera del boxplot
## [1] 26
which (PIBpc$C4%in%boxplot.stats(PIBpc$C4)$out)
## [1] 9 14 16 25 26 27
which (PIBpc$C5==boxplot.stats(PIBpc$C5)$out)
## integer(0)
which (PIBpc$C6==boxplot.stats(PIBpc$C6)$out)
## integer(0)
which (PIBpc$C7%in%boxplot.stats(PIBpc$C7)$out)
## [1] 5 21
which (PIBpc$C8%in%boxplot.stats(PIBpc$C8)$out)
## [1] 3 26 28
which (PIBpc$C9==boxplot.stats(PIBpc$C9)$out)
## [1] 3
which (PIBpc$C10%in%boxplot.stats(PIBpc$C10)$out)
## [1] 1 3
which (PIBpc$C11%in%boxplot.stats(PIBpc$C11)$out)
## [1] 3 24
which (PIBpc$C12==boxplot.stats(PIBpc$C12)$out)
## [1] 3
which (PIBpc$C13%in%boxplot.stats(PIBpc$C13)$out)
## [1] 3 28
which (PIBpc$C14==boxplot.stats(PIBpc$C14)$out)
## [1] 3
which (PIBpc$C15%in%boxplot.stats(PIBpc$C15)$out)
## [1] 3 21
## Aná¡lisis Bivariado de la correlación.
windows(height=10,width=15)
pairs(PIBpc,pch=20,cex=1.5,lower.panel = NULL) #con pairs hace parejas de datos

#Se observa que desde la columna 8 hasta la 15 hay un punto atÃpico comparando todas las vbles
M.cor = cor(PIBpc,method="pearson")
print(M.cor,2) #las columnas 3 y 4 solo tienen correlación entre ellas, pero no con las otras
## C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13
## C3 1.000 0.6160 0.050 0.0887 0.202 -0.066 -0.12 -0.238 -0.139 -0.20 -0.261
## C4 0.616 1.0000 -0.160 0.0083 0.074 0.135 -0.10 -0.047 -0.063 -0.15 -0.072
## C5 0.050 -0.1600 1.000 0.7457 0.743 0.175 0.71 0.475 0.640 0.78 0.042
## C6 0.089 0.0083 0.746 1.0000 0.623 0.323 0.56 0.397 0.469 0.64 -0.041
## C7 0.202 0.0735 0.743 0.6229 1.000 0.172 0.58 0.408 0.541 0.55 0.144
## C8 -0.066 0.1351 0.175 0.3234 0.172 1.000 0.40 0.462 0.432 0.40 0.486
## C9 -0.123 -0.1016 0.708 0.5593 0.578 0.397 1.00 0.923 0.930 0.92 0.527
## C10 -0.238 -0.0472 0.475 0.3969 0.408 0.462 0.92 1.000 0.926 0.85 0.708
## C11 -0.139 -0.0632 0.640 0.4691 0.541 0.432 0.93 0.926 1.000 0.89 0.573
## C12 -0.196 -0.1476 0.778 0.6393 0.552 0.402 0.92 0.846 0.894 1.00 0.383
## C13 -0.261 -0.0716 0.042 -0.0407 0.144 0.486 0.53 0.708 0.573 0.38 1.000
## C14 -0.291 -0.1874 0.520 0.4138 0.418 0.258 0.92 0.936 0.902 0.86 0.554
## C15 0.013 -0.0217 0.849 0.5779 0.741 0.480 0.81 0.655 0.776 0.76 0.349
## C14 C15
## C3 -0.29 0.013
## C4 -0.19 -0.022
## C5 0.52 0.849
## C6 0.41 0.578
## C7 0.42 0.741
## C8 0.26 0.480
## C9 0.92 0.811
## C10 0.94 0.655
## C11 0.90 0.776
## C12 0.86 0.756
## C13 0.55 0.349
## C14 1.00 0.599
## C15 0.60 1.000
p.cor=corrplot::cor.mtest(PIBpc)$p #test de correlación, significancia estadÃstica de valor p
print(p.cor,4) #C3 y C4 en su mayorÃa son superior a 5% indica que NO hay significancia
## C3 C4 C5 C6 C7 C8 C9
## C3 0.0000000 0.0001353 7.812e-01 6.236e-01 2.604e-01 0.714674 4.941e-01
## C4 0.0001353 0.0000000 3.737e-01 9.632e-01 6.844e-01 0.453375 5.736e-01
## C5 0.7811500 0.3736882 0.000e+00 6.355e-07 7.191e-07 0.330807 4.104e-06
## C6 0.6236246 0.9632316 6.355e-07 0.000e+00 1.081e-04 0.066343 7.145e-04
## C7 0.2604244 0.6843734 7.191e-07 1.081e-04 0.000e+00 0.337505 4.318e-04
## C8 0.7146739 0.4533752 3.308e-01 6.634e-02 3.375e-01 0.000000 2.204e-02
## C9 0.4941231 0.5736426 4.104e-06 7.145e-04 4.318e-04 0.022042 0.000e+00
## C10 0.1823453 0.7943217 5.209e-03 2.219e-02 1.844e-02 0.006781 2.057e-14
## C11 0.4405647 0.7266624 6.108e-05 5.887e-03 1.152e-03 0.011969 5.251e-15
## C12 0.2736935 0.4125066 9.849e-08 6.206e-05 8.629e-04 0.020501 2.606e-14
## C13 0.1426348 0.6921206 8.161e-01 8.221e-01 4.225e-01 0.004134 1.636e-03
## C14 0.1002207 0.2963759 1.902e-03 1.667e-02 1.560e-02 0.147192 2.756e-14
## C15 0.9413917 0.9044553 4.084e-10 4.281e-04 8.007e-07 0.004699 1.038e-08
## C10 C11 C12 C13 C14 C15
## C3 1.823e-01 4.406e-01 2.737e-01 1.426e-01 1.002e-01 9.414e-01
## C4 7.943e-01 7.267e-01 4.125e-01 6.921e-01 2.964e-01 9.045e-01
## C5 5.209e-03 6.108e-05 9.849e-08 8.161e-01 1.902e-03 4.084e-10
## C6 2.219e-02 5.887e-03 6.206e-05 8.221e-01 1.667e-02 4.281e-04
## C7 1.844e-02 1.152e-03 8.629e-04 4.225e-01 1.560e-02 8.007e-07
## C8 6.781e-03 1.197e-02 2.050e-02 4.134e-03 1.472e-01 4.699e-03
## C9 2.057e-14 5.251e-15 2.606e-14 1.636e-03 2.756e-14 1.038e-08
## C10 0.000e+00 1.245e-14 5.768e-10 4.097e-06 1.275e-15 3.562e-05
## C11 1.245e-14 0.000e+00 2.343e-12 4.865e-04 7.891e-13 1.125e-07
## C12 5.768e-10 2.343e-12 0.000e+00 2.801e-02 1.249e-10 3.644e-07
## C13 4.097e-06 4.865e-04 2.801e-02 0.000e+00 8.186e-04 4.628e-02
## C14 1.275e-15 7.891e-13 1.249e-10 8.186e-04 0.000e+00 2.329e-04
## C15 3.562e-05 1.125e-07 3.644e-07 4.628e-02 2.329e-04 0.000e+00
#El hecho que el aspecto 3 y 4 aumente el PIB, no se realiciona con el resto, no se asocia con ninguna
windows(height=10,width=15)
corrplot::corrplot(M.cor, method = "ellipse",addCoef.col = "black",type="upper",
col=c("blue","red"),diag=FALSE,
p.mat = p.cor, sig.level = 0.01, insig = "blank"
) #asociación positiva roja, negativa azul

#ordene por grupitos relacionados, cluster (order="hclust")
#grafique las que son mayores a 1% y las insignificantes no las grafique, en blanco
#Con la gráfica al ver todo en rojo se certifica la conclusión anterior que todas se relacionan de manera positiva
# Sin considerar el registro atípico
M.cor = cor(PIBpc[-3,],method="pearson") #se quita registro atÃpico
print(M.cor,2)
## C3 C4 C5 C6 C7 C8 C9 C10 C11 C12
## C3 1.0000 0.6200 0.11 0.129 0.271 0.0045 0.131 0.012 0.1001 -0.042
## C4 0.6200 1.0000 -0.15 0.018 0.091 0.1613 -0.065 0.050 -0.0071 -0.126
## C5 0.1142 -0.1473 1.00 0.742 0.732 0.1280 0.856 0.646 0.7408 0.833
## C6 0.1290 0.0185 0.74 1.000 0.615 0.3036 0.712 0.628 0.5662 0.709
## C7 0.2707 0.0910 0.73 0.615 1.000 0.1286 0.667 0.520 0.6027 0.553
## C8 0.0045 0.1613 0.13 0.304 0.129 1.0000 0.320 0.528 0.3732 0.320
## C9 0.1309 -0.0646 0.86 0.712 0.667 0.3204 1.000 0.841 0.8406 0.894
## C10 0.0115 0.0499 0.65 0.628 0.520 0.5278 0.841 1.000 0.8643 0.856
## C11 0.1001 -0.0071 0.74 0.566 0.603 0.3732 0.841 0.864 1.0000 0.839
## C12 -0.0424 -0.1258 0.83 0.709 0.553 0.3200 0.894 0.856 0.8389 1.000
## C13 -0.0783 -0.0162 -0.20 -0.225 -0.015 0.4733 -0.154 0.042 -0.0206 -0.172
## C14 -0.1195 -0.2202 0.64 0.561 0.467 0.0822 0.808 0.775 0.7632 0.817
## C15 0.1430 0.0116 0.86 0.582 0.739 0.4249 0.837 0.695 0.7723 0.699
## C13 C14 C15
## C3 -0.078 -0.120 0.143
## C4 -0.016 -0.220 0.012
## C5 -0.200 0.638 0.856
## C6 -0.225 0.561 0.582
## C7 -0.015 0.467 0.739
## C8 0.473 0.082 0.425
## C9 -0.154 0.808 0.837
## C10 0.042 0.775 0.695
## C11 -0.021 0.763 0.772
## C12 -0.172 0.817 0.699
## C13 1.000 -0.278 0.050
## C14 -0.278 1.000 0.508
## C15 0.050 0.508 1.000
p.cor=corrplot::cor.mtest(PIBpc)$p
print(p.cor,4)
## C3 C4 C5 C6 C7 C8 C9
## C3 0.0000000 0.0001353 7.812e-01 6.236e-01 2.604e-01 0.714674 4.941e-01
## C4 0.0001353 0.0000000 3.737e-01 9.632e-01 6.844e-01 0.453375 5.736e-01
## C5 0.7811500 0.3736882 0.000e+00 6.355e-07 7.191e-07 0.330807 4.104e-06
## C6 0.6236246 0.9632316 6.355e-07 0.000e+00 1.081e-04 0.066343 7.145e-04
## C7 0.2604244 0.6843734 7.191e-07 1.081e-04 0.000e+00 0.337505 4.318e-04
## C8 0.7146739 0.4533752 3.308e-01 6.634e-02 3.375e-01 0.000000 2.204e-02
## C9 0.4941231 0.5736426 4.104e-06 7.145e-04 4.318e-04 0.022042 0.000e+00
## C10 0.1823453 0.7943217 5.209e-03 2.219e-02 1.844e-02 0.006781 2.057e-14
## C11 0.4405647 0.7266624 6.108e-05 5.887e-03 1.152e-03 0.011969 5.251e-15
## C12 0.2736935 0.4125066 9.849e-08 6.206e-05 8.629e-04 0.020501 2.606e-14
## C13 0.1426348 0.6921206 8.161e-01 8.221e-01 4.225e-01 0.004134 1.636e-03
## C14 0.1002207 0.2963759 1.902e-03 1.667e-02 1.560e-02 0.147192 2.756e-14
## C15 0.9413917 0.9044553 4.084e-10 4.281e-04 8.007e-07 0.004699 1.038e-08
## C10 C11 C12 C13 C14 C15
## C3 1.823e-01 4.406e-01 2.737e-01 1.426e-01 1.002e-01 9.414e-01
## C4 7.943e-01 7.267e-01 4.125e-01 6.921e-01 2.964e-01 9.045e-01
## C5 5.209e-03 6.108e-05 9.849e-08 8.161e-01 1.902e-03 4.084e-10
## C6 2.219e-02 5.887e-03 6.206e-05 8.221e-01 1.667e-02 4.281e-04
## C7 1.844e-02 1.152e-03 8.629e-04 4.225e-01 1.560e-02 8.007e-07
## C8 6.781e-03 1.197e-02 2.050e-02 4.134e-03 1.472e-01 4.699e-03
## C9 2.057e-14 5.251e-15 2.606e-14 1.636e-03 2.756e-14 1.038e-08
## C10 0.000e+00 1.245e-14 5.768e-10 4.097e-06 1.275e-15 3.562e-05
## C11 1.245e-14 0.000e+00 2.343e-12 4.865e-04 7.891e-13 1.125e-07
## C12 5.768e-10 2.343e-12 0.000e+00 2.801e-02 1.249e-10 3.644e-07
## C13 4.097e-06 4.865e-04 2.801e-02 0.000e+00 8.186e-04 4.628e-02
## C14 1.275e-15 7.891e-13 1.249e-10 8.186e-04 0.000e+00 2.329e-04
## C15 3.562e-05 1.125e-07 3.644e-07 4.628e-02 2.329e-04 0.000e+00
windows(height=10,width=15)
corrplot::corrplot(M.cor, method = "ellipse",addCoef.col = "black",type="upper",
col=c("blue","red"),diag=FALSE,
p.mat = p.cor, sig.level = 0.01, insig = "blank"
)

#No afectó la significancia de ninguna de las correlaciones, pero si afecta los coeficientes
# el punto Afecta la estructura de la correlación
windows()
plot(PIBpc$C9,PIBpc$C10,pch=20)#para ver como afecta el dato atÃpico

#plot(PIBpc$C9,PIBpc$C10,pch=20,xlim = c(0,0.0007), ylim= c(0,0.001))
plot(PIBpc$C8,PIBpc$C15,pch=20)

plot(PIBpc$C12,PIBpc$C13,pch=20)

plot(PIBpc$C14,PIBpc$C11,pch=20)

#modifica mucho no la significancia de las correlaciones sino las magnitudes
#------------------------------------------------------------------------------#
#### 2. Análisis de Componentes Principales ####
#------------------------------------------------------------------------------#
X=PIBpc[-3,] # Datos sin considerar el registro atÃpico
PCA=prcomp(X,center=TRUE,scale=TRUE) # Las variables son autoescaladas
#La anterior es la que genera los componentes
PCA
## Standard deviations (1, .., p=13):
## [1] 2.6303240 1.3453970 1.2664501 0.9121430 0.7462364 0.6024186 0.5819967
## [8] 0.4528912 0.3725902 0.3267459 0.2670861 0.2118497 0.0999633
##
## Rotation (n x k) = (13 x 13):
## PC1 PC2 PC3 PC4 PC5 PC6
## C3 0.03884458 -0.56227546 0.38662216 0.042451625 0.187009770 -0.22574917
## C4 -0.01492272 -0.61728694 0.22100590 -0.357624300 0.029324100 0.22869718
## C5 0.34076419 0.06938535 0.14008413 0.313715540 -0.104151815 -0.23008127
## C6 0.29797814 -0.02210956 0.12036551 -0.002692612 -0.691537879 0.35968507
## C7 0.28189051 -0.15310036 0.14837604 0.506923376 0.040899485 0.48233620
## C8 0.14344466 -0.31544078 -0.54815221 -0.236003778 -0.391676835 -0.24269736
## C9 0.36518014 0.02351475 0.03437989 -0.026983953 0.073584459 -0.16098079
## C10 0.33770141 -0.04098155 -0.18180508 -0.324675779 0.134458476 0.14722489
## C11 0.34328403 -0.02262005 -0.07420687 -0.120471118 0.353072725 -0.09590779
## C12 0.35282812 0.12629871 -0.02019002 -0.175880682 -0.008617542 -0.03474902
## C13 -0.04270752 -0.25898530 -0.62907602 0.340481234 0.274050965 0.32272686
## C14 0.30618151 0.27013184 0.06388215 -0.296128573 0.310468454 0.28929247
## C15 0.32865962 -0.12576693 -0.07151956 0.322489588 0.016992208 -0.42075576
## PC7 PC8 PC9 PC10 PC11 PC12
## C3 0.62675471 -0.13050140 0.00919123 -0.06803383 0.054958448 -0.12351784
## C4 -0.48628525 0.30999650 -0.11884656 0.06287157 -0.178220294 0.01163943
## C5 -0.01556021 0.42431379 0.01690890 -0.05660008 -0.063287629 -0.43219614
## C6 0.28229256 0.13630070 0.12498959 0.36901910 0.001866079 0.10813101
## C7 -0.23530308 -0.44748636 0.07986988 -0.34237565 0.015831963 0.03505303
## C8 0.01015517 -0.40256714 0.01478707 -0.19011220 -0.233441320 -0.15376995
## C9 0.09102931 -0.04847877 -0.51004713 0.04882813 -0.002914746 0.68111985
## C10 -0.01374605 0.00235878 -0.02878088 -0.01513581 0.814141706 -0.19941032
## C11 -0.03361245 -0.02219738 0.74953128 0.25128163 -0.162016740 0.25431367
## C12 0.06482547 0.37160078 0.04346975 -0.67487406 -0.170851553 0.08932313
## C13 0.26693636 0.36352344 -0.12209966 0.10583203 -0.093435638 0.02941375
## C14 0.14424221 -0.22627380 -0.30231514 0.22160609 -0.421924507 -0.41036760
## C15 -0.35977083 -0.04077253 -0.17977459 0.34228022 0.037912922 -0.12505441
## PC13
## C3 0.11081954
## C4 -0.07928833
## C5 -0.56803770
## C6 0.16391107
## C7 -0.05566913
## C8 -0.18647943
## C9 -0.30943398
## C10 -0.04704713
## C11 -0.10138450
## C12 0.43604423
## C13 0.03276536
## C14 0.04996608
## C15 0.54044307
summary(PCA)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.6303 1.3454 1.2665 0.9121 0.74624 0.60242 0.58200
## Proportion of Variance 0.5322 0.1392 0.1234 0.0640 0.04284 0.02792 0.02606
## Cumulative Proportion 0.5322 0.6714 0.7948 0.8588 0.90165 0.92957 0.95562
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.45289 0.37259 0.32675 0.26709 0.21185 0.09996
## Proportion of Variance 0.01578 0.01068 0.00821 0.00549 0.00345 0.00077
## Cumulative Proportion 0.97140 0.98208 0.99029 0.99578 0.99923 1.00000
# Cuantas componentes retener?
PCA$sdev # Raiz cuadrad de los valores propios
## [1] 2.6303240 1.3453970 1.2664501 0.9121430 0.7462364 0.6024186 0.5819967
## [8] 0.4528912 0.3725902 0.3267459 0.2670861 0.2118497 0.0999633
Var_exp=cumsum(PCA$sdev^2)*100/sum(PCA$sdev^2) #lamnda es varianza
Var_exp #me muestra lo que cada componente me recoje en variabilidad de los datos
## [1] 53.22003 67.14383 79.48149 85.88152 90.16513 92.95673 95.56227
## [8] 97.14004 98.20792 99.02917 99.57790 99.92313 100.00000
windows(height=10,width=15)
par(mfrow=c(1,2))
coord=barplot(PCA$sdev^2, xlab="Componente",ylab="Valor Propio")
lines(coord,PCA$sdev^2,col="blue",lwd=2)
abline(h=1,col="red", lty=2)
coord=barplot(Var_exp, xlab="Componente",ylab="Varianza Acumulada")
lines(coord,Var_exp,col="blue",lwd=2)
text(coord,Var_exp,round(Var_exp,2), pos=3,cex=0.6)

# Se seleccionan 2 componentes, recogen el 74.6% de la Varianza Explicada
print(PCA$rotation[,1:3],3)
## PC1 PC2 PC3
## C3 0.0388 -0.5623 0.3866
## C4 -0.0149 -0.6173 0.2210
## C5 0.3408 0.0694 0.1401
## C6 0.2980 -0.0221 0.1204
## C7 0.2819 -0.1531 0.1484
## C8 0.1434 -0.3154 -0.5482
## C9 0.3652 0.0235 0.0344
## C10 0.3377 -0.0410 -0.1818
## C11 0.3433 -0.0226 -0.0742
## C12 0.3528 0.1263 -0.0202
## C13 -0.0427 -0.2590 -0.6291
## C14 0.3062 0.2701 0.0639
## C15 0.3287 -0.1258 -0.0715
windows(height=10,width=15)
par(mfrow=c(3,1))
barplot(PCA$rotation[,1],ylim=c(-0.6,0.6),col=ifelse(PCA$rotation[,1]>0,"green","red"),
main="Coeficientes estimados PC1")
barplot(PCA$rotation[,2],ylim=c(-0.6,0.6),col=ifelse(PCA$rotation[,2]>0,"green","red"),
main="Coeficientes estimados PC2")
barplot(PCA$rotation[,3],ylim=c(-0.6,0.6),col=ifelse(PCA$rotation[,3]>0,"green","red"),
main="Coeficientes estimados PC3")

# Interpretar las componentes
# Comp 1 = todas las variables tienen coeficientes negativos.
# representa un promedio artimético de todas las puntuaciones estandarizadas
# La componente 1 se puede ver como una estructura que mide el desempeño general en la competencia.
# Comp 2 = Coeficientes positivos para pruebas de velocidad y negativos para pruebas de potencia-fuerza.
# Representa el contraste entre velocidad y fuerza
## Representar los individuos en las componentes principales
F_PCA=predict(PCA)[,1:3] #predict se encarga de calcular los factores
Valor_PIB = predict(PCA)[,1]
Conoc_ByS =predict(PCA)[,2]
Comerc_Pn =predict(PCA)[,3]
rownames(PIBpc)[28]="San Andrés"
rownames(X)[27]="San Andrés"
windows(height=10,width=20)
par(mfrow=c(1,3))
dotchart(Valor_PIB,labels=rownames(X),pch=20,cex.lab=0.5, main= "PC1 : Aporte al PIB per cápita",
cex.lab=0.8)
abline(v=0,col="red",lty=2)
dotchart(Conoc_ByS,pch=20,labels=rownames(X), main= "PC2 : Conocimiento - Bienes y Servicios",
cex.lab=0.8)
abline(v=0,col="red",lty=2)
dotchart(Comerc_Pn,pch=20,labels=rownames(X), main= "PC3 : Fianciero y Comercial - Producción y explotación de recursos",
cex.lab=0.8)
abline(v=0,col="red",lty=2)

windows(height=10,width=15)
plot(Valor_PIB, Conoc_ByS,pch=20,xlab="PC1 : Aporte al PIB per cápita",ylab="PC2 : Conocimiento - Bienes y Servicios")
abline(h=0,v=0,lty=2, col="red")
text(Valor_PIB, Conoc_ByS,rownames(X),cex=0.8,col="gray",pos=3)
grid() #lineas para los ejes (malla)

windows(height=10,width=15)
plot(Valor_PIB, Comerc_Pn,pch=20,xlab="PC1 : Aporte al PIB per cápita",ylab="PC3 : Fianciero y Comercial - Producción y explotación de recursos")
abline(h=0,v=0,lty=2, col="red")
text(Valor_PIB, Comerc_Pn,rownames(X),cex=0.8,col="gray",pos=3)
grid()

windows(height=10,width=15)
plot(Conoc_ByS, Comerc_Pn,pch=20,xlab="PC2 : Conocimiento - Bienes y Servicios",ylab="PC3 : Fianciero y Comercial - Producción y explotación de recursos")
abline(h=0,v=0,lty=2, col="red")
text(Conoc_ByS, Comerc_Pn,rownames(X),cex=0.8,col="gray",pos=3)
grid()

# Representación simultánea de individuos y variables
windows(height=10,width=15)
biplot(PCA,col=c("gray","blue"),cex=0.8,xlim=c(-0.5,0.5), ylim=c(-0.5,0.5))

#los que van en la misma dirección tienen una fuerte correlación entre las vbles
#los que estan a 90 grados implican no asociación o que son vbles independientes
#En dirección contraria, asociación negativa
# Proyección de un nuevo individuo
Bogota_PIB=predict(PCA,newdata=PIBpc[3,])[1:3]
Bogota_PIB
## [1] 13.9534832 0.8162375 -7.5587336
#Al ver estos valores se observa que el dato se sale del rango en 2 de los 3 componentes
#Algunos puntos se ven al borde en algunas gráficas como San ANdrés, Casanare y Santander
#Pero al solo encontrarse en el límite de un componente principal, deciden dejarse en el análisis
windows(height=10,width=15)
plot(Valor_PIB, Comerc_Pn,pch=20,xlim= c(-5,15),ylim= c(-8,2),
xlab="PC1 : Aporte al PIB per cápita",ylab="PC3 : Fianciero y Comercial - Producción y explotación de recursos")
abline(h=0,v=0,lty=2, col="red")
text(Valor_PIB, Comerc_Pn,rownames(X),cex=0.8,col="gray",pos=3)
grid() #lineas para los ejes (malla)
points(Bogota_PIB[1],Bogota_PIB[3],pch=15,col="red")
text(Bogota_PIB[1],Bogota_PIB[3],"Bogotá_Proy",cex=0.8,col="gray",pos=3) #el pos es posici?n del texto
