Tarea N°7

Exportación de datos

# Packages used for piping and for rendering HTML tables.
library(dplyr)
library(kableExtra)

# Restore the saved workspace.
# NOTE(review): the path is machine-specific, and the .RData file is presumably
# what defines X6_2 -- confirm before running on another computer.
load("C:/Users/DELL i5/Desktop/Pablo/Ciclo II 2023/Metodos para analisis/Pablo José Flores Parra - 6-2.RData")
matriz_X <- X6_2

# Show the first rows of the data as a centred, material-themed table.
kable_material(
  kable(
    head(matriz_X),
    caption = "Matriz de informacion",
    align = "c",
    digits = 6
  ),
  html_font = "sans-serif"
)
Matriz de informacion
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
4 1 4 3 3 2 4 4 4 4
5 5 4 4 3 3 4 1 1 3
2 1 3 1 4 2 1 5 4 5
1 1 1 1 4 4 2 5 5 4
1 1 2 1 5 5 4 3 3 2
5 5 5 5 3 3 4 2 2 1

Cálculo de la matriz de covarianza

De forma manual

Matriz de variables centradas

library(readr)

# Centre a numeric vector by subtracting its own mean from every element.
centrado <- function(x) {
  x - mean(x)
}

# Apply the centring to every column (MARGIN = 2) of the data.
Xcentrada <- apply(matriz_X, 2, centrado)

# Preview the first rows of the centred data, rounded to two decimals.
kable_material(
  kable(
    head(Xcentrada),
    caption = "Matriz de Variables centradas:",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Matriz de Variables centradas:
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
0.3 -2.4 0.5 0.2 -0.7 -1.7 0.35 1.15 1.2 1.35
1.3 1.6 0.5 1.2 -0.7 -0.7 0.35 -1.85 -1.8 0.35
-1.7 -2.4 -0.5 -1.8 0.3 -1.7 -2.65 2.15 1.2 2.35
-2.7 -2.4 -2.5 -1.8 0.3 0.3 -1.65 2.15 2.2 1.35
-2.7 -2.4 -1.5 -1.8 1.3 1.3 0.35 0.15 0.2 -0.65
1.3 1.6 1.5 2.2 -0.7 -0.7 0.35 -0.85 -0.8 -1.65

Matriz de covarianza

# Sample covariance matrix computed by hand: t(Xc) %*% Xc / (n - 1),
# where Xc is the column-centred data and n the number of rows.
n_obs <- nrow(matriz_X)
matriz_v <- (t(Xcentrada) %*% Xcentrada) / (n_obs - 1)

# Render the full covariance matrix, rounded to two decimals.
kable_material(
  kable(
    matriz_v,
    caption = "Calculo de V(X) forma manual:",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Calculo de V(X) forma manual:
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
V1 1.80 1.92 1.32 1.73 -0.62 -0.31 0.36 -1.21 -1.27 -0.90
V2 1.92 2.67 1.42 2.14 -0.66 -0.14 0.52 -1.78 -1.81 -1.54
V3 1.32 1.42 1.42 1.53 -0.53 -0.32 0.29 -0.92 -1.11 -0.87
V4 1.73 2.14 1.53 2.48 -0.80 -0.48 0.35 -1.61 -1.83 -1.39
V5 -0.62 -0.66 -0.53 -0.80 0.85 0.80 0.21 0.37 0.46 0.15
V6 -0.31 -0.14 -0.32 -0.48 0.80 1.38 0.63 0.22 0.09 -0.37
V7 0.36 0.52 0.29 0.35 0.21 0.63 1.61 -0.53 -0.34 -0.71
V8 -1.21 -1.78 -0.92 -1.61 0.37 0.22 -0.53 1.92 1.81 1.37
V9 -1.27 -1.81 -1.11 -1.83 0.46 0.09 -0.34 1.81 2.17 1.56
V10 -0.90 -1.54 -0.87 -1.39 0.15 -0.37 -0.71 1.37 1.56 1.82

Usando el comando cov de R base

library(dplyr)

# Same covariance matrix, this time obtained with base R's cov(), shown as a
# table so it can be compared with the manual computation.
kable_material(
  kable(
    cov(matriz_X),
    caption = "Calculo de V(X) a traves de R base",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Calculo de V(X) a traves de R base
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
V1 1.80 1.92 1.32 1.73 -0.62 -0.31 0.36 -1.21 -1.27 -0.90
V2 1.92 2.67 1.42 2.14 -0.66 -0.14 0.52 -1.78 -1.81 -1.54
V3 1.32 1.42 1.42 1.53 -0.53 -0.32 0.29 -0.92 -1.11 -0.87
V4 1.73 2.14 1.53 2.48 -0.80 -0.48 0.35 -1.61 -1.83 -1.39
V5 -0.62 -0.66 -0.53 -0.80 0.85 0.80 0.21 0.37 0.46 0.15
V6 -0.31 -0.14 -0.32 -0.48 0.80 1.38 0.63 0.22 0.09 -0.37
V7 0.36 0.52 0.29 0.35 0.21 0.63 1.61 -0.53 -0.34 -0.71
V8 -1.21 -1.78 -0.92 -1.61 0.37 0.22 -0.53 1.92 1.81 1.37
V9 -1.27 -1.81 -1.11 -1.83 0.46 0.09 -0.34 1.81 2.17 1.56
V10 -0.90 -1.54 -0.87 -1.39 0.15 -0.37 -0.71 1.37 1.56 1.82

Calcula la matriz de correlación para la batería de indicadores

De forma manual

Matriz de variables estandarizadas

# Standardise every column: scale() centres on the column mean and, because
# `scale` is left at its default (TRUE), also divides by the column sd.
Zx <- scale(matriz_X, center = TRUE)

# Preview the first rows of the standardised data.
kable_material(
  kable(
    head(Zx),
    caption = "Matriz de Variables Estandarizadas:",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Matriz de Variables Estandarizadas:
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
0.22 -1.47 0.42 0.13 -0.76 -1.45 0.28 0.83 0.81 1.00
0.97 0.98 0.42 0.76 -0.76 -0.60 0.28 -1.33 -1.22 0.26
-1.27 -1.47 -0.42 -1.14 0.32 -1.45 -2.09 1.55 0.81 1.74
-2.01 -1.47 -2.10 -1.14 0.32 0.26 -1.30 1.55 1.49 1.00
-2.01 -1.47 -1.26 -1.14 1.41 1.11 0.28 0.11 0.14 -0.48
0.97 0.98 1.26 1.40 -0.76 -0.60 0.28 -0.61 -0.54 -1.22

Cálculo de R(X)

# Correlation matrix computed by hand from the standardised data:
# t(Z) %*% Z / (n - 1). `n_obs` is the row count defined earlier in the script.
mat_R <- (t(Zx) %*% Zx) / (n_obs - 1)

# Render the matrix with the material theme plus striped/hover row styling.
tabla_R <- kable(
  mat_R,
  caption = "Calculo de R(X) forma manual:",
  align = "c",
  digits = 2
)
tabla_R %>%
  kable_material(html_font = "sans-serif") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Calculo de R(X) forma manual:
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
V1 1.00 0.87 0.82 0.82 -0.50 -0.19 0.21 -0.65 -0.64 -0.50
V2 0.87 1.00 0.73 0.83 -0.44 -0.07 0.25 -0.78 -0.75 -0.70
V3 0.82 0.73 1.00 0.81 -0.48 -0.23 0.19 -0.56 -0.63 -0.54
V4 0.82 0.83 0.81 1.00 -0.55 -0.26 0.17 -0.74 -0.79 -0.65
V5 -0.50 -0.44 -0.48 -0.55 1.00 0.74 0.18 0.29 0.34 0.12
V6 -0.19 -0.07 -0.23 -0.26 0.74 1.00 0.42 0.13 0.05 -0.24
V7 0.21 0.25 0.19 0.17 0.18 0.42 1.00 -0.30 -0.18 -0.41
V8 -0.65 -0.78 -0.56 -0.74 0.29 0.13 -0.30 1.00 0.89 0.73
V9 -0.64 -0.75 -0.63 -0.79 0.34 0.05 -0.18 0.89 1.00 0.78
V10 -0.50 -0.70 -0.54 -0.65 0.12 -0.24 -0.41 0.73 0.78 1.00

Usando el comando cor de R base

# Same correlation matrix, obtained with base R's cor(), shown as a table so
# it can be compared with the manual computation.
kable_material(
  kable(
    cor(matriz_X),
    caption = "Calculo de R(X) a traves de R base",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Calculo de R(X) a traves de R base
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
V1 1.00 0.87 0.82 0.82 -0.50 -0.19 0.21 -0.65 -0.64 -0.50
V2 0.87 1.00 0.73 0.83 -0.44 -0.07 0.25 -0.78 -0.75 -0.70
V3 0.82 0.73 1.00 0.81 -0.48 -0.23 0.19 -0.56 -0.63 -0.54
V4 0.82 0.83 0.81 1.00 -0.55 -0.26 0.17 -0.74 -0.79 -0.65
V5 -0.50 -0.44 -0.48 -0.55 1.00 0.74 0.18 0.29 0.34 0.12
V6 -0.19 -0.07 -0.23 -0.26 0.74 1.00 0.42 0.13 0.05 -0.24
V7 0.21 0.25 0.19 0.17 0.18 0.42 1.00 -0.30 -0.18 -0.41
V8 -0.65 -0.78 -0.56 -0.74 0.29 0.13 -0.30 1.00 0.89 0.73
V9 -0.64 -0.75 -0.63 -0.79 0.34 0.05 -0.18 0.89 1.00 0.78
V10 -0.50 -0.70 -0.54 -0.65 0.12 -0.24 -0.41 0.73 0.78 1.00

Forma gráfica

PerformanceAnalytics

library(PerformanceAnalytics)

# Correlation chart of all variables; the data is converted to a matrix first.
# `histogram = TRUE` requests histograms and `pch = 12` selects the point symbol.
chart.Correlation(
  as.matrix(matriz_X),
  histogram = TRUE,
  pch = 12
)

Corrplot

library(corrplot)
library(grDevices)
library(Hmisc)

# rcorr() returns, among other elements, the correlation matrix ($r) and the
# matrix of p-values ($P) for every pair of columns.
Mat_R <- rcorr(as.matrix(matriz_X))

# Upper-triangle correlation plot annotated with p-values.
# `p.mat` must receive the p-values ($P); passing $r here would print the
# correlation coefficients as if they were p-values.
# With insig = "p-value", sig.level = -1 makes every p-value count as
# "insignificant" (all p > -1), so all of them are written on the plot.
corrplot(Mat_R$r,
         p.mat = Mat_R$P,
         type = "upper",
         tl.col = "red",
         tl.srt = 20,
         pch.col = "blue",
         insig = "p-value",
         sig.level = -1,
         col = terrain.colors(100))

Análisis de componentes principales

Forma manual

Matriz R(X)

library(Hmisc)

# Correlations and their p-values for all column pairs; rcorr() needs a matrix.
Rx <- rcorr(as.matrix(matriz_X))

# Table of the correlation coefficients (element `r` of the rcorr result).
kable_material(
  kable(
    Rx$r,
    caption = "Matriz R(X)",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Matriz R(X)
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
V1 1.00 0.87 0.82 0.82 -0.50 -0.19 0.21 -0.65 -0.64 -0.50
V2 0.87 1.00 0.73 0.83 -0.44 -0.07 0.25 -0.78 -0.75 -0.70
V3 0.82 0.73 1.00 0.81 -0.48 -0.23 0.19 -0.56 -0.63 -0.54
V4 0.82 0.83 0.81 1.00 -0.55 -0.26 0.17 -0.74 -0.79 -0.65
V5 -0.50 -0.44 -0.48 -0.55 1.00 0.74 0.18 0.29 0.34 0.12
V6 -0.19 -0.07 -0.23 -0.26 0.74 1.00 0.42 0.13 0.05 -0.24
V7 0.21 0.25 0.19 0.17 0.18 0.42 1.00 -0.30 -0.18 -0.41
V8 -0.65 -0.78 -0.56 -0.74 0.29 0.13 -0.30 1.00 0.89 0.73
V9 -0.64 -0.75 -0.63 -0.79 0.34 0.05 -0.18 0.89 1.00 0.78
V10 -0.50 -0.70 -0.54 -0.65 0.12 -0.24 -0.41 0.73 0.78 1.00

p-values de R(X)

# Table of the p-values (element `P` of the rcorr result), rounded to two
# decimals and styled with striped/hover rows.
tabla_p <- kable(
  Rx$P,
  caption = "p-values de R(X)",
  align = "c",
  digits = 2
)
tabla_p %>%
  kable_classic_2(html_font = "sans-serif") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
p-values de R(X)
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
V1 NA 0.00 0.00 0.00 0.02 0.41 0.37 0.00 0.00 0.03
V2 0.00 NA 0.00 0.00 0.05 0.77 0.29 0.00 0.00 0.00
V3 0.00 0.00 NA 0.00 0.03 0.34 0.42 0.01 0.00 0.01
V4 0.00 0.00 0.00 NA 0.01 0.27 0.46 0.00 0.00 0.00
V5 0.02 0.05 0.03 0.01 NA 0.00 0.46 0.21 0.14 0.61
V6 0.41 0.77 0.34 0.27 0.00 NA 0.06 0.58 0.82 0.32
V7 0.37 0.29 0.42 0.46 0.46 0.06 NA 0.20 0.45 0.07
V8 0.00 0.00 0.01 0.00 0.21 0.58 0.20 NA 0.00 0.00
V9 0.00 0.00 0.00 0.00 0.14 0.82 0.45 0.00 NA 0.00
V10 0.03 0.00 0.01 0.00 0.61 0.32 0.07 0.00 0.00 NA

Descomposición de autovalores y autovectores

library(stargazer)

# Eigen-decomposition of the correlation matrix: `values` holds the
# eigenvalues and `vectors` the matching eigenvectors (one per column).
descomposicion <- eigen(Rx$r)

# t() turns the eigenvalue vector into a one-row matrix so that the table
# shows the eigenvalues side by side.
fila_autovalores <- t(descomposicion$values)
kable(
  fila_autovalores,
  caption = "Autovalores de R(X)",
  align = "c",
  digits = 2
) %>%
  kable_classic_2(html_font = "sans-serif") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Autovalores de R(X)
5.7 2.07 0.72 0.55 0.32 0.27 0.15 0.13 0.07 0.03

Autovectores de R(X)

# Table of the eigenvectors of R(X); each column is one eigenvector, in the
# same order as the eigenvalues.
kable(
  descomposicion$vectors,
  caption = "Autovectores de R(X)",
  align = "c",
  digits = 2
) %>%
  kable_classic_2(html_font = "sans-serif") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Autovectores de R(X)
-0.37 -0.07 -0.31 0.34 0.38 -0.13 0.05 0.14 0.67 -0.10
-0.39 0.04 -0.04 0.19 0.28 -0.47 -0.33 -0.06 -0.48 0.41
-0.35 -0.08 -0.32 0.36 -0.31 0.56 -0.11 0.34 -0.32 -0.09
-0.39 -0.08 -0.02 0.09 -0.14 0.12 0.26 -0.85 -0.03 -0.11
0.22 0.50 0.18 0.30 0.34 0.50 -0.26 -0.23 0.12 0.27
0.08 0.63 0.00 0.40 -0.13 -0.33 0.38 0.09 -0.18 -0.36
-0.12 0.47 -0.63 -0.57 0.04 0.07 0.08 -0.02 0.00 0.16
0.36 -0.11 -0.33 0.34 -0.47 -0.17 0.10 -0.10 0.19 0.58
0.37 -0.10 -0.43 0.10 0.03 -0.14 -0.56 -0.28 -0.08 -0.49
0.32 -0.32 -0.27 0.11 0.56 0.15 0.51 -0.01 -0.35 0.04

Utilizando R

library(factoextra)
library(ggplot2)

# Heavily penalise scientific notation so numbers print in fixed notation.
# Note: this changes a session-wide option.
options(scipen = 99999)

# PCA on the correlation matrix (cor = TRUE). fix_sign = FALSE leaves the
# signs of the loadings as computed instead of forcing the first element of
# each loading to be non-negative.
PC <- princomp(matriz_X, cor = TRUE, fix_sign = FALSE)

# Eigenvalue, percentage of variance and cumulative percentage per component.
kable_material(
  kable(
    factoextra::get_eig(PC),
    caption = "Resumen de PCA",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Resumen de PCA
eigenvalue variance.percent cumulative.variance.percent
Dim.1 5.70 57.01 57.01
Dim.2 2.07 20.69 77.70
Dim.3 0.72 7.20 84.91
Dim.4 0.55 5.48 90.39
Dim.5 0.32 3.16 93.54
Dim.6 0.27 2.71 96.25
Dim.7 0.15 1.46 97.72
Dim.8 0.13 1.28 99.00
Dim.9 0.07 0.68 99.68
Dim.10 0.03 0.32 100.00

Gráfico de sedimentación

# Scree plot of the eigenvalues of the PCA, with value labels on the bars.
# The horizontal line at 1 marks the eigenvalue-greater-than-one cut-off.
fviz_eig(
  PC,
  choice = "eigenvalue",
  barcolor = "blue",
  barfill = "blue",
  addlabels = TRUE
) +
  labs(
    title = "Gráfico de Sedimentación",
    subtitle = "Usando princomp, con Autovalores",
    x = "Componentes",
    y = "Autovalores"
  ) +
  geom_hline(yintercept = 1)

¿Cuántas Componentes habría que retener?

Siguiendo el criterio del codo, se deberían retener las primeras tres dimensiones; sin embargo, según el criterio de la raíz latente (retener las componentes con autovalor mayor que 1), deberían retenerse solo las primeras dos dimensiones.