2.2.1 Matriz de Información: X

library(readr)
library(kableExtra)
# Location of the raw data set: a whitespace-delimited text file read
# without a header row (col_names = FALSE below).
url_link <- "http://halweb.uc3m.es/esp/Personal/personas/agrane/libro/ficheros_datos/capitulo_7/datos_prob_7_3.txt"

# read_table2() is deprecated since readr 2.0.0; read_table() is its direct
# replacement for whitespace-separated files (requires readr >= 2.0.0).
# Because no header is read, readr auto-names the columns (X1, X2, ...).
mat_X <- read_table(url_link, col_names = FALSE)

# Preview the first rows of the information matrix as a formatted table.
mat_X %>%
  head() %>%
  kable(caption = "Matriz de información:", align = "c", digits = 6) %>%
  kable_material(html_font = "sans-serif")
Matriz de información:
X1 X2 X3 X4 X5 X6 X7 X8
30 41 670 3903 12 94 341 1.2
124 46 410 955 6 57 89 0.5
95 48 370 6 5 26 20 0.1
90 43 680 435 8 20 331 1.6
112 41 100 1293 2 51 22 0.1
73 51 390 6115 4 35 93 0.2

2.2.2. Cálculo de V(X):

Cálculo “Manual”

library(dplyr)
library(kableExtra)
# Center a numeric vector by subtracting its arithmetic mean from every
# element.
#   x: numeric vector.
# Returns a numeric vector of the same length as `x`.
centrado <- function(x) {
  media <- mean(x)
  x - media
}
# Apply `centrado` to every column (MARGIN = 2) of the information matrix,
# producing the matrix of mean-centered variables.
Xcentrada <- apply(mat_X, 2, centrado)

# Preview the first rows of the centered matrix.
kable_material(
  kable(
    head(Xcentrada),
    caption = "Matriz de Variables centradas:",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Matriz de Variables centradas:
X1 X2 X3 X4 X5 X6 X7 X8
-49.67 -0.67 303.89 1463.5 2.94 -60.17 196.72 0.71
44.33 4.33 43.89 -1484.5 -3.06 -97.17 -55.28 0.01
15.33 6.33 3.89 -2433.5 -4.06 -128.17 -124.28 -0.39
10.33 1.33 313.89 -2004.5 -1.06 -134.17 186.72 1.11
32.33 -0.67 -266.11 -1146.5 -7.06 -103.17 -122.28 -0.39
-6.67 9.33 23.89 3675.5 -5.06 -119.17 -51.28 -0.29
# Number of observations (rows) in the information matrix.
n_obs <- nrow(mat_X)

# Variance-covariance matrix computed by hand from the centered data:
# V(X) = Xc' Xc / (n - 1).
mat_V <- (t(Xcentrada) %*% Xcentrada) / (n_obs - 1)

# Render V(X) as a styled table.
kable_styling(
  kable_material(
    kable(
      mat_V,
      caption = "Cálculo de V(X) forma manual:",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de V(X) forma manual:
X1 X2 X3 X4 X5 X6 X7 X8
X1 716.12 45.06 -2689.61 -16082.06 -121.63 -1019.06 -1844.37 -5.15
X2 45.06 46.94 -144.31 2756.71 -24.63 -938.41 -205.25 -0.42
X3 -2689.61 -144.31 36389.87 123889.71 740.82 838.33 17499.38 73.48
X4 -16082.06 2756.71 123889.71 5736372.38 3078.97 6672.44 140343.50 412.79
X5 -121.63 -24.63 740.82 3078.97 51.47 405.58 565.22 1.59
X6 -1019.06 -938.41 838.33 6672.44 405.58 26579.56 3149.77 -2.96
X7 -1844.37 -205.25 17499.38 140343.50 565.22 3149.77 16879.39 64.51
X8 -5.15 -0.42 73.48 412.79 1.59 -2.96 64.51 0.28

Cálculo con R base

library(dplyr)
library(kableExtra)
# Variance-covariance matrix obtained directly with cov(), shown with the
# same table styling as the manual computation so both can be compared.
kable_styling(
  kable_material(
    kable(
      cov(mat_X),
      caption = "Cálculo de V(X) a través de R base",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de V(X) a través de R base
X1 X2 X3 X4 X5 X6 X7 X8
X1 716.12 45.06 -2689.61 -16082.06 -121.63 -1019.06 -1844.37 -5.15
X2 45.06 46.94 -144.31 2756.71 -24.63 -938.41 -205.25 -0.42
X3 -2689.61 -144.31 36389.87 123889.71 740.82 838.33 17499.38 73.48
X4 -16082.06 2756.71 123889.71 5736372.38 3078.97 6672.44 140343.50 412.79
X5 -121.63 -24.63 740.82 3078.97 51.47 405.58 565.22 1.59
X6 -1019.06 -938.41 838.33 6672.44 405.58 26579.56 3149.77 -2.96
X7 -1844.37 -205.25 17499.38 140343.50 565.22 3149.77 16879.39 64.51
X8 -5.15 -0.42 73.48 412.79 1.59 -2.96 64.51 0.28

2.2.3. Cálculo de R(X)

Cálculo “Manual”

# Standardize every column of the information matrix. Only `center` is
# given explicitly; scale()'s `scale` argument is left at its default.
Zx <- scale(mat_X, center = TRUE)

# Preview the first rows of the standardized matrix.
kable_material(
  kable(
    head(Zx),
    caption = "Matriz de Variables Estandarizadas:",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Matriz de Variables Estandarizadas:
X1 X2 X3 X4 X5 X6 X7 X8
-1.86 -0.10 1.59 0.61 0.41 -0.37 1.51 1.34
1.66 0.63 0.23 -0.62 -0.43 -0.60 -0.43 0.02
0.57 0.92 0.02 -1.02 -0.57 -0.79 -0.96 -0.73
0.39 0.19 1.65 -0.84 -0.15 -0.82 1.44 2.09
1.21 -0.10 -1.39 -0.48 -0.98 -0.63 -0.94 -0.73
-0.25 1.36 0.13 1.53 -0.70 -0.73 -0.39 -0.54
# Number of observations (rows) in the information matrix.
n_obs <- nrow(mat_X)

# Correlation matrix computed by hand from the standardized data:
# R(X) = Z' Z / (n - 1).
mat_R <- (t(Zx) %*% Zx) / (n_obs - 1)

# Render R(X) as a styled table.
kable_styling(
  kable_material(
    kable(
      mat_R,
      caption = "Cálculo de R(X) forma manual:",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de R(X) forma manual:
X1 X2 X3 X4 X5 X6 X7 X8
X1 1.00 0.25 -0.53 -0.25 -0.63 -0.23 -0.53 -0.36
X2 0.25 1.00 -0.11 0.17 -0.50 -0.84 -0.23 -0.12
X3 -0.53 -0.11 1.00 0.27 0.54 0.03 0.71 0.73
X4 -0.25 0.17 0.27 1.00 0.18 0.02 0.45 0.32
X5 -0.63 -0.50 0.54 0.18 1.00 0.35 0.61 0.42
X6 -0.23 -0.84 0.03 0.02 0.35 1.00 0.15 -0.03
X7 -0.53 -0.23 0.71 0.45 0.61 0.15 1.00 0.93
X8 -0.36 -0.12 0.73 0.32 0.42 -0.03 0.93 1.00

Cálculo usando R base

library(dplyr)
library(kableExtra)
# Correlation matrix obtained directly with cor(), shown with the same
# table styling as the manual computation so both can be compared.
kable_styling(
  kable_material(
    kable(
      cor(mat_X),
      caption = "Cálculo de R(X) a través de R base",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de R(X) a través de R base
X1 X2 X3 X4 X5 X6 X7 X8
X1 1.00 0.25 -0.53 -0.25 -0.63 -0.23 -0.53 -0.36
X2 0.25 1.00 -0.11 0.17 -0.50 -0.84 -0.23 -0.12
X3 -0.53 -0.11 1.00 0.27 0.54 0.03 0.71 0.73
X4 -0.25 0.17 0.27 1.00 0.18 0.02 0.45 0.32
X5 -0.63 -0.50 0.54 0.18 1.00 0.35 0.61 0.42
X6 -0.23 -0.84 0.03 0.02 0.35 1.00 0.15 -0.03
X7 -0.53 -0.23 0.71 0.45 0.61 0.15 1.00 0.93
X8 -0.36 -0.12 0.73 0.32 0.42 -0.03 0.93 1.00

2.2.4 Versiones gráficas de R(X)

Pueden ser especialmente útiles en la presentación de reportes, además de proveer una vista rápida de algunas características propias de la correlación entre las variables.

2.2.4.1 Usando el paquete PerformanceAnalytics

library(PerformanceAnalytics)
# Correlation chart of all variables; histogram = TRUE requests the
# per-variable histograms and pch = 12 selects the point symbol.
mat_X %>%
  as.matrix() %>%
  chart.Correlation(histogram = TRUE, pch = 12)

2.2.4.2. Usando el paquete corrplot

library(corrplot)
library(grDevices)
library(Hmisc)
# rcorr() returns both the correlation matrix ($r) and the matrix of
# p-values ($P) for the information matrix.
Mat_R <- rcorr(as.matrix(mat_X))

# Upper-triangle correlation plot with a number printed in every cell.
# `p.mat` must receive the p-value matrix ($P); the previous code passed the
# correlation matrix ($r), so the figures drawn as "p-values" were in fact
# correlation coefficients.
# sig.level = -1 marks every cell as "insignificant", and
# insig = "p-value" then prints the p-value on each of them.
# NOTE(review): if the intent was to print the coefficients rather than the
# p-values, use `addCoef.col` instead of `p.mat` — confirm with the author.
corrplot(Mat_R$r,
         p.mat = Mat_R$P,
         type = "upper",
         tl.col = "black",
         tl.srt = 20,
         pch.col = "blue",
         insig = "p-value",
         sig.level = -1,
         col = terrain.colors(100))

2.3.1. Ejemplo de extracción.

Cálculo “manual” de los componentes

Siguiendo el ejemplo de los datos de “Desarrollo”, se encontrarán todos los componentes de la batería de indicadores (dataframe):

library(kableExtra)
library(dplyr)
library(Hmisc)
# Correlations ($r) and their p-values ($P) for the information matrix.
Rx <- rcorr(as.matrix(mat_X))

# Render the correlation matrix R(X) as a styled table.
kable_styling(
  kable_material(
    kable(Rx$r, caption = "Matriz R(X)", align = "c", digits = 2),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Matriz R(X)
X1 X2 X3 X4 X5 X6 X7 X8
X1 1.00 0.25 -0.53 -0.25 -0.63 -0.23 -0.53 -0.36
X2 0.25 1.00 -0.11 0.17 -0.50 -0.84 -0.23 -0.12
X3 -0.53 -0.11 1.00 0.27 0.54 0.03 0.71 0.73
X4 -0.25 0.17 0.27 1.00 0.18 0.02 0.45 0.32
X5 -0.63 -0.50 0.54 0.18 1.00 0.35 0.61 0.42
X6 -0.23 -0.84 0.03 0.02 0.35 1.00 0.15 -0.03
X7 -0.53 -0.23 0.71 0.45 0.61 0.15 1.00 0.93
X8 -0.36 -0.12 0.73 0.32 0.42 -0.03 0.93 1.00
# Render the p-values that accompany R(X); the diagonal shows as NA in the
# rendered table.
kable_styling(
  kable_classic_2(
    kable(Rx$P, caption = "p-values de R(X)", align = "c", digits = 2),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
p-values de R(X)
X1 X2 X3 X4 X5 X6 X7 X8
X1 NA 0.33 0.02 0.32 0.00 0.35 0.02 0.14
X2 0.33 NA 0.66 0.51 0.03 0.00 0.36 0.65
X3 0.02 0.66 NA 0.28 0.02 0.92 0.00 0.00
X4 0.32 0.51 0.28 NA 0.48 0.95 0.06 0.19
X5 0.00 0.03 0.02 0.48 NA 0.16 0.01 0.08
X6 0.35 0.00 0.92 0.95 0.16 NA 0.56 0.89
X7 0.02 0.36 0.00 0.06 0.01 0.56 NA 0.00
X8 0.14 0.65 0.00 0.19 0.08 0.89 0.00 NA

Descomposición de autovalores y autovectores

library(stargazer)
# Eigen decomposition of the correlation matrix R(X); the result holds the
# eigenvalues ($values) and the eigenvectors ($vectors).
descomposicion <- eigen(Rx$r)

# Show the eigenvalues as a single-row table (hence the transpose).
kable_styling(
  kable_classic_2(
    kable(
      t(descomposicion$values),
      caption = "Autovalores de R(X)",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Autovalores de R(X)
3.75 1.93 0.84 0.72 0.34 0.31 0.1 0.01
# Show the eigenvectors of R(X), one per column, as a styled table.
kable_styling(
  kable_classic_2(
    kable(
      descomposicion$vectors,
      caption = "Autovectores de R(X)",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Autovectores de R(X)
-0.37 -0.05 -0.03 0.71 -0.42 0.41 0.12 0.06
-0.22 -0.61 0.05 -0.24 0.05 0.01 0.70 -0.15
0.41 -0.20 -0.27 -0.02 0.36 0.75 -0.05 0.11
0.22 -0.29 0.88 0.04 -0.08 0.16 -0.23 -0.06
0.41 0.18 -0.09 -0.35 -0.75 0.18 0.19 -0.18
0.18 0.61 0.30 0.21 0.32 0.09 0.57 -0.18
0.47 -0.17 -0.01 0.28 -0.09 -0.34 0.26 0.69
0.41 -0.27 -0.21 0.45 0.04 -0.29 -0.07 -0.65

Cálculo usando R:

library(dplyr)
library(factoextra)
library(kableExtra)
library(stargazer)
library(ggplot2)
# Heavily penalize scientific notation when printing numbers. This is a
# session-wide option and is not restored afterwards in this chunk.
options(scipen = 99999)

# Principal components computed on the correlation matrix (cor = TRUE),
# with the automatic sign adjustment of the loadings switched off.
PC <- princomp(mat_X, cor = TRUE, fix_sign = FALSE)

# Eigenvalue, percentage of variance and cumulative percentage for each
# component.
kable_styling(
  kable_material(
    kable(
      factoextra::get_eig(PC),
      caption = "Resumen de PCA",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("hover")
)
Resumen de PCA
eigenvalue variance.percent cumulative.variance.percent
Dim.1 3.75 46.92 46.92
Dim.2 1.93 24.11 71.03
Dim.3 0.84 10.45 81.48
Dim.4 0.72 9.04 90.52
Dim.5 0.34 4.26 94.77
Dim.6 0.31 3.81 98.59
Dim.7 0.10 1.24 99.83
Dim.8 0.01 0.17 100.00
# Scree plot of the eigenvalues of each component, with a horizontal
# reference line at eigenvalue = 1 (presumably the "eigenvalue greater than
# one" retention rule — confirm).
fviz_eig(
  PC,
  choice = "eigenvalue",
  addlabels = TRUE,
  barfill = "red",
  barcolor = "red"
) +
  geom_hline(yintercept = 1) +
  labs(
    title = "Gráfico de Sedimentación",
    subtitle = "Usando princomp, con Autovalores",
    x = "Componentes",
    y = "Autovalores"
  )

# Scree plot of the percentage of variance explained by each component.
fviz_eig(
  PC,
  choice = "variance",
  addlabels = TRUE,
  barfill = "green",
  barcolor = "green"
) +
  labs(
    title = "Gráfico de Sedimentación",
    subtitle = "Usando princomp, con %Varianza Explicada",
    x = "Componentes",
    y = "%Varianza"
  )

Correlación de los componentes con las variables: $r_{ij} = a_{ij}\cdot\sqrt{\lambda_j}$

library(dplyr)
library(kableExtra)
# Correlation between each original variable (rows) and each component
# (columns): column j of the eigenvector matrix times sqrt(lambda_j).
raiz_lambda <- as.matrix(sqrt(descomposicion$values))
autovectores <- descomposicion$vectors

# Scale every eigenvector column by its sqrt(eigenvalue) in one vectorized
# step. This replaces a loop hard-coded to 8 components, so the number of
# components now follows the data.
corr_componentes_coordenadas <- sweep(
  autovectores,
  MARGIN = 2,
  STATS = as.vector(raiz_lambda),
  FUN = `*`
)
colnames(corr_componentes_coordenadas) <-
  paste0("Comp", seq_len(ncol(corr_componentes_coordenadas)))

# Keep the result as a tibble, as the previous bind_cols() approach did.
corr_componentes_coordenadas <- as_tibble(corr_componentes_coordenadas)

corr_componentes_coordenadas %>%
  as.data.frame() %>%
  kable(caption = "Correlación de X con las componentes",
        align = "c",
        digits = 2) %>%
  kable_material(html_font = "sans-serif") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Correlación de X con las componentes
Comp1 Comp2 Comp3 Comp4 Comp5 Comp6 Comp7 Comp8
-0.72 -0.06 -0.03 0.60 -0.25 0.22 0.04 0.01
-0.43 -0.85 0.04 -0.20 0.03 0.01 0.22 -0.02
0.80 -0.28 -0.25 -0.02 0.21 0.42 -0.02 0.01
0.42 -0.40 0.81 0.03 -0.05 0.09 -0.07 -0.01
0.80 0.25 -0.08 -0.29 -0.44 0.10 0.06 -0.02
0.34 0.84 0.27 0.18 0.19 0.05 0.18 -0.02
0.91 -0.23 -0.01 0.24 -0.05 -0.19 0.08 0.08
0.80 -0.38 -0.20 0.38 0.02 -0.16 -0.02 -0.08

Usando Facto Extra

library(dplyr)
library(factoextra)
library(kableExtra)
# Variable-level PCA results from factoextra; `$coord` holds the
# coordinates of the variables on the components, presented here as their
# correlations with the components.
variables_pca <- get_pca_var(PC)

kable_styling(
  kable_material(
    kable(
      variables_pca$coord,
      caption = "Correlación de X con las componentes, usando factoextra",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Correlación de X con las componentes, usando factoextra
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7 Dim.8
X1 -0.72 -0.06 -0.03 0.60 0.25 0.22 0.04 0.01
X2 -0.43 -0.85 0.04 -0.20 -0.03 0.01 0.22 -0.02
X3 0.80 -0.28 -0.25 -0.02 -0.21 0.42 -0.02 0.01
X4 0.42 -0.40 0.81 0.03 0.05 0.09 -0.07 -0.01
X5 0.80 0.25 -0.08 -0.29 0.44 0.10 0.06 -0.02
X6 0.34 0.84 0.27 0.18 -0.19 0.05 0.18 -0.02
X7 0.91 -0.23 -0.01 0.24 0.05 -0.19 0.08 0.08
X8 0.80 -0.38 -0.20 0.38 -0.02 -0.16 -0.02 -0.08

Representación Gráfica de las correlaciones en los ejes de los componentes

# Draw one variable-correlation plot per pair of component axes:
# (1,2), (3,4), (5,6) and (7,8). Each plot is printed explicitly because it
# is produced inside a function; invisible() suppresses the returned list.
invisible(lapply(
  list(c(1, 2), c(3, 4), c(5, 6), c(7, 8)),
  function(ejes) print(fviz_pca_var(PC, repel = TRUE, axes = ejes))
))

Representación alternativa:

library(corrplot)
# Alternative view: variable/component coordinates drawn as colored squares
# with the numeric value printed in each cell. is.corr = FALSE tells
# corrplot the input is not a square correlation matrix.
corrplot(
  corr = variables_pca$coord,
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)

2.4 Análisis Factorial.

En el caso anterior se encontraron unas variables “sintéticas” que pueden sustituir a las variables originales, pero aún no se ha reducido la dimensión de la información. En este apartado se explicarán las características de la técnica de Componentes Principales, en cuanto a su uso dentro del Análisis Factorial.

2.4.2 Análisis Factorial en R

library(psych)
library(corrplot)
library(dplyr)
# 2-factor model (unrotated).
# Extracts `numero_de_factores` principal components from the correlation
# matrix Rx$r with psych::principal(); rotate = "none" keeps the solution
# unrotated and covar = FALSE treats the input as a correlation matrix.
# `numero_de_factores` is also reused by the plot that follows the model.
numero_de_factores<-2
modelo_2_factores<-principal(r = Rx$r,
                             nfactors = numero_de_factores,
                             covar = FALSE,
                             rotate = "none")
# Print the fitted model (loadings, communalities h2, uniquenesses u2 and
# fit summary).
modelo_2_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none", 
##     covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   h2    u2 com
## X1 -0.72  0.06 0.53 0.472 1.0
## X2 -0.43  0.85 0.91 0.093 1.5
## X3  0.80  0.28 0.72 0.280 1.2
## X4  0.42  0.40 0.33 0.668 2.0
## X5  0.80 -0.25 0.70 0.302 1.2
## X6  0.34 -0.84 0.82 0.176 1.3
## X7  0.91  0.23 0.89 0.108 1.1
## X8  0.80  0.38 0.78 0.217 1.4
## 
##                        PC1  PC2
## SS loadings           3.75 1.93
## Proportion Var        0.47 0.24
## Cumulative Var        0.47 0.71
## Proportion Explained  0.66 0.34
## Cumulative Proportion 0.66 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.09 
## 
## Fit based upon off diagonal values = 0.96
# Variable/component correlations restricted to the retained factors.
# NOTE(review): these values come from the princomp-based coordinates
# (variables_pca), not from the loadings of the model fitted with
# principal(); in the rendered output the two differ in the sign of the
# second component — confirm this is intended.
correlaciones_modelo <- variables_pca$coord

corrplot(
  correlaciones_modelo[, seq_len(numero_de_factores)],
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)

library(psych)
library(corrplot)
library(dplyr)
# 3-factor model (unrotated).
# Extracts `numero_de_factores` principal components from the correlation
# matrix Rx$r with psych::principal(); rotate = "none" keeps the solution
# unrotated and covar = FALSE treats the input as a correlation matrix.
# `numero_de_factores` is also reused by the plot that follows the model.
numero_de_factores<-3
modelo_3_factores<-principal(r = Rx$r,
                             nfactors = numero_de_factores,
                             covar = FALSE,
                             rotate = "none")
# Print the fitted model (loadings, communalities h2, uniquenesses u2 and
# fit summary).
modelo_3_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none", 
##     covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   PC3   h2    u2 com
## X1 -0.72  0.06 -0.03 0.53 0.472 1.0
## X2 -0.43  0.85  0.04 0.91 0.092 1.5
## X3  0.80  0.28 -0.25 0.78 0.219 1.4
## X4  0.42  0.40  0.81 0.98 0.017 2.0
## X5  0.80 -0.25 -0.08 0.71 0.295 1.2
## X6  0.34 -0.84  0.27 0.90 0.101 1.6
## X7  0.91  0.23 -0.01 0.89 0.108 1.1
## X8  0.80  0.38 -0.20 0.82 0.179 1.6
## 
##                        PC1  PC2  PC3
## SS loadings           3.75 1.93 0.84
## Proportion Var        0.47 0.24 0.10
## Cumulative Var        0.47 0.71 0.81
## Proportion Explained  0.58 0.30 0.13
## Cumulative Proportion 0.58 0.87 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.08 
## 
## Fit based upon off diagonal values = 0.97
# Variable/component correlations restricted to the retained factors.
# NOTE(review): these values come from the princomp-based coordinates
# (variables_pca), not from the loadings of the model fitted with
# principal(); in the rendered output the two differ in the sign of the
# second component — confirm this is intended.
correlaciones_modelo <- variables_pca$coord

corrplot(
  correlaciones_modelo[, seq_len(numero_de_factores)],
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)

library(psych)
library(corrplot)
library(dplyr)
# 4-factor model (unrotated).
# Extracts `numero_de_factores` principal components from the correlation
# matrix Rx$r with psych::principal(); rotate = "none" keeps the solution
# unrotated and covar = FALSE treats the input as a correlation matrix.
# `numero_de_factores` is also reused by the plot that follows the model.
numero_de_factores<-4
modelo_4_factores<-principal(r = Rx$r,
                             nfactors = numero_de_factores,
                             covar = FALSE,
                             rotate = "none")
# Print the fitted model (loadings, communalities h2, uniquenesses u2 and
# fit summary).
modelo_4_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none", 
##     covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   PC3   PC4   h2    u2 com
## X1 -0.72  0.06 -0.03  0.60 0.89 0.112 2.0
## X2 -0.43  0.85  0.04 -0.20 0.95 0.050 1.6
## X3  0.80  0.28 -0.25 -0.02 0.78 0.219 1.4
## X4  0.42  0.40  0.81  0.03 0.98 0.016 2.0
## X5  0.80 -0.25 -0.08 -0.29 0.79 0.208 1.5
## X6  0.34 -0.84  0.27  0.18 0.93 0.070 1.7
## X7  0.91  0.23 -0.01  0.24 0.95 0.052 1.3
## X8  0.80  0.38 -0.20  0.38 0.97 0.032 2.1
## 
##                        PC1  PC2  PC3  PC4
## SS loadings           3.75 1.93 0.84 0.72
## Proportion Var        0.47 0.24 0.10 0.09
## Cumulative Var        0.47 0.71 0.81 0.91
## Proportion Explained  0.52 0.27 0.12 0.10
## Cumulative Proportion 0.52 0.78 0.90 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 4 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.04 
## 
## Fit based upon off diagonal values = 0.99
# Variable/component correlations restricted to the retained factors.
# NOTE(review): these values come from the princomp-based coordinates
# (variables_pca), not from the loadings of the model fitted with
# principal(); in the rendered output the two differ in the sign of the
# second component — confirm this is intended.
correlaciones_modelo <- variables_pca$coord

corrplot(
  correlaciones_modelo[, seq_len(numero_de_factores)],
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)