2.2.2. Cálculo de V(X):
Cálculo “Manual”
library(dplyr)
library(kableExtra)
# Center a numeric vector by subtracting its arithmetic mean from every element.
#
# x: numeric vector.
# Returns a vector of the same length as x.
# mean() is called without na.rm, so a single NA in x makes the whole result NA.
centrado <- function(x) {
  media <- mean(x)
  x - media
}
# Apply centrado() to every column of mat_X (MARGIN = 2), then render the
# first rows of the centred matrix as a formatted table.
Xcentrada <- apply(mat_X, 2, centrado)
kable_material(
  kable(
    head(Xcentrada),
    caption = "Matriz de Variables centradas:",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Matriz de Variables centradas:
X1
|
X2
|
X3
|
X4
|
X5
|
X6
|
X7
|
X8
|
-49.67
|
-0.67
|
303.89
|
1463.5
|
2.94
|
-60.17
|
196.72
|
0.71
|
44.33
|
4.33
|
43.89
|
-1484.5
|
-3.06
|
-97.17
|
-55.28
|
0.01
|
15.33
|
6.33
|
3.89
|
-2433.5
|
-4.06
|
-128.17
|
-124.28
|
-0.39
|
10.33
|
1.33
|
313.89
|
-2004.5
|
-1.06
|
-134.17
|
186.72
|
1.11
|
32.33
|
-0.67
|
-266.11
|
-1146.5
|
-7.06
|
-103.17
|
-122.28
|
-0.39
|
-6.67
|
9.33
|
23.89
|
3675.5
|
-5.06
|
-119.17
|
-51.28
|
-0.29
|
# Covariance matrix computed by hand from the centred data:
# t(Xcentrada) %*% Xcentrada divided by (n_obs - 1).
n_obs <- nrow(mat_X)
mat_V <- (t(Xcentrada) %*% Xcentrada) / (n_obs - 1)
kable_styling(
  kable_material(
    kable(
      mat_V,
      caption = "Cálculo de V(X) forma manual:",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de V(X) forma manual:
|
X1
|
X2
|
X3
|
X4
|
X5
|
X6
|
X7
|
X8
|
X1
|
716.12
|
45.06
|
-2689.61
|
-16082.06
|
-121.63
|
-1019.06
|
-1844.37
|
-5.15
|
X2
|
45.06
|
46.94
|
-144.31
|
2756.71
|
-24.63
|
-938.41
|
-205.25
|
-0.42
|
X3
|
-2689.61
|
-144.31
|
36389.87
|
123889.71
|
740.82
|
838.33
|
17499.38
|
73.48
|
X4
|
-16082.06
|
2756.71
|
123889.71
|
5736372.38
|
3078.97
|
6672.44
|
140343.50
|
412.79
|
X5
|
-121.63
|
-24.63
|
740.82
|
3078.97
|
51.47
|
405.58
|
565.22
|
1.59
|
X6
|
-1019.06
|
-938.41
|
838.33
|
6672.44
|
405.58
|
26579.56
|
3149.77
|
-2.96
|
X7
|
-1844.37
|
-205.25
|
17499.38
|
140343.50
|
565.22
|
3149.77
|
16879.39
|
64.51
|
X8
|
-5.15
|
-0.42
|
73.48
|
412.79
|
1.59
|
-2.96
|
64.51
|
0.28
|
Cálculo con R base
library(dplyr)
library(kableExtra)
# Same covariance matrix, this time obtained with base R's cov(), rendered
# with the same table styling as the manual version for comparison.
kable_styling(
  kable_material(
    kable(
      cov(mat_X),
      caption = "Cálculo de V(X) a través de R base",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de V(X) a través de R base
|
X1
|
X2
|
X3
|
X4
|
X5
|
X6
|
X7
|
X8
|
X1
|
716.12
|
45.06
|
-2689.61
|
-16082.06
|
-121.63
|
-1019.06
|
-1844.37
|
-5.15
|
X2
|
45.06
|
46.94
|
-144.31
|
2756.71
|
-24.63
|
-938.41
|
-205.25
|
-0.42
|
X3
|
-2689.61
|
-144.31
|
36389.87
|
123889.71
|
740.82
|
838.33
|
17499.38
|
73.48
|
X4
|
-16082.06
|
2756.71
|
123889.71
|
5736372.38
|
3078.97
|
6672.44
|
140343.50
|
412.79
|
X5
|
-121.63
|
-24.63
|
740.82
|
3078.97
|
51.47
|
405.58
|
565.22
|
1.59
|
X6
|
-1019.06
|
-938.41
|
838.33
|
6672.44
|
405.58
|
26579.56
|
3149.77
|
-2.96
|
X7
|
-1844.37
|
-205.25
|
17499.38
|
140343.50
|
565.22
|
3149.77
|
16879.39
|
64.51
|
X8
|
-5.15
|
-0.42
|
73.48
|
412.79
|
1.59
|
-2.96
|
64.51
|
0.28
|
2.2.3. Cálculo de R(X)
Cálculo “Manual”
# Standardise the columns of mat_X. Only `center` is passed explicitly; the
# `scale` argument of scale() is left at its default.
Zx <- scale(mat_X, center = TRUE)
kable_material(
  kable(
    head(Zx),
    caption = "Matriz de Variables Estandarizadas:",
    align = "c",
    digits = 2
  ),
  html_font = "sans-serif"
)
Matriz de Variables Estandarizadas:
X1
|
X2
|
X3
|
X4
|
X5
|
X6
|
X7
|
X8
|
-1.86
|
-0.10
|
1.59
|
0.61
|
0.41
|
-0.37
|
1.51
|
1.34
|
1.66
|
0.63
|
0.23
|
-0.62
|
-0.43
|
-0.60
|
-0.43
|
0.02
|
0.57
|
0.92
|
0.02
|
-1.02
|
-0.57
|
-0.79
|
-0.96
|
-0.73
|
0.39
|
0.19
|
1.65
|
-0.84
|
-0.15
|
-0.82
|
1.44
|
2.09
|
1.21
|
-0.10
|
-1.39
|
-0.48
|
-0.98
|
-0.63
|
-0.94
|
-0.73
|
-0.25
|
1.36
|
0.13
|
1.53
|
-0.70
|
-0.73
|
-0.39
|
-0.54
|
# Correlation matrix computed by hand from the standardised data:
# t(Zx) %*% Zx divided by (n_obs - 1).
n_obs <- nrow(mat_X)
mat_R <- (t(Zx) %*% Zx) / (n_obs - 1)
kable_styling(
  kable_material(
    kable(
      mat_R,
      caption = "Cálculo de R(X) forma manual:",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de R(X) forma manual:
|
X1
|
X2
|
X3
|
X4
|
X5
|
X6
|
X7
|
X8
|
X1
|
1.00
|
0.25
|
-0.53
|
-0.25
|
-0.63
|
-0.23
|
-0.53
|
-0.36
|
X2
|
0.25
|
1.00
|
-0.11
|
0.17
|
-0.50
|
-0.84
|
-0.23
|
-0.12
|
X3
|
-0.53
|
-0.11
|
1.00
|
0.27
|
0.54
|
0.03
|
0.71
|
0.73
|
X4
|
-0.25
|
0.17
|
0.27
|
1.00
|
0.18
|
0.02
|
0.45
|
0.32
|
X5
|
-0.63
|
-0.50
|
0.54
|
0.18
|
1.00
|
0.35
|
0.61
|
0.42
|
X6
|
-0.23
|
-0.84
|
0.03
|
0.02
|
0.35
|
1.00
|
0.15
|
-0.03
|
X7
|
-0.53
|
-0.23
|
0.71
|
0.45
|
0.61
|
0.15
|
1.00
|
0.93
|
X8
|
-0.36
|
-0.12
|
0.73
|
0.32
|
0.42
|
-0.03
|
0.93
|
1.00
|
Cálculo usando R base
library(dplyr)
library(kableExtra)
# Same correlation matrix, this time obtained with base R's cor(), rendered
# with the same table styling as the manual version for comparison.
kable_styling(
  kable_material(
    kable(
      cor(mat_X),
      caption = "Cálculo de R(X) a través de R base",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Cálculo de R(X) a través de R base
|
X1
|
X2
|
X3
|
X4
|
X5
|
X6
|
X7
|
X8
|
X1
|
1.00
|
0.25
|
-0.53
|
-0.25
|
-0.63
|
-0.23
|
-0.53
|
-0.36
|
X2
|
0.25
|
1.00
|
-0.11
|
0.17
|
-0.50
|
-0.84
|
-0.23
|
-0.12
|
X3
|
-0.53
|
-0.11
|
1.00
|
0.27
|
0.54
|
0.03
|
0.71
|
0.73
|
X4
|
-0.25
|
0.17
|
0.27
|
1.00
|
0.18
|
0.02
|
0.45
|
0.32
|
X5
|
-0.63
|
-0.50
|
0.54
|
0.18
|
1.00
|
0.35
|
0.61
|
0.42
|
X6
|
-0.23
|
-0.84
|
0.03
|
0.02
|
0.35
|
1.00
|
0.15
|
-0.03
|
X7
|
-0.53
|
-0.23
|
0.71
|
0.45
|
0.61
|
0.15
|
1.00
|
0.93
|
X8
|
-0.36
|
-0.12
|
0.73
|
0.32
|
0.42
|
-0.03
|
0.93
|
1.00
|
2.2.4 Versiones gráficas de R(X)
Pueden ser especialmente útiles en la presentación de reportes, además de proveer una vista rápida de algunas características propias de la correlación entre las variables.
2.2.4.2. Usando el paquete corrplot
library(corrplot)
library(grDevices)
library(Hmisc)
# Correlation analysis via Hmisc::rcorr(); the plot below uses only its `r`
# element (the correlation coefficients).
# Note: `Mat_R` differs from the hand-computed `mat_R` only by letter case.
Mat_R <- rcorr(as.matrix(mat_X))

# Upper-triangle correlogram with a terrain colour ramp.
# NOTE(review): p.mat is given the correlation matrix (Mat_R$r), not p-values.
# Combined with sig.level = -1 and insig = "p-value" this looks like a trick
# to print the coefficients on every cell in blue -- confirm that is intended.
corrplot(
  corr = Mat_R$r,
  type = "upper",
  col = terrain.colors(100),
  tl.col = "black",
  tl.srt = 20,
  p.mat = Mat_R$r,
  sig.level = -1,
  insig = "p-value",
  pch.col = "blue"
)

Descomposición de autovalores y autovectores
library(stargazer)
# Eigen-decomposition of the correlation matrix stored in Rx$r (Rx is created
# outside this section). The eigenvalues are transposed so the table shows
# them as a single row.
descomposicion <- eigen(Rx$r)
kable_styling(
  kable_classic_2(
    kable(
      t(descomposicion$values),
      caption = "Autovalores de R(X)",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Autovalores de R(X)
3.75
|
1.93
|
0.84
|
0.72
|
0.34
|
0.31
|
0.1
|
0.01
|
# Eigenvectors of the correlation matrix, one eigenvector per column.
kable_styling(
  kable_classic_2(
    kable(
      descomposicion$vectors,
      caption = "Autovectores de R(X)",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Autovectores de R(X)
-0.37
|
-0.05
|
-0.03
|
0.71
|
-0.42
|
0.41
|
0.12
|
0.06
|
-0.22
|
-0.61
|
0.05
|
-0.24
|
0.05
|
0.01
|
0.70
|
-0.15
|
0.41
|
-0.20
|
-0.27
|
-0.02
|
0.36
|
0.75
|
-0.05
|
0.11
|
0.22
|
-0.29
|
0.88
|
0.04
|
-0.08
|
0.16
|
-0.23
|
-0.06
|
0.41
|
0.18
|
-0.09
|
-0.35
|
-0.75
|
0.18
|
0.19
|
-0.18
|
0.18
|
0.61
|
0.30
|
0.21
|
0.32
|
0.09
|
0.57
|
-0.18
|
0.47
|
-0.17
|
-0.01
|
0.28
|
-0.09
|
-0.34
|
0.26
|
0.69
|
0.41
|
-0.27
|
-0.21
|
0.45
|
0.04
|
-0.29
|
-0.07
|
-0.65
|
Cálculo usando R:
library(dplyr)
library(factoextra)
library(kableExtra)
library(stargazer)
library(ggplot2)
# A very large scipen penalty makes R print numbers in fixed notation rather
# than scientific notation. This is a session-wide option.
options(scipen = 99999)

# PCA on the correlation matrix (cor = TRUE). fix_sign = FALSE disables
# princomp's sign normalisation of the loadings.
PC <- princomp(mat_X, cor = TRUE, fix_sign = FALSE)

# Eigenvalue, % of variance and cumulative % of variance per component.
kable_styling(
  kable_material(
    kable(
      factoextra::get_eig(PC),
      caption = "Resumen de PCA",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = "hover"
)
Resumen de PCA
|
eigenvalue
|
variance.percent
|
cumulative.variance.percent
|
Dim.1
|
3.75
|
46.92
|
46.92
|
Dim.2
|
1.93
|
24.11
|
71.03
|
Dim.3
|
0.84
|
10.45
|
81.48
|
Dim.4
|
0.72
|
9.04
|
90.52
|
Dim.5
|
0.34
|
4.26
|
94.77
|
Dim.6
|
0.31
|
3.81
|
98.59
|
Dim.7
|
0.10
|
1.24
|
99.83
|
Dim.8
|
0.01
|
0.17
|
100.00
|
# Scree plot of the eigenvalues. The horizontal line at 1 marks the usual
# "eigenvalue greater than one" reference for retaining components.
fviz_eig(
  PC,
  choice = "eigenvalue",
  barfill = "red",
  barcolor = "red",
  addlabels = TRUE
) +
  labs(
    title = "Gráfico de Sedimentación",
    subtitle = "Usando princomp, con Autovalores",
    x = "Componentes",
    y = "Autovalores"
  ) +
  geom_hline(yintercept = 1)

# Scree plot of the percentage of variance explained by each component.
fviz_eig(
  PC,
  choice = "variance",
  barfill = "green",
  barcolor = "green",
  addlabels = TRUE
) +
  labs(
    title = "Gráfico de Sedimentación",
    subtitle = "Usando princomp, con %Varianza Explicada",
    x = "Componentes",
    y = "%Varianza"
  )

Correlación de los componentes con las variables: $r_{ij} = a_{ij} \cdot \sqrt{\lambda_j}$
library(dplyr)
library(kableExtra)
# Correlation between each original variable and each principal component:
# column j of the eigenvector matrix multiplied by sqrt(lambda_j).
# The sign of an eigenvector is arbitrary, so a whole column may appear
# negated when compared with the output of other PCA routines.
raiz_lambda <- as.matrix(sqrt(descomposicion$values))
autovectores <- descomposicion$vectors

# Scale every eigenvector column by its own sqrt(eigenvalue) in one vectorised
# step. The number of components comes from the matrix itself, not a literal
# 8, so the chunk keeps working if the number of variables changes.
corr_componentes_coordenadas <- sweep(
  autovectores,
  MARGIN = 2,
  STATS = as.vector(raiz_lambda),
  FUN = "*"
)
colnames(corr_componentes_coordenadas) <- paste0(
  "Comp",
  seq_len(ncol(corr_componentes_coordenadas))
)
# Kept as a tibble, the class the previous bind_cols() implementation produced.
corr_componentes_coordenadas <- as_tibble(corr_componentes_coordenadas)

corr_componentes_coordenadas %>%
  as.data.frame() %>%
  kable(
    caption = "Correlación de X con las componentes",
    align = "c",
    digits = 2
  ) %>%
  kable_material(html_font = "sans-serif") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Correlación de X con las componentes
Comp1
|
Comp2
|
Comp3
|
Comp4
|
Comp5
|
Comp6
|
Comp7
|
Comp8
|
-0.72
|
-0.06
|
-0.03
|
0.60
|
-0.25
|
0.22
|
0.04
|
0.01
|
-0.43
|
-0.85
|
0.04
|
-0.20
|
0.03
|
0.01
|
0.22
|
-0.02
|
0.80
|
-0.28
|
-0.25
|
-0.02
|
0.21
|
0.42
|
-0.02
|
0.01
|
0.42
|
-0.40
|
0.81
|
0.03
|
-0.05
|
0.09
|
-0.07
|
-0.01
|
0.80
|
0.25
|
-0.08
|
-0.29
|
-0.44
|
0.10
|
0.06
|
-0.02
|
0.34
|
0.84
|
0.27
|
0.18
|
0.19
|
0.05
|
0.18
|
-0.02
|
0.91
|
-0.23
|
-0.01
|
0.24
|
-0.05
|
-0.19
|
0.08
|
0.08
|
0.80
|
-0.38
|
-0.20
|
0.38
|
0.02
|
-0.16
|
-0.02
|
-0.08
|
Usando factoextra
library(dplyr)
library(factoextra)
library(kableExtra)
# Variable-level PCA results from factoextra; the `coord` element is tabulated
# here to compare against the hand-computed correlations.
variables_pca <- get_pca_var(PC)
kable_styling(
  kable_material(
    kable(
      variables_pca$coord,
      caption = "Correlación de X con las componentes, usando factoextra",
      align = "c",
      digits = 2
    ),
    html_font = "sans-serif"
  ),
  bootstrap_options = c("striped", "hover")
)
Correlación de X con las componentes, usando factoextra
|
Dim.1
|
Dim.2
|
Dim.3
|
Dim.4
|
Dim.5
|
Dim.6
|
Dim.7
|
Dim.8
|
X1
|
-0.72
|
-0.06
|
-0.03
|
0.60
|
0.25
|
0.22
|
0.04
|
0.01
|
X2
|
-0.43
|
-0.85
|
0.04
|
-0.20
|
-0.03
|
0.01
|
0.22
|
-0.02
|
X3
|
0.80
|
-0.28
|
-0.25
|
-0.02
|
-0.21
|
0.42
|
-0.02
|
0.01
|
X4
|
0.42
|
-0.40
|
0.81
|
0.03
|
0.05
|
0.09
|
-0.07
|
-0.01
|
X5
|
0.80
|
0.25
|
-0.08
|
-0.29
|
0.44
|
0.10
|
0.06
|
-0.02
|
X6
|
0.34
|
0.84
|
0.27
|
0.18
|
-0.19
|
0.05
|
0.18
|
-0.02
|
X7
|
0.91
|
-0.23
|
-0.01
|
0.24
|
0.05
|
-0.19
|
0.08
|
0.08
|
X8
|
0.80
|
-0.38
|
-0.20
|
0.38
|
-0.02
|
-0.16
|
-0.02
|
-0.08
|
Representación Gráfica de las correlaciones en los ejes de los componentes
# Variable plots on successive pairs of component axes: (1, 2), (3, 4),
# (5, 6) and (7, 8). repel = TRUE is passed so that labels avoid overlapping.
fviz_pca_var(PC, axes = c(1, 2), repel = TRUE)

fviz_pca_var(PC, axes = c(3, 4), repel = TRUE)

fviz_pca_var(PC, axes = c(5, 6), repel = TRUE)

fviz_pca_var(PC, axes = c(7, 8), repel = TRUE)

Representación alternativa:
library(corrplot)
# Alternative view: all variable/component coordinates as coloured squares
# with the value printed in each cell. is.corr = FALSE because the input is
# not a square correlation matrix.
corrplot(
  variables_pca$coord,
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)

2.4 Análisis Factorial.
En el caso anterior se encontraron unas variables “sintéticas” que pueden sustituir a las variables originales, pero aún no se ha reducido la dimensión de la información. En este apartado se explicarán las características de la técnica de Componentes Principales, en cuanto a su uso dentro del Análisis Factorial.
2.4.2 Análisis Factorial en R
library(psych)
library(corrplot)
library(dplyr)
# Two-factor model (unrotated), fitted with psych::principal() on the
# correlation matrix stored in Rx$r.
numero_de_factores <- 2
modelo_2_factores <- principal(
  r = Rx$r,
  nfactors = numero_de_factores,
  rotate = "none",
  covar = FALSE
)
# Auto-print the fitted model summary.
modelo_2_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 h2 u2 com
## X1 -0.72 0.06 0.53 0.472 1.0
## X2 -0.43 0.85 0.91 0.093 1.5
## X3 0.80 0.28 0.72 0.280 1.2
## X4 0.42 0.40 0.33 0.668 2.0
## X5 0.80 -0.25 0.70 0.302 1.2
## X6 0.34 -0.84 0.82 0.176 1.3
## X7 0.91 0.23 0.89 0.108 1.1
## X8 0.80 0.38 0.78 0.217 1.4
##
## PC1 PC2
## SS loadings 3.75 1.93
## Proportion Var 0.47 0.24
## Cumulative Var 0.47 0.71
## Proportion Explained 0.66 0.34
## Cumulative Proportion 0.66 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.09
##
## Fit based upon off diagonal values = 0.96
# Plot the variable/component coordinates for the retained factors only.
# NOTE(review): the values come from the princomp-based variables_pca$coord,
# not from the psych model fitted above; the sign of some columns differs
# from the printed loadings -- confirm this is intended.
correlaciones_modelo <- variables_pca$coord
corrplot(
  correlaciones_modelo[, seq_len(numero_de_factores)],
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)

library(psych)
library(corrplot)
library(dplyr)
# Three-factor model (unrotated), fitted with psych::principal() on the
# correlation matrix stored in Rx$r.
numero_de_factores <- 3
modelo_3_factores <- principal(
  r = Rx$r,
  nfactors = numero_de_factores,
  rotate = "none",
  covar = FALSE
)
# Auto-print the fitted model summary.
modelo_3_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## X1 -0.72 0.06 -0.03 0.53 0.472 1.0
## X2 -0.43 0.85 0.04 0.91 0.092 1.5
## X3 0.80 0.28 -0.25 0.78 0.219 1.4
## X4 0.42 0.40 0.81 0.98 0.017 2.0
## X5 0.80 -0.25 -0.08 0.71 0.295 1.2
## X6 0.34 -0.84 0.27 0.90 0.101 1.6
## X7 0.91 0.23 -0.01 0.89 0.108 1.1
## X8 0.80 0.38 -0.20 0.82 0.179 1.6
##
## PC1 PC2 PC3
## SS loadings 3.75 1.93 0.84
## Proportion Var 0.47 0.24 0.10
## Cumulative Var 0.47 0.71 0.81
## Proportion Explained 0.58 0.30 0.13
## Cumulative Proportion 0.58 0.87 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
##
## Fit based upon off diagonal values = 0.97
# Plot the variable/component coordinates for the retained factors only.
# NOTE(review): the values come from the princomp-based variables_pca$coord,
# not from the psych model fitted above -- confirm this is intended.
correlaciones_modelo <- variables_pca$coord
corrplot(
  correlaciones_modelo[, seq_len(numero_de_factores)],
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)

library(psych)
library(corrplot)
library(dplyr)
# Four-factor model (unrotated), fitted with psych::principal() on the
# correlation matrix stored in Rx$r.
numero_de_factores <- 4
modelo_4_factores <- principal(
  r = Rx$r,
  nfactors = numero_de_factores,
  rotate = "none",
  covar = FALSE
)
# Auto-print the fitted model summary.
modelo_4_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 h2 u2 com
## X1 -0.72 0.06 -0.03 0.60 0.89 0.112 2.0
## X2 -0.43 0.85 0.04 -0.20 0.95 0.050 1.6
## X3 0.80 0.28 -0.25 -0.02 0.78 0.219 1.4
## X4 0.42 0.40 0.81 0.03 0.98 0.016 2.0
## X5 0.80 -0.25 -0.08 -0.29 0.79 0.208 1.5
## X6 0.34 -0.84 0.27 0.18 0.93 0.070 1.7
## X7 0.91 0.23 -0.01 0.24 0.95 0.052 1.3
## X8 0.80 0.38 -0.20 0.38 0.97 0.032 2.1
##
## PC1 PC2 PC3 PC4
## SS loadings 3.75 1.93 0.84 0.72
## Proportion Var 0.47 0.24 0.10 0.09
## Cumulative Var 0.47 0.71 0.81 0.91
## Proportion Explained 0.52 0.27 0.12 0.10
## Cumulative Proportion 0.52 0.78 0.90 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 4 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.04
##
## Fit based upon off diagonal values = 0.99
# Plot the variable/component coordinates for the retained factors only.
# NOTE(review): the values come from the princomp-based variables_pca$coord,
# not from the psych model fitted above -- confirm this is intended.
correlaciones_modelo <- variables_pca$coord
corrplot(
  correlaciones_modelo[, seq_len(numero_de_factores)],
  method = "square",
  is.corr = FALSE,
  addCoef.col = "black",
  number.cex = 0.75
)
