In the file datos_prob_7_3.txt there is a list of variables that characterize the degree of development of several countries of the world. The variables are:
X1 = infant mortality rate per 1,000 live births, X2 = percentage of women in the labor force, X3 = Gross National Product (GNP) per capita in 1995 (in $), X4 = electricity production (in millions of kWh), X5 = average number of telephone lines per 1,000 inhabitants, X6 = water consumption per capita in m3 (1980 to 1995), X7 = energy consumption per capita in 1994, X8 = CO2 emissions per capita in 1992 (in metric tons).
*Note: for now the variables will not be normalized to the 0-1 scale seen in the introductory section.
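For reference, a minimal sketch of the 0-1 (min-max) normalization mentioned above, in case it is needed later; the helper name normaliza_01 is illustrative and not part of the original script:
normaliza_01<-function(x){
  # min-max scaling: maps the column minimum to 0 and the maximum to 1
  (x-min(x))/(max(x)-min(x))
}
# mat_X_01<-apply(X = mat_X,MARGIN = 2,FUN = normaliza_01) # column-wise, using the data matrix loaded below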
library(readr)
library(kableExtra)
url_link<-"http://halweb.uc3m.es/esp/Personal/personas/agrane/libro/ficheros_datos/capitulo_7/datos_prob_7_3.txt"
mat_X<-read_table2(url_link,col_names = FALSE) # whitespace-separated file; with col_names = FALSE the columns get default names X1...X8 (read_table2() is superseded by read_table() in recent readr versions)
mat_X %>% head() %>%
kable(caption ="Matriz de información:" ,align = "c",digits = 6) %>%
kable_material(html_font = "sans-serif")
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|
| 30 | 41 | 670 | 3903 | 12 | 94 | 341 | 1.2 |
| 124 | 46 | 410 | 955 | 6 | 57 | 89 | 0.5 |
| 95 | 48 | 370 | 6 | 5 | 26 | 20 | 0.1 |
| 90 | 43 | 680 | 435 | 8 | 20 | 331 | 1.6 |
| 112 | 41 | 100 | 1293 | 2 | 51 | 22 | 0.1 |
| 73 | 51 | 390 | 6115 | 4 | 35 | 93 | 0.2 |
library(dplyr)
centrado<-function(x){
x-mean(x)
}
# centering: by construction, each centered column has mean 0
Xcentrada<-apply(X = mat_X,MARGIN = 2,centrado) # apply() on a matrix: the first argument is the matrix, MARGIN = 1 applies the function by rows and MARGIN = 2 by columns, and the third argument is the function
# Xcentrada is the matrix of deviations, i.e. the column means subtracted from the data matrix; a quick check of its column means is sketched after the table below
Xcentrada %>% head() %>%
kable(caption ="Matriz de Variables centradas:",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif")
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|
| -49.67 | -0.67 | 303.89 | 1463.5 | 2.94 | -60.17 | 196.72 | 0.71 |
| 44.33 | 4.33 | 43.89 | -1484.5 | -3.06 | -97.17 | -55.28 | 0.01 |
| 15.33 | 6.33 | 3.89 | -2433.5 | -4.06 | -128.17 | -124.28 | -0.39 |
| 10.33 | 1.33 | 313.89 | -2004.5 | -1.06 | -134.17 | 186.72 | 1.11 |
| 32.33 | -0.67 | -266.11 | -1146.5 | -7.06 | -103.17 | -122.28 | -0.39 |
| -6.67 | 9.33 | 23.89 | 3675.5 | -5.06 | -119.17 | -51.28 | -0.29 |
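As a quick sanity check (an illustrative sketch, not part of the original script), every column mean of Xcentrada should be numerically zero:
round(colMeans(Xcentrada),10) # all entries should be 0 up to floating-point error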
n_obs<-nrow(mat_X) # number of rows (observations)
mat_V<-t(Xcentrada)%*%Xcentrada/(n_obs-1) # the covariance formula from the start of the chapter (restated after the table below); since these are matrices, %*% is used
mat_V %>% kable(caption ="Cálculo de V(X) forma manual:" ,
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|---|
| X1 | 716.12 | 45.06 | -2689.61 | -16082.06 | -121.63 | -1019.06 | -1844.37 | -5.15 |
| X2 | 45.06 | 46.94 | -144.31 | 2756.71 | -24.63 | -938.41 | -205.25 | -0.42 |
| X3 | -2689.61 | -144.31 | 36389.87 | 123889.71 | 740.82 | 838.33 | 17499.38 | 73.48 |
| X4 | -16082.06 | 2756.71 | 123889.71 | 5736372.38 | 3078.97 | 6672.44 | 140343.50 | 412.79 |
| X5 | -121.63 | -24.63 | 740.82 | 3078.97 | 51.47 | 405.58 | 565.22 | 1.59 |
| X6 | -1019.06 | -938.41 | 838.33 | 6672.44 | 405.58 | 26579.56 | 3149.77 | -2.96 |
| X7 | -1844.37 | -205.25 | 17499.38 | 140343.50 | 565.22 | 3149.77 | 16879.39 | 64.51 |
| X8 | -5.15 | -0.42 | 73.48 | 412.79 | 1.59 | -2.96 | 64.51 | 0.28 |
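Restating the manual computation above in matrix form (a notation reminder, not taken verbatim from the original text), with the centered data matrix $\tilde{X}$ and $n$ observations:
$$V(X)=\frac{1}{n-1}\,\tilde{X}'\tilde{X}$$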
#the variance-covariance matrix and the correlation matrix are symmetric (hence all their eigenvalues are real)
cov(mat_X) %>%
kable(caption="Cálculo de V(X) a través de R base",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|---|
| X1 | 716.12 | 45.06 | -2689.61 | -16082.06 | -121.63 | -1019.06 | -1844.37 | -5.15 |
| X2 | 45.06 | 46.94 | -144.31 | 2756.71 | -24.63 | -938.41 | -205.25 | -0.42 |
| X3 | -2689.61 | -144.31 | 36389.87 | 123889.71 | 740.82 | 838.33 | 17499.38 | 73.48 |
| X4 | -16082.06 | 2756.71 | 123889.71 | 5736372.38 | 3078.97 | 6672.44 | 140343.50 | 412.79 |
| X5 | -121.63 | -24.63 | 740.82 | 3078.97 | 51.47 | 405.58 | 565.22 | 1.59 |
| X6 | -1019.06 | -938.41 | 838.33 | 6672.44 | 405.58 | 26579.56 | 3149.77 | -2.96 |
| X7 | -1844.37 | -205.25 | 17499.38 | 140343.50 | 565.22 | 3149.77 | 16879.39 | 64.51 |
| X8 | -5.15 | -0.42 | 73.48 | 412.79 | 1.59 | -2.96 | 64.51 | 0.28 |
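A minimal check (not in the original script) that the manual computation matches base R; check.attributes = FALSE ignores differences in dimension names:
all.equal(mat_V,cov(mat_X),check.attributes = FALSE) # should return TRUE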
Zx<-scale(x = mat_X,center =TRUE) # scale(): the first argument is the data frame; center = TRUE subtracts the column means, and the default scale = TRUE then divides each column by its standard deviation
Zx %>% head() %>%
kable(caption ="Matriz de Variables Estandarizadas:",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif")
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|
| -1.86 | -0.10 | 1.59 | 0.61 | 0.41 | -0.37 | 1.51 | 1.34 |
| 1.66 | 0.63 | 0.23 | -0.62 | -0.43 | -0.60 | -0.43 | 0.02 |
| 0.57 | 0.92 | 0.02 | -1.02 | -0.57 | -0.79 | -0.96 | -0.73 |
| 0.39 | 0.19 | 1.65 | -0.84 | -0.15 | -0.82 | 1.44 | 2.09 |
| 1.21 | -0.10 | -1.39 | -0.48 | -0.98 | -0.63 | -0.94 | -0.73 |
| -0.25 | 1.36 | 0.13 | 1.53 | -0.70 | -0.73 | -0.39 | -0.54 |
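Equivalently (an illustrative sketch, not in the original script; desv and Zx_manual are ad hoc names), the same standardized matrix can be built by hand from Xcentrada by dividing each column by its standard deviation:
desv<-apply(X = mat_X,MARGIN = 2,FUN = sd) # column standard deviations
Zx_manual<-sweep(Xcentrada,MARGIN = 2,STATS = desv,FUN = "/") # divide each column by its sd
all.equal(Zx_manual,Zx,check.attributes = FALSE) # should return TRUE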
n_obs<-nrow(mat_X)
mat_R<-t(Zx)%*%Zx/(n_obs-1)
mat_R %>% kable(caption ="Cálculo de R(X) forma manual:" ,
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|---|
| X1 | 1.00 | 0.25 | -0.53 | -0.25 | -0.63 | -0.23 | -0.53 | -0.36 |
| X2 | 0.25 | 1.00 | -0.11 | 0.17 | -0.50 | -0.84 | -0.23 | -0.12 |
| X3 | -0.53 | -0.11 | 1.00 | 0.27 | 0.54 | 0.03 | 0.71 | 0.73 |
| X4 | -0.25 | 0.17 | 0.27 | 1.00 | 0.18 | 0.02 | 0.45 | 0.32 |
| X5 | -0.63 | -0.50 | 0.54 | 0.18 | 1.00 | 0.35 | 0.61 | 0.42 |
| X6 | -0.23 | -0.84 | 0.03 | 0.02 | 0.35 | 1.00 | 0.15 | -0.03 |
| X7 | -0.53 | -0.23 | 0.71 | 0.45 | 0.61 | 0.15 | 1.00 | 0.93 |
| X8 | -0.36 | -0.12 | 0.73 | 0.32 | 0.42 | -0.03 | 0.93 | 1.00 |
cor(mat_X) %>%
kable(caption="Cálculo de R(X) a través de R base",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|---|
| X1 | 1.00 | 0.25 | -0.53 | -0.25 | -0.63 | -0.23 | -0.53 | -0.36 |
| X2 | 0.25 | 1.00 | -0.11 | 0.17 | -0.50 | -0.84 | -0.23 | -0.12 |
| X3 | -0.53 | -0.11 | 1.00 | 0.27 | 0.54 | 0.03 | 0.71 | 0.73 |
| X4 | -0.25 | 0.17 | 0.27 | 1.00 | 0.18 | 0.02 | 0.45 | 0.32 |
| X5 | -0.63 | -0.50 | 0.54 | 0.18 | 1.00 | 0.35 | 0.61 | 0.42 |
| X6 | -0.23 | -0.84 | 0.03 | 0.02 | 0.35 | 1.00 | 0.15 | -0.03 |
| X7 | -0.53 | -0.23 | 0.71 | 0.45 | 0.61 | 0.15 | 1.00 | 0.93 |
| X8 | -0.36 | -0.12 | 0.73 | 0.32 | 0.42 | -0.03 | 0.93 | 1.00 |
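Equivalently (a sketch not in the original script; D_inv_sqrt and R_desde_V are ad hoc names), R(X) can be recovered from V(X) by rescaling with the standard deviations, i.e. $R = D^{-1/2} V D^{-1/2}$ where $D$ is the diagonal matrix of variances:
D_inv_sqrt<-diag(1/sqrt(diag(mat_V))) # diagonal matrix with 1/sd on the diagonal
R_desde_V<-D_inv_sqrt%*%mat_V%*%D_inv_sqrt # rescaled covariance matrix
all.equal(R_desde_V,mat_R,check.attributes = FALSE) # should return TRUE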
library(PerformanceAnalytics)
chart.Correlation(as.matrix(mat_X),histogram = TRUE,pch=12)
library(corrplot)
library(grDevices)
library(Hmisc)
Mat_R<-rcorr(as.matrix(mat_X))
corrplot(Mat_R$r,
p.mat = Mat_R$r,
type="upper",
tl.col="black",
tl.srt = 20,
pch.col = "blue",
insig = "p-value",
sig.level = -1,
col = terrain.colors(100))
# with the terrain.colors palette, the green cells correspond to negative correlations
library(corrplot)
library(grDevices)
library(Hmisc)
col4 <- colorRampPalette(c("#7F0000", "red", "#FF7F00", "yellow", "#7FFF7F",
"cyan", "#007FFF", "blue", "#00007F"))
Mat_R<-rcorr(as.matrix(mat_X))
corrplot(Mat_R$r,
p.mat = Mat_R$r,
type="full",
tl.col="red",
tl.srt = 15,
pch.col = "black",
insig = "p-value",
sig.level = -1,
order = "hclust",
method = "circle",
addrect = 3,
col = col4(100))
col2 <- colorRampPalette(c("#67001F", "#B2182B", "#D6604D", "#F4A582",
"#FDDBC7", "#FFFFFF", "#D1E5F0", "#92C5DE",
"#4393C3", "#2166AC", "#053061"))
Mat_R<-rcorr(as.matrix(mat_X))
corrplot(Mat_R$r,
p.mat = Mat_R$r,
method="square",
type="full",
order="hclust",
addrect=2,
tl.col="black",
tl.srt = 20,
pch.col = "Black",
insig = "p-value",
sig.level = -1,
col = col2(50))
library(Hmisc)
Rx<-mat_X %>% as.matrix() %>% rcorr()
Rx$r %>% kable(caption="Matriz R(X)",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|---|
| X1 | 1.00 | 0.25 | -0.53 | -0.25 | -0.63 | -0.23 | -0.53 | -0.36 |
| X2 | 0.25 | 1.00 | -0.11 | 0.17 | -0.50 | -0.84 | -0.23 | -0.12 |
| X3 | -0.53 | -0.11 | 1.00 | 0.27 | 0.54 | 0.03 | 0.71 | 0.73 |
| X4 | -0.25 | 0.17 | 0.27 | 1.00 | 0.18 | 0.02 | 0.45 | 0.32 |
| X5 | -0.63 | -0.50 | 0.54 | 0.18 | 1.00 | 0.35 | 0.61 | 0.42 |
| X6 | -0.23 | -0.84 | 0.03 | 0.02 | 0.35 | 1.00 | 0.15 | -0.03 |
| X7 | -0.53 | -0.23 | 0.71 | 0.45 | 0.61 | 0.15 | 1.00 | 0.93 |
| X8 | -0.36 | -0.12 | 0.73 | 0.32 | 0.42 | -0.03 | 0.93 | 1.00 |
Rx$P %>% kable(caption="p-values de R(X)",
align = "c",
digits = 2) %>%
kable_classic_2(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| | X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 |
|---|---|---|---|---|---|---|---|---|
| X1 | NA | 0.33 | 0.02 | 0.32 | 0.00 | 0.35 | 0.02 | 0.14 |
| X2 | 0.33 | NA | 0.66 | 0.51 | 0.03 | 0.00 | 0.36 | 0.65 |
| X3 | 0.02 | 0.66 | NA | 0.28 | 0.02 | 0.92 | 0.00 | 0.00 |
| X4 | 0.32 | 0.51 | 0.28 | NA | 0.48 | 0.95 | 0.06 | 0.19 |
| X5 | 0.00 | 0.03 | 0.02 | 0.48 | NA | 0.16 | 0.01 | 0.08 |
| X6 | 0.35 | 0.00 | 0.92 | 0.95 | 0.16 | NA | 0.56 | 0.89 |
| X7 | 0.02 | 0.36 | 0.00 | 0.06 | 0.01 | 0.56 | NA | 0.00 |
| X8 | 0.14 | 0.65 | 0.00 | 0.19 | 0.08 | 0.89 | 0.00 | NA |
descomposicion<-eigen(Rx$r)
t(descomposicion$values) %>% kable(caption="Autovalores de R(X)",
align = "c",
digits = 2) %>%
kable_classic_2(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| λ1 | λ2 | λ3 | λ4 | λ5 | λ6 | λ7 | λ8 |
|---|---|---|---|---|---|---|---|
| 3.75 | 1.93 | 0.84 | 0.72 | 0.34 | 0.31 | 0.1 | 0.01 |
descomposicion$vectors %>% kable(caption="Autovectores de R(X)",
align = "c",
digits = 2) %>%
kable_classic_2(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| v1 | v2 | v3 | v4 | v5 | v6 | v7 | v8 |
|---|---|---|---|---|---|---|---|
| -0.37 | -0.05 | -0.03 | 0.71 | -0.42 | 0.41 | 0.12 | 0.06 |
| -0.22 | -0.61 | 0.05 | -0.24 | 0.05 | 0.01 | 0.70 | -0.15 |
| 0.41 | -0.20 | -0.27 | -0.02 | 0.36 | 0.75 | -0.05 | 0.11 |
| 0.22 | -0.29 | 0.88 | 0.04 | -0.08 | 0.16 | -0.23 | -0.06 |
| 0.41 | 0.18 | -0.09 | -0.35 | -0.75 | 0.18 | 0.19 | -0.18 |
| 0.18 | 0.61 | 0.30 | 0.21 | 0.32 | 0.09 | 0.57 | -0.18 |
| 0.47 | -0.17 | -0.01 | 0.28 | -0.09 | -0.34 | 0.26 | 0.69 |
| 0.41 | -0.27 | -0.21 | 0.45 | 0.04 | -0.29 | -0.07 | -0.65 |
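As a check (an illustrative sketch, not part of the original script; V and lambda are ad hoc names), the spectral decomposition should reconstruct R(X), since $R = V\Lambda V'$ with $V$ the eigenvector matrix and $\Lambda$ the diagonal matrix of eigenvalues:
V<-descomposicion$vectors
lambda<-descomposicion$values
all.equal(V%*%diag(lambda)%*%t(V),Rx$r,check.attributes = FALSE) # should return TRUE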
library(dplyr)
library(factoextra)
library(kableExtra)
library(stargazer)
library(ggplot2)
options(scipen = 99999)
PC<-princomp(x = mat_X,cor = TRUE,fix_sign = FALSE)
factoextra::get_eig(PC) %>% kable(caption="Resumen de PCA",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("hover"))
| | eigenvalue | variance.percent | cumulative.variance.percent |
|---|---|---|---|
| Dim.1 | 3.75 | 46.92 | 46.92 |
| Dim.2 | 1.93 | 24.11 | 71.03 |
| Dim.3 | 0.84 | 10.45 | 81.48 |
| Dim.4 | 0.72 | 9.04 | 90.52 |
| Dim.5 | 0.34 | 4.26 | 94.77 |
| Dim.6 | 0.31 | 3.81 | 98.59 |
| Dim.7 | 0.10 | 1.24 | 99.83 |
| Dim.8 | 0.01 | 0.17 | 100.00 |
## note: for a correlation matrix the eigenvalues must add up to the number of variables (the trace of R); with 8 variables the sum must be 8.
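A one-line check of that property (not in the original script):
sum(descomposicion$values) # should equal 8, the number of variables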
fviz_eig(PC,
choice = "eigenvalue",
barcolor = "red",
barfill = "red",
addlabels = TRUE
)+labs(title = "Gráfico de Sedimentación",subtitle = "Usando princomp, con Autovalores")+
xlab(label = "Componentes")+
ylab(label = "Autovalores")+geom_hline(yintercept = 1)
fviz_eig(PC,
choice = "variance",
barcolor = "green",
barfill = "green",
addlabels = TRUE
)+labs(title = "Gráfico de Sedimentación",
subtitle = "Usando princomp, con %Varianza Explicada")+
xlab(label = "Componentes")+
ylab(label = "%Varianza")
library(dplyr)
library(kableExtra)
raiz_lambda<-as.matrix(sqrt(descomposicion$values))
autovectores<-descomposicion$vectors
corr_componentes_coordenadas<-vector(mode = "list")
for(j in 1:8){raiz_lambda[j]*autovectores[,j]->corr_componentes_coordenadas[[j]]}
corr_componentes_coordenadas %>% bind_cols()->corr_componentes_coordenadas
names(corr_componentes_coordenadas)<-paste0("Comp",1:8)
corr_componentes_coordenadas %>% as.data.frame() %>%
kable(caption="Correlación de X con las componentes",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| Comp1 | Comp2 | Comp3 | Comp4 | Comp5 | Comp6 | Comp7 | Comp8 |
|---|---|---|---|---|---|---|---|
| -0.72 | -0.06 | -0.03 | 0.60 | -0.25 | 0.22 | 0.04 | 0.01 |
| -0.43 | -0.85 | 0.04 | -0.20 | 0.03 | 0.01 | 0.22 | -0.02 |
| 0.80 | -0.28 | -0.25 | -0.02 | 0.21 | 0.42 | -0.02 | 0.01 |
| 0.42 | -0.40 | 0.81 | 0.03 | -0.05 | 0.09 | -0.07 | -0.01 |
| 0.80 | 0.25 | -0.08 | -0.29 | -0.44 | 0.10 | 0.06 | -0.02 |
| 0.34 | 0.84 | 0.27 | 0.18 | 0.19 | 0.05 | 0.18 | -0.02 |
| 0.91 | -0.23 | -0.01 | 0.24 | -0.05 | -0.19 | 0.08 | 0.08 |
| 0.80 | -0.38 | -0.20 | 0.38 | 0.02 | -0.16 | -0.02 | -0.08 |
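The same table can be obtained in one step (a sketch, not part of the original code; cargas is an ad hoc name): multiplying the eigenvector matrix by the diagonal matrix of $\sqrt{\lambda_j}$ scales every column at once.
cargas<-autovectores%*%diag(sqrt(descomposicion$values)) # column j equals sqrt(lambda_j) times eigenvector j
colnames(cargas)<-paste0("Comp",1:8)
round(cargas,2)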
library(dplyr)
library(factoextra)
library(kableExtra)
variables_pca<-get_pca_var(PC)
variables_pca$coord%>%
kable(caption="Correlación de X con las componentes, usando factoextra",
align = "c",
digits = 2) %>%
kable_material(html_font = "sans-serif") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
| | Dim.1 | Dim.2 | Dim.3 | Dim.4 | Dim.5 | Dim.6 | Dim.7 | Dim.8 |
|---|---|---|---|---|---|---|---|---|
| X1 | -0.72 | -0.06 | -0.03 | 0.60 | 0.25 | 0.22 | 0.04 | 0.01 |
| X2 | -0.43 | -0.85 | 0.04 | -0.20 | -0.03 | 0.01 | 0.22 | -0.02 |
| X3 | 0.80 | -0.28 | -0.25 | -0.02 | -0.21 | 0.42 | -0.02 | 0.01 |
| X4 | 0.42 | -0.40 | 0.81 | 0.03 | 0.05 | 0.09 | -0.07 | -0.01 |
| X5 | 0.80 | 0.25 | -0.08 | -0.29 | 0.44 | 0.10 | 0.06 | -0.02 |
| X6 | 0.34 | 0.84 | 0.27 | 0.18 | -0.19 | 0.05 | 0.18 | -0.02 |
| X7 | 0.91 | -0.23 | -0.01 | 0.24 | 0.05 | -0.19 | 0.08 | 0.08 |
| X8 | 0.80 | -0.38 | -0.20 | 0.38 | -0.02 | -0.16 | -0.02 | -0.08 |
fviz_pca_var(PC,repel = TRUE,axes = c(1,2))
fviz_pca_var(PC,repel = TRUE,axes = c(3,4))
fviz_pca_var(PC,repel = TRUE,axes = c(5,6))
fviz_pca_var(PC,repel = TRUE,axes = c(7,8))
library(corrplot)
corrplot(variables_pca$coord,is.corr = FALSE,method = "square",addCoef.col="black",number.cex = 0.75)
library(psych)
library(corrplot)
library(dplyr)
#2-factor model (unrotated)
numero_de_factores<-2
modelo_2_factores<-principal(r = Rx$r,
nfactors = numero_de_factores,
covar = FALSE,
rotate = "none")
modelo_2_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 h2 u2 com
## X1 -0.72 0.06 0.53 0.472 1.0
## X2 -0.43 0.85 0.91 0.093 1.5
## X3 0.80 0.28 0.72 0.280 1.2
## X4 0.42 0.40 0.33 0.668 2.0
## X5 0.80 -0.25 0.70 0.302 1.2
## X6 0.34 -0.84 0.82 0.176 1.3
## X7 0.91 0.23 0.89 0.108 1.1
## X8 0.80 0.38 0.78 0.217 1.4
##
## PC1 PC2
## SS loadings 3.75 1.93
## Proportion Var 0.47 0.24
## Cumulative Var 0.47 0.71
## Proportion Explained 0.66 0.34
## Cumulative Proportion 0.66 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.09
##
## Fit based upon off diagonal values = 0.96
# r is the correlation matrix; alternatively the raw data matrix (X) can be passed and the function computes the correlation matrix itself
# covar = FALSE tells it the input is not to be treated as a covariance matrix, forcing it to work with the correlation matrix
# rotate = "none" means no rotation is applied
# instead of the omega-squared notation, the output reports the specificity as u2
# only the first 2 components appear because we asked it to retain only the first 2 factors
# h2 is the communality: the squared loading on PC1 plus the squared loading on PC2, i.e. the share of each variable's variance explained by the retained components
# u2 is the uniqueness (specificity): 1 minus h2, the share of variance left unexplained
# Proportion Explained is what interests us, and over the two factors it must sum to 1. For PC1 it is its SS loadings divided by the total SS loadings: 3.75 + 1.93 = 5.68, so 3.75/5.68 is about 0.66; for PC2 it is 1.93/5.68, about 0.34. A worked check is sketched right below.
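A minimal worked check of those proportions (illustrative, not in the original script; ss_loadings is an ad hoc name):
ss_loadings<-colSums(modelo_2_factores$loadings[,1:2]^2) # approximately 3.75 and 1.93
ss_loadings/sum(ss_loadings) # approximately 0.66 and 0.34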
correlaciones_modelo<-variables_pca$coord
corrplot(correlaciones_modelo[,1:numero_de_factores],
is.corr = FALSE,
method = "square",addCoef.col="black",number.cex = 0.75)
# most variables load on the first dimension, which is not ideal; rotation will be used to make the grouping of the variables easier to interpret
library(psych)
library(corrplot)
library(dplyr)
#3-factor model (unrotated)
numero_de_factores<-3
modelo_3_factores<-principal(r = Rx$r,
nfactors = numero_de_factores,
covar = FALSE,
rotate = "none")
modelo_3_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## X1 -0.72 0.06 -0.03 0.53 0.472 1.0
## X2 -0.43 0.85 0.04 0.91 0.092 1.5
## X3 0.80 0.28 -0.25 0.78 0.219 1.4
## X4 0.42 0.40 0.81 0.98 0.017 2.0
## X5 0.80 -0.25 -0.08 0.71 0.295 1.2
## X6 0.34 -0.84 0.27 0.90 0.101 1.6
## X7 0.91 0.23 -0.01 0.89 0.108 1.1
## X8 0.80 0.38 -0.20 0.82 0.179 1.6
##
## PC1 PC2 PC3
## SS loadings 3.75 1.93 0.84
## Proportion Var 0.47 0.24 0.10
## Cumulative Var 0.47 0.71 0.81
## Proportion Explained 0.58 0.30 0.13
## Cumulative Proportion 0.58 0.87 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
##
## Fit based upon off diagonal values = 0.97
# here X4 has a better communality (h2), meaning that variable is better represented with 3 factors, while the remaining variables show little change
# the Proportion Explained values decrease because there are more factors; the calculation is the same in every case. Part of the share previously attributed to factors 1 and 2 is passed to factor 3, and the weight structure is always decreasing: factor 2 explains less than factor 1, factor 3 less than factor 2, and so on.
correlaciones_modelo<-variables_pca$coord
corrplot(correlaciones_modelo[,1:numero_de_factores],
is.corr = FALSE,
method = "square",addCoef.col="black",number.cex = 0.75)
library(psych)
library(corrplot)
library(dplyr)
#4-factor model (unrotated)
numero_de_factores<-4
modelo_4_factores<-principal(r = Rx$r,
nfactors = numero_de_factores,
covar = FALSE,
rotate = "none")
modelo_4_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "none",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 h2 u2 com
## X1 -0.72 0.06 -0.03 0.60 0.89 0.112 2.0
## X2 -0.43 0.85 0.04 -0.20 0.95 0.050 1.6
## X3 0.80 0.28 -0.25 -0.02 0.78 0.219 1.4
## X4 0.42 0.40 0.81 0.03 0.98 0.016 2.0
## X5 0.80 -0.25 -0.08 -0.29 0.79 0.208 1.5
## X6 0.34 -0.84 0.27 0.18 0.93 0.070 1.7
## X7 0.91 0.23 -0.01 0.24 0.95 0.052 1.3
## X8 0.80 0.38 -0.20 0.38 0.97 0.032 2.1
##
## PC1 PC2 PC3 PC4
## SS loadings 3.75 1.93 0.84 0.72
## Proportion Var 0.47 0.24 0.10 0.09
## Cumulative Var 0.47 0.71 0.81 0.91
## Proportion Explained 0.52 0.27 0.12 0.10
## Cumulative Proportion 0.52 0.78 0.90 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 4 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.04
##
## Fit based upon off diagonal values = 0.99
# the communality determines the specificity, which shows the percentage of variance that is lost
# Cumulative Var is the cumulative variance, i.e. how much of the total variance the model explains
correlaciones_modelo<-variables_pca$coord
corrplot(correlaciones_modelo[,1:numero_de_factores],
is.corr = FALSE,
method = "square",addCoef.col="black",number.cex = 0.75)
# Class of October 29, 2020
This operation is carried out to improve the interpretation of the proposed extraction (solution). There are two types of rotation, orthogonal and oblique: the former keeps the factors independent, while the latter does not.
For the purpose of grouping the variables, the orthogonal VARIMAX rotation proposed by Kaiser will be used (it seeks to maximize the variance of the squared loadings within each component), since it yields independent factors while preserving the variance structure of the original solution. A small numerical illustration follows below.
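As an illustration of "preserving the variance structure" (a sketch using the eigen-decomposition objects computed earlier; cargas_2 and cargas_rot are ad hoc names): an orthogonal rotation such as varimax leaves the communalities, i.e. the row sums of squared loadings, unchanged.
cargas_2<-autovectores[,1:2]%*%diag(sqrt(descomposicion$values[1:2])) # unrotated loadings on the first 2 components
cargas_rot<-varimax(cargas_2)$loadings # varimax-rotated loadings
cbind(sin_rotar=rowSums(cargas_2^2),rotado=rowSums(cargas_rot^2)) # the two columns of communalities coincide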
library(psych)
library(corrplot)
library(dplyr)
#2-factor model (rotated)
numero_de_factores<-2
modelo_2_factores<-principal(r = Rx$r,
nfactors = numero_de_factores,
covar = FALSE,
rotate = "varimax") # this is what changes with respect to the previous model: the varimax rotation is applied here; the signs may differ from those of the varimax() call further below, but the magnitudes will be the same, so there is no problem
modelo_2_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "varimax",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## X1 -0.63 -0.36 0.53 0.472 1.6
## X2 -0.04 -0.95 0.91 0.093 1.0
## X3 0.84 0.08 0.72 0.280 1.0
## X4 0.54 -0.19 0.33 0.668 1.2
## X5 0.62 0.56 0.70 0.302 2.0
## X6 -0.03 0.91 0.82 0.176 1.0
## X7 0.93 0.16 0.89 0.108 1.1
## X8 0.88 -0.01 0.78 0.217 1.0
##
## RC1 RC2
## SS loadings 3.45 2.24
## Proportion Var 0.43 0.28
## Cumulative Var 0.43 0.71
## Proportion Explained 0.61 0.39
## Cumulative Proportion 0.61 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.09
##
## Fit based upon off diagonal values = 0.96
correlaciones_modelo<-variables_pca$coord
correlaciones_modelo_rotada<-varimax(correlaciones_modelo[,1:numero_de_factores])$loadings
# varimax() rotates the loadings here so that the plot shows the rotated solution
corrplot(correlaciones_modelo_rotada[,1:numero_de_factores],
is.corr = FALSE,
method = "square",
addCoef.col="black",
number.cex = 0.75)
# the magnitudes of the Dim.1 and Dim.2 values must match those of the RC1 and RC2 columns; the sign may change, but the magnitude must be the same
#3-factor model (rotated)
numero_de_factores<-3
modelo_3_factores<-principal(r = Rx$r,
nfactors = numero_de_factores,
covar = FALSE,
rotate = "varimax")
modelo_3_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "varimax",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## X1 -0.62 -0.33 -0.18 0.53 0.472 1.7
## X2 -0.15 -0.92 0.18 0.91 0.092 1.1
## X3 0.88 -0.01 0.04 0.78 0.219 1.0
## X4 0.23 -0.06 0.96 0.98 0.017 1.1
## X5 0.67 0.51 0.04 0.71 0.295 1.9
## X6 -0.03 0.94 0.09 0.90 0.101 1.0
## X7 0.90 0.12 0.27 0.89 0.108 1.2
## X8 0.89 -0.09 0.12 0.82 0.179 1.1
##
## RC1 RC2 RC3
## SS loadings 3.30 2.13 1.09
## Proportion Var 0.41 0.27 0.14
## Cumulative Var 0.41 0.68 0.81
## Proportion Explained 0.51 0.33 0.17
## Cumulative Proportion 0.51 0.83 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
##
## Fit based upon off diagonal values = 0.97
correlaciones_modelo<-variables_pca$coord
correlaciones_modelo_rotada<-varimax(correlaciones_modelo[,1:numero_de_factores],
normalize = TRUE)$loadings
corrplot(correlaciones_modelo_rotada[,1:numero_de_factores],
is.corr = FALSE,
method = "square",
addCoef.col="black",
number.cex = 0.75)
#4-factor model (rotated)
numero_de_factores<-4
modelo_4_factores<-principal(r = Rx$r,
nfactors = numero_de_factores,
covar = FALSE,
rotate = "varimax")
modelo_4_factores
## Principal Components Analysis
## Call: principal(r = Rx$r, nfactors = numero_de_factores, rotate = "varimax",
## covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC4 RC3 h2 u2 com
## X1 -0.22 -0.10 -0.90 -0.14 0.89 0.112 1.2
## X2 -0.12 -0.94 -0.16 0.18 0.95 0.050 1.2
## X3 0.76 -0.04 0.45 0.03 0.78 0.219 1.6
## X4 0.22 -0.06 0.11 0.96 0.98 0.016 1.1
## X5 0.38 0.37 0.72 0.01 0.79 0.208 2.1
## X6 -0.06 0.95 0.12 0.10 0.93 0.070 1.1
## X7 0.88 0.15 0.29 0.27 0.95 0.052 1.5
## X8 0.97 0.00 0.10 0.12 0.97 0.032 1.1
##
## RC1 RC2 RC4 RC3
## SS loadings 2.55 1.95 1.67 1.07
## Proportion Var 0.32 0.24 0.21 0.13
## Cumulative Var 0.32 0.56 0.77 0.91
## Proportion Explained 0.35 0.27 0.23 0.15
## Cumulative Proportion 0.35 0.62 0.85 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 4 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.04
##
## Fit based upon off diagonal values = 0.99
# here the columns can change order because of the rotation (note that RC4 appears before RC3 above)
correlaciones_modelo<-variables_pca$coord
correlaciones_modelo_rotada<-varimax(correlaciones_modelo[,1:numero_de_factores],
normalize = TRUE)$loadings
corrplot(correlaciones_modelo_rotada[,1:numero_de_factores],
is.corr = FALSE,
method = "square",
addCoef.col="black",
number.cex = 0.75)
What matters here (Bartlett's test of sphericity) is the hypothesis pair $H_0\!: R \sim I$ versus $H_1\!: R \nsim I$,
with $gl = p(p-1)/2$ degrees of freedom.
Reject $H_0$ if $\chi^2_B \geq$ the critical value, or if $p \leq \alpha$.
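For reference, the statistic computed by cortest.bartlett() takes the usual form (a restatement of the standard formula, not taken from the original text), where $n$ is the number of observations, $p$ the number of variables and $|R|$ the determinant of the correlation matrix:
$$\chi^2_B = -\left(n-1-\frac{2p+5}{6}\right)\ln|R|$$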
library(psych)
options(scipen = 99999)
Barlett<-cortest.bartlett(mat_X)
## R was not square, finding R from data
print(Barlett)
## $chisq
## [1] 99.52973
##
## $p.value
## [1] 0.0000000006035519
##
## $df
## [1] 28
# the p-value is smaller than the 5% significance level (alpha), so we reject H0 and conclude that the matrix R is different from the identity matrix (I); our analysis can be applied.
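The reported chi-square can be reproduced by hand with the formula above (an illustrative sketch, not in the original script; chi2_manual is an ad hoc name):
n<-nrow(mat_X); p<-ncol(mat_X)
chi2_manual<- -(n-1-(2*p+5)/6)*log(det(cor(mat_X))) # should be approximately 99.53
pchisq(chi2_manual,df = p*(p-1)/2,lower.tail = FALSE) # its p-value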
Kaiser-Meyer-Olkin (KMO) measure of sampling adequacy. The use of factor analysis will be considered adequate if KMO > 0.5, and inadequate otherwise. Using: psych
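As a reminder of how the index is defined (a restatement of the usual formula, not taken from the original text), where $r_{ij}$ are the ordinary correlations and $a_{ij}$ the partial correlations:
$$KMO=\frac{\sum_{i\neq j} r_{ij}^2}{\sum_{i\neq j} r_{ij}^2+\sum_{i\neq j} a_{ij}^2}$$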
library(psych)
KMO<-KMO(mat_X)
print(KMO)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = mat_X)
## Overall MSA = 0.5
## MSA for each item =
## X1 X2 X3 X4 X5 X6 X7 X8
## 0.75 0.42 0.62 0.53 0.50 0.35 0.51 0.43
# the MSA should be at least 0.5 for each variable
library(rela)
KMO<-paf(as.matrix(mat_X))$KMO
print(KMO)
## [1] 0.49718
# rounding, we are right at the minimum threshold for applying the rule, exactly on the limit; that is why we get a "poor fit", with factors dominated by a single variable, precisely because we are on the limit.
# if this value were higher, the plots would not be so concentrated on a single variable.