Warning: package 'psych' was built under R version 4.3.3
library(GPArotation)
Warning: package 'GPArotation' was built under R version 4.3.3
Attaching package: 'GPArotation'
The following objects are masked from 'package:psych':
equamax, varimin
library(corrplot)
corrplot 0.92 loaded
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Importar datos desde Exceldatos<-read_excel("C:/Users/MINEDUCYT/Downloads/ilpd+indian+liver+patient+dataset/Indian Liver Patient Dataset (ILPD) india.xlsx")View(datos)# verificar valores perdidos sumando cauntos haysum(is.na(datos))
[1] 4
#otra forma es Ver si hay algún valor perdido en toda la base:any(is.na(datos))
[1] TRUE
# Seleccionar las variables independientesdatos_factoriales <- datos %>%select(-Genero,-borrar,Tp)# Conversión segura: los factores/character se convierten a números por sus nivelesdatos_factoriales <- datos_factoriales %>%mutate(across(everything(), ~as.numeric(as.character(.))))datos_factoriales. <-na.omit(datos_factoriales)sum(is.na(datos_factoriales.)) # debe dar 0
[1] 0
# Escalar los datosdatos_esc <-scale(datos_factoriales.)
Verificar supuestos: KMO y test de Bartlett
# Matriz de correlacióncor_matrix <-cor(datos_esc)# Visualizar matriz de correlacióncorrplot(cor_matrix, method ="color", tl.cex =0.7)
# Prueba de esfericidad de Bartlettcortest.bartlett(cor_matrix, n =nrow(datos_esc))
$chisq
[1] 2953.766
$p.value
[1] 0
$df
[1] 36
#p-value = 0:#Técnicamente esto significa p < 0.0001 o un valor extremadamente pequeño.#Esto implica que rechazas la hipótesis nula de que las variables no están correlacionadas.#La prueba de esfericidad de Bartlett es significativa -> los datos presentan correlaciones suficientes entre variables como para aplicar un Análisis Factorial Exploratorio (AFE)
# Instala si no la tienesinstall.packages("ggcorrplot")
Installing package into 'C:/Users/MINEDUCYT/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'ggcorrplot' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\MINEDUCYT\AppData\Local\Temp\RtmpInheve\downloaded_packages
library(ggcorrplot)
Warning: package 'ggcorrplot' was built under R version 4.3.3
Loading required package: ggplot2
Attaching package: 'ggplot2'
The following objects are masked from 'package:psych':
%+%, alpha
# Matriz de correlacionescor_matrix <-cor(datos_esc)# Gráfico con ggcorrplotggcorrplot(cor_matrix,hc.order =TRUE, # Agrupa por jerarquíatype ="lower", # Solo triángulo inferiorlab =TRUE, # Etiquetas de correlaciónlab_size =3,colors =c("red", "white", "blue"),title ="Matriz de correlaciones (ggcorrplot)",ggtheme = ggplot2::theme_minimal())
# Seleccionar las variables independientes y las que el KMO es pequeñodatos_factoriales2 <- datos %>%select(-Genero,-borrar,-Tp,-Relacion)# Conversión segura: los factores/character se convierten a números por sus nivelesdatos_factoriales2 <- datos_factoriales2 %>%mutate(across(everything(), ~as.numeric(as.character(.))))datos_factoriales2. <-na.omit(datos_factoriales2)sum(is.na(datos_factoriales2.)) # debe dar 0
[1] 0
# Escalar los datosdatos_esc2 <-scale(datos_factoriales2.)# Matriz de correlacióncor_matrix2 <-cor(datos_esc2)# Prueba de adecuación KMOKMO(cor_matrix2)
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor_matrix2)
Overall MSA = 0.58
MSA for each item =
Edad Tuberculosis Basededatos Alkfos Sargento Sgot
0.50 0.57 0.57 0.86 0.55 0.56
Alba
0.71
# Prueba de esfericidad de Bartlettcortest.bartlett(cor_matrix2, n =nrow(datos_esc2))
# Análisis factorial sin rotación, 3 factores inicialesfactores_sin_rot <-fa(datos_esc2, nfactors =3, rotate ="none", fm ="ml")print(factores_sin_rot)
Factor Analysis using method = ml
Call: fa(r = datos_esc2, nfactors = 3, rotate = "none", fm = "ml")
Standardized loadings (pattern matrix) based upon correlation matrix
ML1 ML2 ML3 h2 u2 com
Edad 0.01 -0.05 0.51 0.27 0.733 1.0
Tuberculosis 0.89 -0.08 0.01 0.79 0.209 1.0
Basededatos 0.98 -0.09 -0.01 0.97 0.032 1.0
Alkfos 0.25 0.08 0.19 0.10 0.896 2.1
Sargento 0.31 0.80 -0.08 0.75 0.255 1.3
Sgot 0.34 0.86 0.04 0.86 0.143 1.3
Alba -0.24 0.02 -0.51 0.32 0.681 1.4
ML1 ML2 ML3
SS loadings 2.08 1.40 0.57
Proportion Var 0.30 0.20 0.08
Cumulative Var 0.30 0.50 0.58
Proportion Explained 0.51 0.35 0.14
Cumulative Proportion 0.51 0.86 1.00
Mean item complexity = 1.3
Test of the hypothesis that 3 factors are sufficient.
df null model = 21 with the objective function = 2.74 with Chi Square = 1587.78
df of the model are 3 and the objective function was 0
The root mean square of the residuals (RMSR) is 0.01
The df corrected root mean square of the residuals is 0.01
The harmonic n.obs is 583 with the empirical chi square 0.67 with prob < 0.88
The total n.obs was 583 with Likelihood Chi Square = 1.27 with prob < 0.74
Tucker Lewis Index of factoring reliability = 1.008
RMSEA index = 0 and the 90 % confidence intervals are 0 0.049
BIC = -17.83
Fit based upon off diagonal values = 1
Measures of factor score adequacy
ML1 ML2 ML3
Correlation of (regression) scores with factors 0.99 0.94 0.67
Multiple R square of scores with factors 0.97 0.89 0.45
Minimum correlation of possible factor scores 0.94 0.78 -0.10
Determinar número óptimo de factores (Scree Plot y eigenvalores)
# Scree plot y eigenvaloresfa.parallel(datos_esc2, fa ="fa", fm ="ml", n.iter =100)
Parallel analysis suggests that the number of factors = 3 and the number of components = NA
#Este gráfico sirve para decidir cuántos factores conservar,se basa en el criterio de eigenvalores > 1 y el punto de quiebre en la curva.
library(ggplot2)# Calcular valores propios (eigenvalores)eigen_vals <-eigen(cor(datos_esc2))$values# Crear dataframedf_eigen <-data.frame(Factor =1:length(eigen_vals),Eigenvalue = eigen_vals)# Gráfico tipo scree plotggplot(df_eigen, aes(x = Factor, y = Eigenvalue)) +geom_line(color ="steelblue") +geom_point(size =2, color ="red") +geom_hline(yintercept =1, linetype ="dashed", color ="gray") +ggtitle("Scree Plot de Eigenvalores") +xlab("Número de Factor") +ylab("Eigenvalor") +theme_minimal()
Interpretar matriz de cargas factoriales
# Extraer factores con la cantidad óptima en este caso seria 3 factoresmodelo_factorial <-fa(datos_esc2, nfactors =3, rotate ="none", fm ="ml")print(modelo_factorial$loadings)
Loadings:
ML1 ML2 ML3
Edad 0.514
Tuberculosis 0.886
Basededatos 0.980
Alkfos 0.248 0.189
Sargento 0.312 0.802
Sgot 0.342 0.859
Alba -0.238 -0.512
ML1 ML2 ML3
SS loadings 2.077 1.405 0.569
Proportion Var 0.297 0.201 0.081
Cumulative Var 0.297 0.497 0.579
# Cargas con más claridad para interpretaciónprint(modelo_rotado)
Factor Analysis using method = ml
Call: fa(r = datos_esc2, nfactors = 3, rotate = "varimax", fm = "ml")
Standardized loadings (pattern matrix) based upon correlation matrix
ML1 ML2 ML3 h2 u2 com
Edad -0.04 -0.06 0.51 0.27 0.733 1.0
Tuberculosis 0.87 0.12 0.11 0.79 0.209 1.1
Basededatos 0.97 0.13 0.10 0.97 0.032 1.1
Alkfos 0.20 0.14 0.21 0.10 0.896 2.7
Sargento 0.13 0.85 -0.05 0.75 0.255 1.1
Sgot 0.13 0.91 0.07 0.86 0.143 1.1
Alba -0.18 -0.03 -0.53 0.32 0.681 1.2
ML1 ML2 ML3
SS loadings 1.81 1.62 0.62
Proportion Var 0.26 0.23 0.09
Cumulative Var 0.26 0.49 0.58
Proportion Explained 0.45 0.40 0.15
Cumulative Proportion 0.45 0.85 1.00
Mean item complexity = 1.3
Test of the hypothesis that 3 factors are sufficient.
df null model = 21 with the objective function = 2.74 with Chi Square = 1587.78
df of the model are 3 and the objective function was 0
The root mean square of the residuals (RMSR) is 0.01
The df corrected root mean square of the residuals is 0.01
The harmonic n.obs is 583 with the empirical chi square 0.67 with prob < 0.88
The total n.obs was 583 with Likelihood Chi Square = 1.27 with prob < 0.74
Tucker Lewis Index of factoring reliability = 1.008
RMSEA index = 0 and the 90 % confidence intervals are 0 0.049
BIC = -17.83
Fit based upon off diagonal values = 1
Measures of factor score adequacy
ML1 ML2 ML3
Correlation of (regression) scores with factors 0.98 0.94 0.68
Multiple R square of scores with factors 0.96 0.89 0.46
Minimum correlation of possible factor scores 0.92 0.79 -0.09
Gráficos de factores
# Mapa de factores con rotaciónfa.diagram(modelo_rotado)
# Otra forma: gráfico de carga factorialplot(modelo_rotado$loadings, main ="Cargas factoriales (rotadas)",xlab ="Factor 1", ylab ="Factor 2")abline(h =0, v =0, col ="gray")
# Cargas factoriales como matrizload_matrix <- modelo_rotado$loadings[,1:3] # Ajustado número de factores# Convertir a data frameload_df <-as.data.frame(unclass(load_matrix))# Visualizar con heatmapheatmap(as.matrix(load_df),Colv =NA, Rowv =NA,col =colorRampPalette(c("white", "red"))(100),scale ="none",main ="Mapa de calor de cargas factoriales")
install.packages("plotly")
Installing package into 'C:/Users/MINEDUCYT/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'plotly' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\MINEDUCYT\AppData\Local\Temp\RtmpInheve\downloaded_packages
library(plotly)
Warning: package 'plotly' was built under R version 4.3.3
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
df_load <-as.data.frame(load_matrix)df_load$Variable <-rownames(df_load)# Gráfico 3D interactivoplot_ly(df_load, x =~ML1, y =~ML2, z =~ML3,text =~Variable,type ='scatter3d', mode ='text+markers',marker =list(size =5),textposition ="top center") %>%layout(title ='Gráfico 3D de Cargas Factoriales')