Análisis de factorial Sobre el tamaño muestral Mínimo 50 casos, sugerido más de 200. Al menos 10 casos por cada variable; la cantidad de variables no debe exceder la mitad de los casos.
library(readxl)
gorrion=read_excel("C:/Users/Alumno/Desktop/baseTaller/gorriones.xlsx")
gorrion=data.frame(gorrion)
head(gorrion)
## x1 x2 x3 x4 x5 sobrevi
## 1 156 245 31.6 18.5 20.5 sobrevivió
## 2 154 240 30.4 17.9 19.6 sobrevivió
## 3 153 240 31.0 18.4 20.6 sobrevivió
## 4 153 236 30.9 17.7 20.2 sobrevivió
## 5 155 243 31.5 18.6 20.3 sobrevivió
## 6 163 247 32.0 19.0 20.9 sobrevivió
Matriz de correlaciones(gorriones)
cor.gor=round(cor(gorrion[,1:5]),3)
cor.gor
## x1 x2 x3 x4 x5
## x1 1.000 0.735 0.662 0.645 0.605
## x2 0.735 1.000 0.674 0.769 0.529
## x3 0.662 0.674 1.000 0.763 0.526
## x4 0.645 0.769 0.763 1.000 0.607
## x5 0.605 0.529 0.526 0.607 1.000
0,3 (baja colinealidad), 0,5 (colinealidad media), 0,7 (colinealidad alta). Supuesto de colinealidad
det(cor.gor)
## [1] 0.03677719
cercano a 0, indica alta multicolinealidad entre las variables. igual a 0 (matriz no singular). Supuesto de multicolinealidad test de esfericidad de Bartlett busca contrastar la hipótesis nula de que la matriz de correlaciones es igual a una matriz de identidad
library(psych)
cortest.bartlett(cor.gor,n=nrow(gorrion))
## $chisq
## [1] 150.2809
##
## $p.value
## [1] 3.263015e-27
##
## $df
## [1] 10
KMO(cor.gor)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor.gor)
## Overall MSA = 0.83
## MSA for each item =
## x1 x2 x3 x4 x5
## 0.82 0.81 0.86 0.79 0.87
0,90 > KMO Muy bueno 0,90 > KMO > 0,80 Bueno 0,80 > KMO > 0,70 Aceptable 0,70 > KMO > 0,60 Mediocre o regular 0,60 > KMO > 0,50 Malo 0,50 > KMO Inaceptable o muy malo Autovalores y autovectores de la matriz de covarianzas de la muestra
aucor=eigen(cor.gor)
Porcentajes de variación explicada por cada componente
Prop.Var=aucor$values/sum(aucor$values)*100
cumProp.var=cumsum(aucor$values/sum(aucor$values)*100)
porc=data.frame(Comp=1:5,Autovalor=round(aucor$values,3),Porc.Var=round(Prop.Var,3),Acum.Porc.Var=round(cumProp.var,3))
variación total explicada
porc
## Comp Autovalor Porc.Var Acum.Porc.Var
## 1 1 3.616 72.323 72.323
## 2 2 0.532 10.632 82.956
## 3 3 0.386 7.725 90.681
## 4 4 0.302 6.038 96.719
## 5 5 0.164 3.281 100.000
Cálculos de Comunalidades Matris de Autovalores(egien)
mautov=matrix(diag(aucor$values),ncol=5,nrow=5)
Matriz de Cargas Factoriales
lamda=aucor$vectors%*%sqrt(mautov)
hi=lamda%*%t(lamda)
comunalidad
hi2=diag(hi)
hi2
## [1] 1 1 1 1 1
especificidad
him=lamda[,1:2]%*%t(lamda[,1:2])
comunalidad
hi2m=diag(him)
hi2m
## [1] 0.7393089 0.8188182 0.7900999 0.8192952 0.9802729
Matriz de Comunalidades
data.frame(varible=names(gorrion[,1:5]),inicial=round(hi2,3),extraccio=round(hi2m,3))
## varible inicial extraccio
## 1 x1 1 0.739
## 2 x2 1 0.819
## 3 x3 1 0.790
## 4 x4 1 0.819
## 5 x5 1 0.980
Varianza Total
sum(aucor$values)
## [1] 5
Gráfico de sedimentación
plot(1:5,aucor$values,type="l",xlab="Componetes",ylab="autovalores")
Matriz de Componente
data.frame(varible=names(gorrion[,1:5]),comp1=round(-lamda[,1],3),comp1=round(lamda[,2],3))
## varible comp1 comp1.1
## 1 x1 0.859 0.037
## 2 x2 0.878 -0.219
## 3 x3 0.857 -0.237
## 4 x4 0.895 -0.134
## 5 x5 0.756 0.639
Calculando la Matriz de Puntuaciones Factoriales
#F=bX(ojo para escribir la ecuación)
B=solve(cor.gor)%*%lamda
Matriz de Puntuaciones Factoriales de componente
data.frame(varible=names(gorrion[,1:5]),Fac1=round(-B[,1],3),Fac2=round(B[,2],3))
## varible Fac1 Fac2
## x1 x1 0.238 0.069
## x2 x2 0.243 -0.411
## x3 x3 0.237 -0.446
## x4 x4 0.248 -0.252
## x5 x5 0.209 1.202
ESTANDARIZACIÓN DE VARIABLES
#scale(gorrion[1:5])
estgor=data.frame(scale(gorrion [1:5]))
#Estimación factorial del individuo.
Fp=data.matrix (estgor)%*%(B)
head(Fp)
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.03385696 -0.82380643 -0.2806238 -0.936760452 1.3554762
## [2,] 1.14650746 -0.60669127 0.6380144 -1.178628395 0.5725324
## [3,] 0.60237905 0.02771788 -1.0935595 -1.297291161 0.5177584
## [4,] 1.21531423 0.23540255 -0.4911950 0.275541850 1.1800958
## [5,] 0.15510573 -0.91143170 -0.7682751 -0.989725507 0.6039108
## [6,] -1.00770843 -0.81667130 0.9985366 0.005201996 -0.7056362
Aplicación: Gráfico correlaciones factores y variables
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
B1=data.frame(B)
ggplot(B1,aes(-B1[,1],B1[,2],label=rownames(B1)))+geom_point()+geom_text(vjust = 2)+xlab("Fact 1")+ylab("Fact 2")+geom_hline(yintercept=0,size=1)+geom_vline(xintercept=0,size=1)
Aplicación: Gráfico con puntuaciones factoriales
grap.fact=data.frame(y1=-Fp[,1],y2=Fp[,2],lab=1:49,grupo=gorrion[,6])
ggplot(grap.fact,aes(y1,y2,label=lab,color=grupo))+geom_point()+geom_text(vjust = 2)+xlab("Fact 1")+ylab("Fact 2")+geom_hline(yintercept=0,size=1)+geom_vline(xintercept=0,size=1)
Prueba de hipótesis con m Factoriales Test para ajuste de m factores Es adecuado m factores para explicar la estructura de asociación
f=fa(gorrion[,1:5],nfactors=2,fm="ml",rotate="none", max.iter=100)
Ponderaciones factores, autovalores comunalidades, especificidades Por funciones
Mtrz=data.frame(varible=names(gorrion[,1:5]),Fact1=round(-lamda[,1],3),Fact2=round(-lamda[,2],3), ComFac1=round(lamda[,1]^2,3), ComFac2=round(lamda[,2]^2,3),ComT=round(hi2m,3),Esp=1-round(hi2m,3))
Mtrz
## varible Fact1 Fact2 ComFac1 ComFac2 ComT Esp
## 1 x1 0.859 -0.037 0.738 0.001 0.739 0.261
## 2 x2 0.878 0.219 0.771 0.048 0.819 0.181
## 3 x3 0.857 0.237 0.734 0.056 0.790 0.210
## 4 x4 0.895 0.134 0.801 0.018 0.819 0.181
## 5 x5 0.756 -0.639 0.572 0.408 0.980 0.020
autovalores de la matriz de correlaciones
colSums(Mtrz[,4:6])
## ComFac1 ComFac2 ComT
## 3.616 0.531 4.147
porcentaje de variación explicada
colSums(Mtrz[,4:6])/5*100
## ComFac1 ComFac2 ComT
## 72.32 10.62 82.94
Métodos por funciones Coeficiente De correlación
R=cor(gorrion[,1:5])
R
## x1 x2 x3 x4 x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000
Extracción por componentes principales
fit.pca=principal(R,nfactors=2,rotate="none")
fit.pca
## Principal Components Analysis
## Call: principal(r = R, nfactors = 2, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 h2 u2 com
## x1 0.86 0.04 0.74 0.26 1.0
## x2 0.88 -0.22 0.82 0.18 1.1
## x3 0.86 -0.24 0.79 0.21 1.2
## x4 0.90 -0.13 0.82 0.18 1.0
## x5 0.76 0.64 0.98 0.02 1.9
##
## PC1 PC2
## SS loadings 3.62 0.53
## Proportion Var 0.72 0.11
## Cumulative Var 0.72 0.83
## Proportion Explained 0.87 0.13
## Cumulative Proportion 0.87 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
##
## Fit based upon off diagonal values = 0.99
plot(fit.pca,labels=row.names(R),cex=.7, ylim=c(-.8,.8))
Extracción por ejes principales
fit.pa=fa(R,nfactors=2,fm="pa",rotate="none",n.obs=220)
## maximum iteration exceeded
## The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : An ultra-Heywood case was detected. Examine the results carefully
fit.pa
## Factor Analysis using method = pa
## Call: fa(r = R, nfactors = 2, n.obs = 220, rotate = "none", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 h2 u2 com
## x1 0.90 0.47 1.03 -0.026 1.5
## x2 0.83 -0.02 0.69 0.313 1.0
## x3 0.80 -0.10 0.65 0.354 1.0
## x4 0.92 -0.37 0.98 0.020 1.3
## x5 0.66 0.02 0.44 0.565 1.0
##
## PA1 PA2
## SS loadings 3.40 0.37
## Proportion Var 0.68 0.07
## Cumulative Var 0.68 0.76
## Proportion Explained 0.90 0.10
## Cumulative Proportion 0.90 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 3.3 with Chi Square of 714.65
## The degrees of freedom for the model are 1 and the objective function was 0.01
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.02
##
## The harmonic number of observations is 220 with the empirical chi square 0.26 with prob < 0.61
## The total number of observations was 220 with Likelihood Chi Square = 1.37 with prob < 0.24
##
## Tucker Lewis Index of factoring reliability = 0.995
## RMSEA index = 0.042 and the 90 % confidence intervals are 0 0.19
## BIC = -4.03
## Fit based upon off diagonal values = 1
plot(fit.pa,labels=row.names(R),cex=.7, ylim=c(-.8,.8))
extracción por máxima verosimilitud
fit.ml=fa(R,nfactors=2,fm="ml",rotate="none",n.obs=220)
fit.ml
## Factor Analysis using method = ml
## Call: fa(r = R, nfactors = 2, n.obs = 220, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## x1 0.98 -0.17 1.00 0.005 1.1
## x2 0.79 0.26 0.69 0.307 1.2
## x3 0.73 0.32 0.64 0.362 1.4
## x4 0.77 0.63 0.98 0.015 1.9
## x5 0.65 0.18 0.45 0.552 1.1
##
## ML1 ML2
## SS loadings 3.13 0.63
## Proportion Var 0.63 0.13
## Cumulative Var 0.63 0.75
## Proportion Explained 0.83 0.17
## Cumulative Proportion 0.83 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 3.3 with Chi Square of 714.65
## The degrees of freedom for the model are 1 and the objective function was 0.01
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic number of observations is 220 with the empirical chi square 0.41 with prob < 0.52
## The total number of observations was 220 with Likelihood Chi Square = 1.32 with prob < 0.25
##
## Tucker Lewis Index of factoring reliability = 0.995
## RMSEA index = 0.04 and the 90 % confidence intervals are 0 0.189
## BIC = -4.07
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 1.00 0.99
## Multiple R square of scores with factors 1.00 0.97
## Minimum correlation of possible factor scores 0.99 0.94
plot(fit.ml,labels=row.names(R),cex=.7, ylim=c(-.8,.8))
gráfico de sedimentación
fa.parallel(R,n.obs=220,ylabel="Eigenvalues")
## The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.
## Parallel analysis suggests that the number of factors = 1 and the number of components = 1
fa.parallel(R,fm="pa",n.obs=220,ylabel="Eigenvalues")
## Parallel analysis suggests that the number of factors = 1 and the number of components = 1
fa.parallel(R,fm="ml",n.obs=220,ylabel="Eigenvalues")
## Parallel analysis suggests that the number of factors = 1 and the number of components = 1
test de Barlett para número de componentes install.packages(“nFactors”)
library(nFactors)
## Loading required package: MASS
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
##
## melanoma
##
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
##
## parallel
nBartlett(R, N=49, alpha=0.01, cor=TRUE, details=TRUE)
## bartlett anderson lawley
## 1 1 1
Puntuaciones factoriales
punt.fac=factor.scores(R,fit.pca,method="Thurstone")
punt.fac$weights
## PC1 PC2
## x1 0.2375923 0.06957251
## x2 0.2427890 -0.41089955
## x3 0.2369311 -0.44520324
## x4 0.2475524 -0.25332383
## x5 0.2091297 1.20224600
Puntuaciones Individuales para PCA
fit.pca=principal(gorrion[,1:5],nfactors=2,rotate="none",scores=TRUE)
fit.pca$scores
## PC1 PC2
## [1,] 0.03380834 -0.82414469
## [2,] -1.14658406 -0.60668665
## [3,] -0.60243068 0.02641012
## [4,] -1.21534442 0.23591559
## [5,] -0.15515686 -0.91243944
## [6,] 1.00772486 -0.81648713
## [7,] -0.55236788 -0.16433982
## [8,] 0.23062047 -0.22491468
## [9,] 1.41539362 -1.07300473
## [10,] 0.09765054 1.80198117
## [11,] 0.19516205 1.56155323
## [12,] 0.14078124 -0.43171382
## [13,] 1.24068198 0.01521951
## [14,] 0.37581916 -1.90488740
## [15,] -0.73321017 -0.60761036
## [16,] -0.81967866 0.19854359
## [17,] 0.28835598 0.74102531
## [18,] -0.87176139 0.92225688
## [19,] -0.93431524 0.12968640
## [20,] 1.14434555 0.37678167
## [21,] -0.24052328 1.45588049
## [22,] -0.50753316 0.14125819
## [23,] -0.34605852 -0.10677726
## [24,] 0.83302315 0.25564139
## [25,] -1.95459517 0.61668574
## [26,] 1.11673905 1.08132585
## [27,] -0.69881890 -0.46478892
## [28,] 0.90625218 -0.49870540
## [29,] 2.10054419 0.59128341
## [30,] -1.95323930 0.78394726
## [31,] 0.07802935 3.88229990
## [32,] 0.62846751 0.28773402
## [33,] 0.54162405 0.57618331
## [34,] -0.37591049 -2.17655258
## [35,] -0.16694902 0.68126654
## [36,] 1.47053926 0.34671695
## [37,] -2.22986828 -0.77731233
## [38,] -0.28496813 0.05903244
## [39,] -1.00217194 -1.23744209
## [40,] 2.14106132 0.31678143
## [41,] 0.03304581 0.31058555
## [42,] -0.49344334 -0.33938446
## [43,] -0.22236637 -0.34109867
## [44,] 0.83446082 -0.91458550
## [45,] -1.31941060 -0.26051523
## [46,] 0.85129077 -1.43075400
## [47,] -0.81985191 0.16139468
## [48,] 0.81879053 -0.48644915
## [49,] 1.12234601 -0.95679632
para pa y ml
fit.pa=fa(gorrion[,1:5],nfactors=2,fm="pa",rotate="none",n.obs=220,scores="regression")
## maximum iteration exceeded
## The estimated weights for the factor scores are probably incorrect. Try a different factor extraction method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : An ultra-Heywood case was detected. Examine the results carefully
fit.pa$scores
## PA1 PA2
## [1,] -0.24458497 -0.82109384
## [2,] -1.16147790 -0.17976261
## [3,] -0.78804234 -1.55928545
## [4,] -1.49700166 -0.14640236
## [5,] -0.28214155 -1.31194580
## [6,] 1.26590788 0.62635487
## [7,] -0.21457771 -0.08890336
## [8,] -0.25007893 -1.37675859
## [9,] 1.52836393 0.71048463
## [10,] 0.31249012 -0.56339172
## [11,] 0.11505401 -0.24568117
## [12,] 0.41125761 0.46012392
## [13,] 1.28571637 -0.68414191
## [14,] 0.53194292 -1.61229991
## [15,] -0.49303026 0.49688198
## [16,] -0.76643465 0.32602521
## [17,] 0.01746946 -0.12150268
## [18,] -1.01095489 -1.11819347
## [19,] -0.41161531 -0.89690638
## [20,] 0.85821540 1.32191362
## [21,] -0.33842980 1.31170872
## [22,] -0.90213925 -0.13450810
## [23,] -0.55066924 -0.16264555
## [24,] 0.64751014 -0.04393886
## [25,] -2.15921106 0.57602054
## [26,] 0.59758056 -0.14387657
## [27,] -0.38897543 -0.96642960
## [28,] 0.92625344 -2.43318794
## [29,] 2.37848103 -0.29265168
## [30,] -1.92002029 0.77421421
## [31,] 0.02171307 2.35431876
## [32,] 0.90484929 0.73828289
## [33,] 0.16957482 0.17851431
## [34,] -0.22933673 1.06681057
## [35,] -0.41887317 -1.11247702
## [36,] 1.19743305 -0.04351493
## [37,] -2.03303444 0.45894297
## [38,] -0.14746561 0.92020718
## [39,] -0.98713131 0.15643425
## [40,] 1.79077106 -0.53178847
## [41,] 0.32172261 2.49790667
## [42,] -0.53631481 -0.11428142
## [43,] 0.08707907 0.56611520
## [44,] 1.09111328 -0.23707538
## [45,] -1.20890908 0.61527430
## [46,] 1.23067671 0.11096303
## [47,] -0.58958571 -1.84402557
## [48,] 0.62719227 1.20426916
## [49,] 1.21166804 1.31490334
Aplicación: Gráfico correlaciones factores y variables
Fp=fit.pca$scores
grap.fact=data.frame(y1=Fp[,1],y2=Fp[,2],lab=1:49,grupo=gorrion[,6])
ggplot(grap.fact,aes(y1,y2,label=lab,color=grupo))+geom_point()+geom_text(vjust = 2)+xlab("Fact 1")+ylab("Fact 2") + geom_hline(yintercept=0,size=1) +geom_vline(xintercept=0,size=1)