library(haven) #Librería para leer archivos .sav
library(corrplot) #Librería para el gráfico de correlaciones

## corrplot 0.92 loaded

library(corrr) #Otra opción de librería para el cálculo y gráfico de correlaciones
library(ggplot2)
library(grid)
library(gridExtra)
library(GPArotation)

setwd("~/ESPOL/5TO/Estadística Multivariante/Practica 3 Estadistica Multivariante")
datos <- read_sav("Ciencias_Letras.sav") #Lee archivo de datos
datos$Alumno<-as.factor(datos$Alumno) # Convierte variable Alumno en factor

library(psych) #Librería para realizar análisis descriptivos

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

describeBy(datos[,2:6],skew=FALSE,range=FALSE)

## Warning in describeBy(datos[, 2:6], skew = FALSE, range = FALSE): no grouping
## variable requested

##     vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## CNa    1 20 5.80 1.01    6.0    5.81 1.48   4   8     4  0.08    -0.42 0.22
## Mat    2 20 5.70 1.26    6.0    5.75 1.48   3   8     5 -0.21    -0.75 0.28
## Fra    3 20 5.60 1.10    5.5    5.56 0.74   4   8     4  0.33    -0.73 0.24
## Lat    4 20 6.05 1.15    6.0    6.06 1.48   4   8     4 -0.09    -0.88 0.26
## Lit    5 20 5.65 1.04    6.0    5.62 1.48   4   8     4  0.15    -0.42 0.23

d2 <- outlier(datos[,2:6],cex=1)

datos.d2 <- data.frame(datos[,2:6],d2)
pairs.panels(datos[,2:6],bg=c("yellow","blue")[(d2>10)+1],pch=21,stars=TRUE)

r.datos <- cor(datos[,2:6])
cor.plot(r.datos, numbers = TRUE)

# Matriz de correlaciones y letras
(coor.ci.le=round(cor(datos[,2:6]),3))

##       CNa   Mat   Fra   Lat   Lit
## CNa 1.000 0.656 0.497 0.420 0.584
## Mat 0.656 1.000 0.099 0.230 0.317
## Fra 0.497 0.099 1.000 0.813 0.841
## Lat 0.420 0.230 0.813 1.000 0.766
## Lit 0.584 0.317 0.841 0.766 1.000

PROCESO DEL ANALISIS FACTORIAL

Aqui vamos a empezar con las pruebas estadisticas antes de empezar con el AF:

# Se puede conocer la presencia de multicolinealidad al evaluar el
# Determinante de la matriz de correlaciones de las variables
# ingresadas al estudio:
R=cor(datos[,2:6])
det(R)

## [1] 0.02585506

EXPLICACION

Un determinante bajo, es decir, cercano a 0, indica alta multicolinealidad entre las variables. De ser igual a cero(matriz no singular), esto indicaria que algunas de las variables son linealmente dependientes y no se podrian realizar ciertos calculos necesarios para la modelizacion multivariante. En este caso observamos que es muy cercano a 0, lo que sugiere un alto nivel de colinealidad en el conjunto de variables involucradas en la matriz.

# Por consiguiente vamos a ubicar el calculo de los estimadores del Test de Barlett y el MSA(KMO)

bartlett.test(datos[,2:6])

## 
##  Bartlett test of homogeneity of variances
## 
## data:  datos[, 2:6]
## Bartlett's K-squared = 1.1957, df = 4, p-value = 0.8788

Ahora bien la explicacion del Test de Bartlett es muy similar a la prueba de Chi-cuadrado, es decir a valores pequeños de K-squared expresa que existe una homogeneidad en las varianzas y por lo consiguiente existen un buen numero de correlaciones positivas entre las variables y el p-value y se puede corroborarlo fijando en H0: Las varianzas son homogeneas(correlaciones aceptables), con una alfa = 0.05. Otra explicacion es que dicho test proporciona la probabilidad de que la matriz de correlacion de las variables sea una matriz de identidad.

# Test MSA o KMO:
KMO(datos[,2:6])

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = datos[, 2:6])
## Overall MSA =  0.67
## MSA for each item = 
##  CNa  Mat  Fra  Lat  Lit 
## 0.65 0.42 0.63 0.76 0.81

La medida de adecuacion de la muestra MSA o KMO (Kaiser -Meyer-Oikin) contrasta si las correlaciones parciales entre las variables son suficientemente pequeñas. El estadistico KMO varia entre 0 y 1. Los valores pequeños indican que en analisis factorial puede ser no una buena idea, dado que las correlaciones entre los pares de variables no pueden ser explicadas por otras variables. Los menores que 0.5 indican que no debe utilizarse el AF con la matriz de datos que se estan analizando

fit.pa.none=fa(R,nfactors=2,fm="pa", rotate="none",n.obs=220)

## maximum iteration exceeded

fit.pa.rot.vari=fa(R,nfactors=2,fm="pa", rotate="varimax",n.obs=220)

## maximum iteration exceeded

fit.pa.rot.pro=fa(R,nfactors=2,fm="pa", rotate="quartimax",n.obs=220)

## maximum iteration exceeded

sort(fit.pa.none$communality,decreasing=T) -> c1
sort(fit.pa.rot.vari$communality,decreasing=T) -> c2
sort(fit.pa.rot.pro$communality,decreasing=T) -> c3

fit.ml.none=fa(R,nfactors=2,fm="ml", rotate="none",n.obs=220)
fit.ml.rot.vari=fa(R,nfactors=2,fm="ml", rotate="varimax",n.obs=220)
fit.ml.rot.pro=fa(R,nfactors=2,fm="ml", rotate="quartimax",n.obs=220)
sort(fit.ml.none$communality,decreasing=T) -> c4
sort(fit.ml.rot.vari$communality,decreasing=T) -> c5
sort(fit.ml.rot.pro$communality,decreasing=T) -> c6
(cbind(c1,c2,c3,c4,c5,c6))

##            c1        c2        c3        c4        c5        c6
## Fra 0.9616610 0.9616610 0.9616610 0.9950018 0.9950018 0.9950018
## Mat 0.8755444 0.8755444 0.8755444 0.9601570 0.9601570 0.9601570
## Lit 0.8249298 0.8249298 0.8249298 0.7948098 0.7948098 0.7948098
## Lat 0.6933034 0.6933034 0.6933034 0.7089913 0.7089913 0.7089913
## CNa 0.6429004 0.6429004 0.6429004 0.6219474 0.6219474 0.6219474

#Normalidad de los datos

par(mfrow=c(2,3))
qqnorm(datos$CNa)
qqline(datos$CNa)
qqnorm(datos$Mat)
qqline(datos$Mat)
qqnorm(datos$Fra)
qqline(datos$Fra)
qqnorm(datos$Lat)
qqline(datos$Lat)
qqnorm(datos$Lit)
qqline(datos$Lit)

#Realizamos un test de normalidad univariante (Shapiro-Wilk):
shapiro.test(datos$CNa)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$CNa
## W = 0.91452, p-value = 0.07777

shapiro.test(datos$Mat)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$Mat
## W = 0.94253, p-value = 0.2675

shapiro.test(datos$Fra)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$Fra
## W = 0.91746, p-value = 0.08855

shapiro.test(datos$Lat)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$Lat
## W = 0.92895, p-value = 0.1474

shapiro.test(datos$Lit)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$Lit
## W = 0.90352, p-value = 0.04803

#En este caso podriamos decir que todas nuestras vairables son normales.

#Test de Normalidad Multivariante
library(MVN)

## Warning: package 'MVN' was built under R version 4.2.2

(mnvdemisdatos <- mvn(datos[,2:6],mvnTest="mardia"))

## $multivariateNormality
##              Test         Statistic            p value Result
## 1 Mardia Skewness  19.3219379270703  0.985316618918034    YES
## 2 Mardia Kurtosis -1.67841282727946 0.0932665352017992    YES
## 3             MVN              <NA>               <NA>    YES
## 
## $univariateNormality
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling    CNa       0.9161    0.0158    NO    
## 2 Anderson-Darling    Mat       0.5960    0.1055    YES   
## 3 Anderson-Darling    Fra       0.7535    0.0415    NO    
## 4 Anderson-Darling    Lat       0.6461    0.0784    YES   
## 5 Anderson-Darling    Lit       0.9553    0.0126    NO    
## 
## $Descriptives
##      n Mean  Std.Dev Median Min Max 25th 75th        Skew   Kurtosis
## CNa 20 5.80 1.005249    6.0   4   8    5    6  0.08269092 -0.4194141
## Mat 20 5.70 1.260743    6.0   3   8    5    7 -0.20659525 -0.7518840
## Fra 20 5.60 1.095445    5.5   4   8    5    6  0.32863353 -0.7283333
## Lat 20 6.05 1.145931    6.0   4   8    5    7 -0.09120884 -0.8782383
## Lit 20 5.65 1.039990    6.0   4   8    5    6  0.15268958 -0.4181932

#Matriz de correlaciones8ciencias y letras
(cor.ci.le=round(cor(datos[,2:6]),3))

##       CNa   Mat   Fra   Lat   Lit
## CNa 1.000 0.656 0.497 0.420 0.584
## Mat 0.656 1.000 0.099 0.230 0.317
## Fra 0.497 0.099 1.000 0.813 0.841
## Lat 0.420 0.230 0.813 1.000 0.766
## Lit 0.584 0.317 0.841 0.766 1.000

#Extraccion por componente principales
R=cor(datos[,2:6])
(fit.pca=principal(R,nfactors=2,rotate="none"))

## Principal Components Analysis
## Call: principal(r = R, nfactors = 2, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   h2    u2 com
## CNa 0.76  0.50 0.83 0.169 1.7
## Mat 0.50  0.81 0.90 0.098 1.7
## Fra 0.87 -0.40 0.92 0.075 1.4
## Lat 0.85 -0.32 0.83 0.169 1.3
## Lit 0.92 -0.17 0.87 0.125 1.1
## 
##                        PC1  PC2
## SS loadings           3.17 1.20
## Proportion Var        0.63 0.24
## Cumulative Var        0.63 0.87
## Proportion Explained  0.73 0.27
## Cumulative Proportion 0.73 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
## 
## Fit based upon off diagonal values = 0.99

plot(fit.pca,labels=row.names(R),cex=0.7,xlim=c(-2,2),ylim=c(-0.8,0.8))

INTERPRETACIÓN

UNICIDAD(Uniquenesses): Es el porcentaje de varianza que no ha sido explicada por el facotr y es igual a: 1 - Comunalidad.

COMUNALIDAD(loadings-Saturaciones): Porcentaje de la variablidad de la variable explicada por ese Factor.

FACTOR1-FACTOR2: Algebraicamente, un facot se estima mediante una combinacion lineal de variables ovservadas. Cuando se encuentran los factores de “mejor ajuste2”, debe recordarse que estos factores no son únicos. Se puede demostrar que cualquier rotación de los factores que mejor se ajuste es también el mejor factor. La rotación de factores se utiliza para ajustar la varianza que explicará el factor

Si todos los factores explican conjuntamente un gran porcentaje de varianza en una variable dada, esa variable tiene una alta comunalidad(y por lo tanto una singularidad baja)

SS loadings: La saturación acumulada

Proportion Var: Proporción de la varianza

Cumulative Var: Varianza acumulada

Las cargas de patrón son los coeficientes de regresión de la ecuación del modelo factorial. En el modelo, la variable que se predice se entiende como un rasgo observado estandarizado (en un FA de correlaciones) o centrado(en un FA de covarianzas), mientras que los factores se entienden como rasgos latentes estandarizados (con varianza 1). Los coeficientes de esa combinación lineal son los valores de la matriz de patrones. COmo se desprende claramente, los coeficientes de patrón nunca son mayores que los coeficientes de estructura, que son correlaciones o covarianzas entre la variable prevista y los factores normalizados.

plot(fit.pa.none,labels=row.names(R),cex=0.7,xlim=c(-2,2),ylim=c(-0.8,0.8))

fit.pa=fa(datos[,2:6], nfactors=2,fm="pa", rotate = "none", n.obs=220,scores="regression")

## maximum iteration exceeded

(fit.pa$scores)

##               PA1         PA2
##  [1,]  0.06447078  1.33146498
##  [2,] -0.08873103 -0.74854046
##  [3,] -0.42494511  0.25493202
##  [4,]  0.32416622  1.67395867
##  [5,]  0.42179630  0.16510852
##  [6,] -0.16092353 -1.58222764
##  [7,] -0.50276841 -0.22511928
##  [8,] -0.43901941  0.36352134
##  [9,]  0.74252973 -1.02911585
## [10,]  0.13113956 -0.55888895
## [11,] -0.14836267  1.08751882
## [12,] -1.52293847  0.24730839
## [13,]  0.19488856  0.02975168
## [14,]  2.34401328 -0.05354858
## [15,]  0.05602728  1.08641843
## [16,] -2.08173416 -1.06422932
## [17,]  0.69285503 -1.72634580
## [18,]  1.22209577 -0.49697066
## [19,] -1.51449497  0.49235495
## [20,]  0.68993525  0.75264876

Fp = fit.pa$scores
grap.fact=data.frame(y1=Fp[,1],y2=Fp[,2],lab=1:20,grupo=datos$Alumno)
ggplot(grap.fact,aes(y1,y2,label=lab))+geom_point()+geom_text(vjust=2)+xlab("Fact1")+ylab("Fact 2") + geom_hline(yintercept=0)+geom_vline(xintercept=0,size=1)

fit.ml=fa(datos[,2:6], nfactors=2,fm="ml", rotate = "none", n.obs=220,scores="regression")
(fit.ml$scores)

##              ML2        ML1
##  [1,] -0.6281014  1.0173812
##  [2,]  0.2825004 -0.5540239
##  [3,] -0.5145675  0.2173476
##  [4,] -0.7388025  1.7945847
##  [5,]  0.4032203  0.2572924
##  [6,]  0.6035472 -1.3332075
##  [7,] -0.3923255 -0.5643957
##  [8,] -0.6707367  0.2098258
##  [9,]  1.1648392 -0.5228020
## [10,]  0.4430236 -0.5373570
## [11,] -0.7105400  1.0044752
## [12,] -1.3642898 -0.5948386
## [13,]  0.1646125  0.2368646
## [14,]  2.0571969  1.0969395
## [15,] -0.5854657  1.0124192
## [16,] -1.1711497 -2.1716534
## [17,]  1.5994196 -1.2895019
## [18,]  1.2146613  0.2710683
## [19,] -1.4069255 -0.5898765
## [20,]  0.2498834  1.0394579

plot(fit.ml,labels=row.names(R),cex=0.7,xlim=c(-1,1),ylim=c(-1,1))

QUINTO PASO

Calcular la matriz inicial de factores no rotados para que nos dé una indicación preliminar acerca del número de factores a extraer. La matriz de factores contiene las cargas factoriales para cada variable sobre cada factor. Considerando que matriz de factores no ha sido aún rotada, el investigador deberá estar interesado en la mejor combinación lineal de variables, es decir, en encontrar aquella combinación particular de las variables originales que cuenta con el mayor porcentaje de varianza de los datos. En consecuencia, el primer factor puede contemplarse como el mejor resumen de las relaciones lineales que los datos manifiestan. El segundo factor se define como la segunda mejor combinación lineal de las variables, los factores subsiguientes se definen de forma análoga hasta haber agotado la varianza de los datos.

NOTA IMPORTANTE: las cargas factoriales son las correlaciones entre cada variable y el factor.

Es por esto que la solución factorial no rotada puede no suministrar un patrón significativo de cargas de las variables, razón por la cuál el siguiente paso es:

• Hacer uso de un método de rotación para lograr soluciones factoriales más simples y teóricamente más significativas

En una tercera etapa, el investigador valora la necesidad de especificar de nuevo el modelo de factores debido a:

• Eliminación de variables

• Cambiar el método de rotación

• Extraer un número diferente de factores, o

• Cambiar el método de extracción (AF o ACP)

ROTACION DE FACTORES

Concretamente, se giran los ejes a partir del origen, en referencia a los factores, hasta alcanzar una determinada posición. Como se indicó previamente, el primer factor tiende a ser un factor general dado que cuenta del mayor porcentaje de varianza. El segundo y los siguientes factores se basan en la varianza residual(cada uno explica porcentajes de varianza cada vez menores). El efecto de rotar la matriz de factores es redistribuir la varianza de los primeros factores a los últimos para lograr un patrón de factores más simple y teóricamente más significativo.

Existen dos tipos de rotación y son:

• Ortogonal: Los ejes de rotacion forman un angulod e 90 grados

• Oblicua: Los ejes de rotacion forman distintos angulo

METODOS

Varimax: Metodo de rotacion ortogonal que minimiza el número de variables que tienen saturaciones altas en cada factor. Simplifica la interpretación de los factores.

Metodo Oblimin directo: Metodo para la rotacion oblicua(no ortogonal). El metodo necesita un valor delta servira para ajustar los ejes de las saturaciones buscan una mejor aproximacion, pero considerando que la varianza se distribuira entre todos los factores y no habremos logrado el objetivo de reducir la dimensionalidad.

Metodo quartimax: Metodo de rotacion que minimiza el numero de factores necesarios para explicar cada variable.

Metodo equamax: Metodo de rotacion que es combinacion del metodo varimax, que simplifica los factores, y el metodo de quartimax, que simplifica las variables. Se minimiza tanto el numero de variables que saturan alto en un factor como el numero de factores necesarios para explicar una variable

Rotacion Promax: Rotacion oblicua que permite que los factores esten correlacionados. Esta rotacion se puede calcular mas rapidamente que una rotacion oblimin directa, por lo que es util para conjuntos de datos grandes.

El metodo mas usado es varimax

NOTA IMPORTANTE

La opcion rotation de R incluye “varimax”, “promax”, “none”. Se agrega la opcion scores == “regression” o “Barlett” para generar los factores. Se usa la opcion covmat para ingresar la matriz de correlacion o covarianza directamente.

Determinar el numero de factores

Una decision crucial en el AF es determinar el numero de factores a extraerse. El paquet nFactor ofrece un conjunto de rutinas para direccionar esta decision. Desde luego, cualquier solucion factorial debe ser interpretable para ser de utilidad.

Fp = fit.ml$scores
grap.fact=data.frame(y1=Fp[,1],y2=Fp[,2],lab=1:20,grupo=datos$Alumno)
ggplot(grap.fact,aes(y1,y2,label=lab))+geom_point()+geom_text(vjust=2)+xlab("Fact1")+ylab("Fact 2") + geom_hline(yintercept=0)+geom_vline(xintercept=0,size=1)

library(nFactors)

## Warning: package 'nFactors' was built under R version 4.2.2

## Loading required package: lattice

## 
## Attaching package: 'nFactors'

## The following object is masked from 'package:lattice':
## 
##     parallel

ev <- eigen(cor(datos[2:6]))
ap <- parallel(subject=nrow(datos[,2:6]),var=ncol(datos[,2:6]),rep=100,cent=0.05)
nS <- nScree(x=ev$values,aparallel=ap$eigen$qevpea)
plotnScree(nS,xlab="Numero de componentes", ylab = "Autovalores",main="Solucion por autovalores para determinar el numero de factores o componentes")

(fit.pa=fa(datos[,2:6], nfactors=2,fm="pa", rotate = "none", n.obs=220))

## maximum iteration exceeded

## Factor Analysis using method =  pa
## Call: fa(r = datos[, 2:6], nfactors = 2, n.obs = 220, rotate = "none", 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1   PA2   h2    u2 com
## CNa 0.70  0.39 0.64 0.357 1.6
## Mat 0.48  0.80 0.88 0.124 1.6
## Fra 0.90 -0.40 0.96 0.038 1.4
## Lat 0.80 -0.23 0.69 0.307 1.2
## Lit 0.90 -0.14 0.82 0.175 1.0
## 
##                        PA1  PA2
## SS loadings           2.97 1.03
## Proportion Var        0.59 0.21
## Cumulative Var        0.59 0.80
## Proportion Explained  0.74 0.26
## Cumulative Proportion 0.74 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.66 with Chi Square of  60.31
## The degrees of freedom for the model are 1  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  20 with the empirical chi square  0.19  with prob <  0.66 
## The total number of observations was  20  with Likelihood Chi Square =  1.62  with prob <  0.2 
## 
## Tucker Lewis Index of factoring reliability =  0.863
## RMSEA index =  0.169  and the 90 % confidence intervals are  0 0.67
## BIC =  -1.37
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.99 0.94
## Multiple R square of scores with factors          0.97 0.89
## Minimum correlation of possible factor scores     0.94 0.78

(fit.pa.rot.vari=fa(datos[,2:6], nfactors=2,fm="pa", rotate = "varimax", n.obs=220))

## maximum iteration exceeded

## Factor Analysis using method =  pa
## Call: fa(r = datos[, 2:6], nfactors = 2, n.obs = 220, rotate = "varimax", 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1  PA2   h2    u2 com
## CNa 0.42 0.68 0.64 0.357 1.7
## Mat 0.04 0.93 0.88 0.124 1.0
## Fra 0.98 0.08 0.96 0.038 1.0
## Lat 0.81 0.18 0.69 0.307 1.1
## Lit 0.85 0.31 0.82 0.175 1.3
## 
##                        PA1  PA2
## SS loadings           2.53 1.47
## Proportion Var        0.51 0.29
## Cumulative Var        0.51 0.80
## Proportion Explained  0.63 0.37
## Cumulative Proportion 0.63 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.66 with Chi Square of  60.31
## The degrees of freedom for the model are 1  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  20 with the empirical chi square  0.19  with prob <  0.66 
## The total number of observations was  20  with Likelihood Chi Square =  1.62  with prob <  0.2 
## 
## Tucker Lewis Index of factoring reliability =  0.863
## RMSEA index =  0.169  and the 90 % confidence intervals are  0 0.67
## BIC =  -1.37
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.98 0.94
## Multiple R square of scores with factors          0.97 0.89
## Minimum correlation of possible factor scores     0.94 0.78

(fit.pa.rot.pro=fa(datos[,2:6], nfactors=2,fm="pa", rotate = "quartimax", n.obs=220))

## maximum iteration exceeded

## Factor Analysis using method =  pa
## Call: fa(r = datos[, 2:6], nfactors = 2, n.obs = 220, rotate = "quartimax", 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1   PA2   h2    u2 com
## CNa 0.50  0.63 0.64 0.357 1.9
## Mat 0.15  0.92 0.88 0.124 1.0
## Fra 0.98 -0.03 0.96 0.038 1.0
## Lat 0.83  0.09 0.69 0.307 1.0
## Lit 0.88  0.21 0.82 0.175 1.1
## 
##                        PA1  PA2
## SS loadings           2.69 1.30
## Proportion Var        0.54 0.26
## Cumulative Var        0.54 0.80
## Proportion Explained  0.67 0.33
## Cumulative Proportion 0.67 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.66 with Chi Square of  60.31
## The degrees of freedom for the model are 1  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  20 with the empirical chi square  0.19  with prob <  0.66 
## The total number of observations was  20  with Likelihood Chi Square =  1.62  with prob <  0.2 
## 
## Tucker Lewis Index of factoring reliability =  0.863
## RMSEA index =  0.169  and the 90 % confidence intervals are  0 0.67
## BIC =  -1.37
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.99 0.94
## Multiple R square of scores with factors          0.97 0.89
## Minimum correlation of possible factor scores     0.95 0.77

par(mfrow=c(1,3))
plot(fit.pa,labels=row.names(R),cex=0.7,xlim=c(-1,1),ylim=c(-1,1))
plot(fit.pa.rot.vari,labels=row.names(R),cex=0.7,xlim=c(-1,1),ylim=c(-1,1))
plot(fit.pa.rot.pro,labels=row.names(R),cex=0.7,xlim=c(-1,1),ylim=c(-1,1))

par(mfrow=c(1,3)) #3 rows an 2 Columns
plot(fit.pa, labels=row.names(R), cex=.7, xlim=c(-1,1))
plot(fit.pa.rot.pro, labels=row.names(R), cex=.7, xlim=c(-1,1))
plot(fit.pa.rot.vari, labels=row.names(R), cex=.7, xlim=c(-1,1))

(fit.pa = fa(datos[,2:6],nfactors = 2,fm="pa",n.obs=220,scores="regression"))

## maximum iteration exceeded

## Factor Analysis using method =  pa
## Call: fa(r = datos[, 2:6], nfactors = 2, n.obs = 220, scores = "regression", 
##     fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       PA1   PA2   h2    u2 com
## CNa  0.36  0.62 0.64 0.357 1.6
## Mat -0.07  0.95 0.88 0.124 1.0
## Fra  1.01 -0.11 0.96 0.038 1.0
## Lat  0.82  0.03 0.69 0.307 1.0
## Lit  0.85  0.15 0.82 0.175 1.1
## 
##                        PA1  PA2
## SS loadings           2.61 1.39
## Proportion Var        0.52 0.28
## Cumulative Var        0.52 0.80
## Proportion Explained  0.65 0.35
## Cumulative Proportion 0.65 1.00
## 
##  With factor correlations of 
##     PA1 PA2
## PA1 1.0 0.3
## PA2 0.3 1.0
## 
## Mean item complexity =  1.1
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.66 with Chi Square of  60.31
## The degrees of freedom for the model are 1  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  20 with the empirical chi square  0.19  with prob <  0.66 
## The total number of observations was  20  with Likelihood Chi Square =  1.62  with prob <  0.2 
## 
## Tucker Lewis Index of factoring reliability =  0.863
## RMSEA index =  0.169  and the 90 % confidence intervals are  0 0.67
## BIC =  -1.37
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.99 0.95
## Multiple R square of scores with factors          0.98 0.90
## Minimum correlation of possible factor scores     0.95 0.79

(fit.pa.rot.vari=fa(datos[,2:6],nfactors=2,fm="pa",rotate="varimax",n.obs=220,scores="regression"))

## maximum iteration exceeded

## Factor Analysis using method =  pa
## Call: fa(r = datos[, 2:6], nfactors = 2, n.obs = 220, rotate = "varimax", 
##     scores = "regression", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1  PA2   h2    u2 com
## CNa 0.42 0.68 0.64 0.357 1.7
## Mat 0.04 0.93 0.88 0.124 1.0
## Fra 0.98 0.08 0.96 0.038 1.0
## Lat 0.81 0.18 0.69 0.307 1.1
## Lit 0.85 0.31 0.82 0.175 1.3
## 
##                        PA1  PA2
## SS loadings           2.53 1.47
## Proportion Var        0.51 0.29
## Cumulative Var        0.51 0.80
## Proportion Explained  0.63 0.37
## Cumulative Proportion 0.63 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.66 with Chi Square of  60.31
## The degrees of freedom for the model are 1  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  20 with the empirical chi square  0.19  with prob <  0.66 
## The total number of observations was  20  with Likelihood Chi Square =  1.62  with prob <  0.2 
## 
## Tucker Lewis Index of factoring reliability =  0.863
## RMSEA index =  0.169  and the 90 % confidence intervals are  0 0.67
## BIC =  -1.37
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.98 0.94
## Multiple R square of scores with factors          0.97 0.89
## Minimum correlation of possible factor scores     0.94 0.78

(fit.pa.rot.pro=fa(datos[,2:6],nfactors=2,fm="pa",rotate= "quartimax",n.obs=220,scores="regression"))

## maximum iteration exceeded

## Factor Analysis using method =  pa
## Call: fa(r = datos[, 2:6], nfactors = 2, n.obs = 220, rotate = "quartimax", 
##     scores = "regression", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA1   PA2   h2    u2 com
## CNa 0.50  0.63 0.64 0.357 1.9
## Mat 0.15  0.92 0.88 0.124 1.0
## Fra 0.98 -0.03 0.96 0.038 1.0
## Lat 0.83  0.09 0.69 0.307 1.0
## Lit 0.88  0.21 0.82 0.175 1.1
## 
##                        PA1  PA2
## SS loadings           2.69 1.30
## Proportion Var        0.54 0.26
## Cumulative Var        0.54 0.80
## Proportion Explained  0.67 0.33
## Cumulative Proportion 0.67 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.66 with Chi Square of  60.31
## The degrees of freedom for the model are 1  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  20 with the empirical chi square  0.19  with prob <  0.66 
## The total number of observations was  20  with Likelihood Chi Square =  1.62  with prob <  0.2 
## 
## Tucker Lewis Index of factoring reliability =  0.863
## RMSEA index =  0.169  and the 90 % confidence intervals are  0 0.67
## BIC =  -1.37
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.99 0.94
## Multiple R square of scores with factors          0.97 0.89
## Minimum correlation of possible factor scores     0.95 0.77

Fp1=fit.pa$scores
Fp2=fit.pa.rot.vari$scores
Fp3=fit.pa.rot.pro$scores
grap.fact1=data.frame(y1=Fp1[,1],y2=Fp1[,2],lab=1:20,grupo=datos$Alumno)
grap.fact2=data.frame(y1=Fp2[,1],y2=Fp2[,2],lab=1:20,grupo=datos$Alumno)
grap.fact3=data.frame(y1=Fp3[,1],y2=Fp3[,2],lab=1:20,grupo=datos$Alumno)

p1<-ggplot(grap.fact1,aes(y1,y2,label=lab))+geom_point()+geom_text(vjust = 2)+xlab("Fact 1")+ylab("Fact 2") + geom_hline(yintercept=0,size=1) +geom_vline(xintercept=0,size=1)
p2<-ggplot(grap.fact2,aes(y1,y2,label=lab))+geom_point()+geom_text(vjust = 2)+xlab("Fact 1")+ylab("Fact 2") + geom_hline(yintercept=0,size=1) +geom_vline(xintercept=0,size=1)
p3<-ggplot(grap.fact3,aes(y1,y2,label=lab))+geom_point()+geom_text(vjust = 2)+xlab("Fact 1")+ylab("Fact 2") + geom_hline(yintercept=0,size=1) +geom_vline(xintercept=0,size=1)

grid.arrange(p1, p2,p3, nrow = 1)

par(mfrow=c(1,3))
fa.diagram(fit.pa)
fa.diagram(fit.pa.rot.vari)
fa.diagram(fit.pa.rot.pro)

library(GPArotation)
par(mfrow=c(1,3))
biplot.psych(fa(datos[,2:6], nfactors=2, fm="pa", rotate="none"), main=paste("Biplot con rotación","none"), col=c(2,3,4),pch=c(21,18))

## maximum iteration exceeded

biplot.psych(fa(datos[,2:6], nfactors=2, fm="pa", rotate="varimax"), main=paste("Biplot con rotación","varimax"), col=c(2,3,4),pch=c(21,18))

## maximum iteration exceeded

biplot.psych(fa(datos[,2:6], nfactors=2, fm="pa", rotate="Promax"), main=paste("Biplot con rotación","quartimax"), col=c(2,3,4),pch=c(21,18))

## maximum iteration exceeded

fit.pa=fa(R, nfactors = 2, fm="pa", rotate = "none", n.obs = 220)

## maximum iteration exceeded

fit.pa.rot.vari=fa(R, nfactors = 2, fm="pa", rotate = "varimax", n.obs = 220)

## maximum iteration exceeded

fit.pa.rot.pro=fa(R, nfactors = 2, fm="pa", rotate = "quartimax", n.obs = 220)

## maximum iteration exceeded

corvar.pa.none<-fit.pa.none$loadings[,1:2]
corvar.pa.rot.vari<-fit.pa.rot.vari$loadings[,1:2]
corvar.pa.rot.pro<-fit.pa.rot.pro$loadings[,1:2]

fit.ml.none=fa(R,nfactors=2, fm="ml",rotate="none",n.obs = 220)
fit.ml.rot.vari=fa(R,nfactors=2, fm="ml",rotate="varimax",n.obs = 220)
fit.pa.rot.pro=fa(R, nfactors=2, fm="ml",rotate="quartimax",n.obs=220)

corvar.ml.none<-fit.ml.none$loadings[,1:2]
corvar.ml.rot.vari<-fit.ml.rot.vari$loadings[,1:2]
corvar.ml.rot.pro<-fit.ml.rot.pro$loadings[,1:2]

p1<-plot(-1:1,-1:1,type="n",asp=1,xlab="F1", ylab="F2")+
 abline(h=0, v=0, lty=2, col=8)+
 symbols(0,0,1,inches=F,add=T)+
 symbols(0,0,sqrt(.5),inches=F,add=T)+
 arrows(0,0,corvar.pa.none[,1], corvar.pa.none[,2],length=.1)+
 text(corvar.pa.none[,1],corvar.pa.none[,2],colnames(datos[,2:6]),pos=4, offset=.6, col=2, font=2)

p2<-plot(-1:1,-1:1,type="n",asp=1,xlab="F1", ylab="F2")+
 abline(h=0, v=0, lty=2, col=8)+
 symbols(0,0,1,inches=F,add=T)+
 symbols(0,0,sqrt(.5),inches=F,add=T)+
 arrows(0,0,corvar.pa.rot.vari[,1], corvar.pa.rot.vari[,2],length=.1)+
 text(corvar.pa.rot.vari[,1],corvar.pa.rot.vari[,2],colnames(datos[,2:6]),pos=4, offset=.6, col=2, font=2)

p3<-plot(-1:1,-1:1,type="n",asp=1,xlab="F1", ylab="F2")+
 abline(h=0, v=0, lty=2, col=8)+
 symbols(0,0,1,inches=F,add=T)+
 symbols(0,0,sqrt(.5),inches=F,add=T)+
 arrows(0,0,corvar.pa.rot.pro[,1], corvar.pa.rot.pro[,2],length=.1)+
 text(corvar.pa.rot.pro[,1],corvar.pa.rot.pro[,2],colnames(datos[,2:6]),pos=4, offset=.6, col=2, font=2)

p4<-plot(-1:1,-1:1,type="n",asp=1,xlab="F1", ylab="F2")+
 abline(h=0, v=0, lty=2, col=8)+
 symbols(0,0,1,inches=F,add=T)+
 symbols(0,0,sqrt(.5),inches=F,add=T)+
 arrows(0,0,corvar.ml.none[,1], corvar.ml.none[,2],length=.1)+
 text(corvar.ml.none[,1],corvar.ml.none[,2],colnames(datos),pos=4, offset=.6, col=2, font=2)

corvar.ml.none<-fit.ml.none$loadings[,1:2]
corvar.ml.rot.vari<-fit.ml.rot.vari$loadings[,1:2]
corvar.ml.rot.pro

##           ML2         ML1
## CNa 0.5224070  0.59079462
## Mat 0.1386091  0.98782046
## Fra 0.9791756 -0.03704285
## Lat 0.8341655  0.11471314
## Lit 0.8689721  0.19924189

p5<-plot(-1:1,-1:1,type="n",asp=1,xlab="F1", ylab="F2")+
 abline(h=0, v=0, lty=2, col=8)+
 symbols(0,0,1,inches=F,add=T)+
 symbols(0,0,sqrt(.5),inches=F,add=T)+
 arrows(0,0,corvar.ml.rot.vari[,1], corvar.ml.rot.vari[,2],length=.1)+
 text(corvar.ml.rot.vari[,1],corvar.ml.rot.vari[,2],colnames(datos[,2:6]),pos=4, offset=.6, col=2, font=2)

modelo_varimax<-fa(R,nfactors=2, rotate = "varimax", fa="minres")
fa.diagram(modelo_varimax)

Practica Analisis Factorial

Oscar Bolaños Flores

2022-10-27